diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index 4dda3ec2216f..c8d7b6a63318 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -11,6 +11,7 @@ namespace llvm { class StringRef; +template <typename T, typename R> class StringSwitch; } // namespace llvm namespace clang { @@ -103,9 +104,62 @@ enum class OffloadArch { Generic, // A processor model named 'generic' if the target backend defines a // public one. // Intel CPUs + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, GRANITERAPIDS, // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, BMG_G21, + LNL_M, LAST, CudaDefault = OffloadArch::SM_52, @@ -122,17 +176,41 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { } static inline bool IsIntelCPUOffloadArch(OffloadArch Arch) { - return Arch >= OffloadArch::GRANITERAPIDS && Arch < OffloadArch::BMG_G21; + return Arch >= OffloadArch::SKYLAKEAVX512 && + Arch <= OffloadArch::GRANITERAPIDS; } static inline bool IsIntelGPUOffloadArch(OffloadArch Arch) { - return Arch >= OffloadArch::BMG_G21 && Arch < OffloadArch::LAST; + return Arch >= OffloadArch::BDW && Arch < OffloadArch::LAST; } static inline bool IsIntelOffloadArch(OffloadArch Arch) { return IsIntelCPUOffloadArch(Arch) || IsIntelGPUOffloadArch(Arch); } +// Check if the given Arch value is a Generic AMD GPU. +// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. +// This list is used to filter out GFX*_GENERIC AMD GPUs in +// `IsSYCLSupportedAMDGPUArch`. 
+static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { + return Arch == OffloadArch::GFX9_GENERIC || + Arch == OffloadArch::GFX10_1_GENERIC || + Arch == OffloadArch::GFX10_3_GENERIC || + Arch == OffloadArch::GFX11_GENERIC || + Arch == OffloadArch::GFX12_GENERIC; +} + +// Check if the given Arch value is a valid SYCL supported AMD GPU. +static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && + !IsAMDGenericGPUArch(Arch); +} + +// Check if the given Arch value is a valid SYCL supported NVidia GPU. +static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; +} + const char *OffloadArchToString(OffloadArch A); const char *OffloadArchToVirtualArchString(OffloadArch A); diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp index dce9ffaedb90..1cca176efb06 100644 --- a/clang/lib/Basic/OffloadArch.cpp +++ b/clang/lib/Basic/OffloadArch.cpp @@ -89,9 +89,62 @@ static const OffloadArchToStringMap ArchNames[] = { GFX(1250), // gfx1250 {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, // Intel CPUs + {OffloadArch::SKYLAKEAVX512, "skylake-avx512", ""}, + {OffloadArch::COREAVX2, "core-avx2", ""}, + {OffloadArch::COREI7AVX, "corei7-avx", ""}, + {OffloadArch::COREI7, "corei7", ""}, + {OffloadArch::WESTMERE, "westmere", ""}, + {OffloadArch::SANDYBRIDGE, "sandybridge", ""}, + {OffloadArch::IVYBRIDGE, "ivybridge", ""}, + {OffloadArch::BROADWELL, "broadwell", ""}, + {OffloadArch::COFFEELAKE, "coffeelake", ""}, + {OffloadArch::ALDERLAKE, "alderlake", ""}, + {OffloadArch::SKYLAKE, "skylake", ""}, + {OffloadArch::SKX, "skx", ""}, + {OffloadArch::CASCADELAKE, "cascadelake", ""}, + {OffloadArch::ICELAKECLIENT, "icelake-client", ""}, + {OffloadArch::ICELAKESERVER, "icelake-server", ""}, + {OffloadArch::SAPPHIRERAPIDS, "sapphirerapids", ""}, {OffloadArch::GRANITERAPIDS, "graniterapids", ""}, // 
Intel GPUS + {OffloadArch::BDW, "bdw", ""}, + {OffloadArch::SKL, "skl", ""}, + {OffloadArch::KBL, "kbl", ""}, + {OffloadArch::CFL, "cfl", ""}, + {OffloadArch::APL, "apl", ""}, + {OffloadArch::BXT, "bxt", ""}, + {OffloadArch::GLK, "glk", ""}, + {OffloadArch::WHL, "whl", ""}, + {OffloadArch::AML, "aml", ""}, + {OffloadArch::CML, "cml", ""}, + {OffloadArch::ICLLP, "icllp", ""}, + {OffloadArch::ICL, "icl", ""}, + {OffloadArch::EHL, "ehl", ""}, + {OffloadArch::JSL, "jsl", ""}, + {OffloadArch::TGLLP, "tgllp", ""}, + {OffloadArch::TGL, "tgl", ""}, + {OffloadArch::RKL, "rkl", ""}, + {OffloadArch::ADL_S, "adl_s", ""}, + {OffloadArch::RPL_S, "rpl_s", ""}, + {OffloadArch::ADL_P, "adl_p", ""}, + {OffloadArch::ADL_N, "adl_n", ""}, + {OffloadArch::DG1, "dg1", ""}, + {OffloadArch::ACM_G10, "acm_g10", ""}, + {OffloadArch::DG2_G10, "dg2_g10", ""}, + {OffloadArch::ACM_G11, "acm_g11", ""}, + {OffloadArch::DG2_G11, "dg2_g11", ""}, + {OffloadArch::ACM_G12, "acm_g12", ""}, + {OffloadArch::DG2_G12, "dg2_g12", ""}, + {OffloadArch::PVC, "pvc", ""}, + {OffloadArch::PVC_VG, "pvc_vg", ""}, + {OffloadArch::MTL_U, "mtl_u", ""}, + {OffloadArch::MTL_S, "mtl_s", ""}, + {OffloadArch::ARL_U, "arl_u", ""}, + {OffloadArch::ARL_S, "arl_s", ""}, + {OffloadArch::MTL_H, "mtl_h", ""}, + {OffloadArch::ARL_H, "arl_h", ""}, {OffloadArch::BMG_G21, "bmg_g21", ""}, + {OffloadArch::LNL_M, "lnl_m", ""}, {OffloadArch::Generic, "generic", ""}, // clang-format on }; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 186a72971fce..8520271aba65 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1378,85 +1378,60 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, << SYCLTargetsValues->getAsString(C.getInputArgs()) << SYCLForceTarget->getAsString(C.getInputArgs()); - for (StringRef Val : SYCLTargetsValues->getValues()) { - StringRef Arch; - StringRef UserTargetName(Val); - if (auto Device = gen::isGPUTarget(Val)) { - if (Device->empty()) { - 
Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } - Arch = Device->data(); - UserTargetName = "spir64_gen"; - } else if (auto Device = gen::isGPUTarget(Val)) { - if (Device->empty()) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } - Arch = Device->data(); - UserTargetName = "nvptx64-nvidia-cuda"; - } else if (auto Device = gen::isGPUTarget(Val)) { - if (Device->empty()) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } - Arch = Device->data(); - UserTargetName = "amdgcn-amd-amdhsa"; - } + std::multiset SYCLTriples; + for (StringRef SYCLTargetTriple : SYCLTargetsValues->getValues()) + SYCLTriples.insert(SYCLTargetTriple); - llvm::Triple DeviceTriple(getSYCLDeviceTriple(UserTargetName)); - if (!isValidSYCLTriple(DeviceTriple)) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } + llvm::StringMap FoundNormalizedTriples; + llvm::Triple TT; + for (StringRef Triple : SYCLTriples) { + + if (Triple.starts_with("intel_gpu_")) { + TT = getSYCLDeviceTriple("spir64_gen"); + } else if (Triple.starts_with("nvidia_gpu_")) { + TT = getSYCLDeviceTriple("nvptx64-nvidia-cuda"); + } else if (Triple.starts_with("amd_gpu_")) { + TT = getSYCLDeviceTriple("amdgcn-amd-amdhsa"); + } else + TT = getSYCLDeviceTriple(Triple); - // For any -fsycl-targets=spir64_gen additions, we will scan the - // additional -X* options for potential -device settings. These - // need to be added as a known Arch to the packager. 
- if (DeviceTriple.isSPIRAOT() && Arch.empty() && - DeviceTriple.getSubArch() == llvm::Triple::SPIRSubArch_gen) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = std::make_unique( - *this, DeviceTriple, *HostTC, C.getInputArgs()); - assert(DeviceTC && "Device toolchain not defined."); - ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), TargetArgs); - // Look for -device and use that as the known arch to - // be associated with the current spir64_gen entry. Grab the - // right most entry. - for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - break; - } - } - } + std::string NormalizedName = TT.normalize(); - // Make sure we don't have a duplicate triple. - std::string NormalizedName = getSYCLDeviceTriple(Val).normalize(); - auto Duplicate = FoundNormalizedTriples.find(NormalizedName); - if (Duplicate != FoundNormalizedTriples.end()) { + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Triple); + + if (!Inserted) { Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Val << Duplicate->second; + << Triple << TripleIt->second; continue; } - // Store the current triple so that we can check for duplicates in - // the following iterations. - FoundNormalizedTriples[NormalizedName] = Val; - SYCLTriples.insert(DeviceTriple.normalize()); - if (!Arch.empty()) - DerivedArchs[DeviceTriple.getTriple()].insert(Arch); - } - if (!SYCLTriples.empty()) { - for (const auto &SYCLTriple : SYCLTriples) { - llvm::Triple Triple(SYCLTriple.getKey()); - UniqueSYCLTriplesVec.push_back(Triple); + // If the specified target is invalid, emit a diagnostic. 
+ if (!isValidSYCLTriple(TT)) { + Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; + continue; } + + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + OffloadArchs[&TC] = + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/true); + UniqueSYCLTriplesVec.push_back(TT); + } + if (addSYCLDefaultTriple(C, UniqueSYCLTriplesVec)) { + // Add the default triple (spir64) toolchain. + llvm::Triple DefaultTriple = + C.getDriver().getSYCLDeviceTriple(getDefaultSYCLArch(C)); + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, + DefaultTriple, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + OffloadArchs[&TC] = + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/true); } - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); } else Diag(clang::diag::warn_drv_empty_joined_argument) << SYCLTargetsValues->getAsString(C.getInputArgs()); @@ -1478,90 +1453,52 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, Diag(clang::diag::err_drv_sycl_offload_arch_new_driver); return; } - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); - auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), - HostTC->getTriple()); + llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); + llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); + llvm::Triple IntelGPUTriple("spir64_gen-unknown-unknown"); + llvm::Triple IntelCPUTriple("spir64_x86_64-unknown-unknown"); // Attempt to deduce the offloading triple from the set of architectures. // We need to temporarily create these toolchains so that we can access // tools for inferring architectures. 
- llvm::DenseSet Archs; - if (NVPTXTriple) { - auto TempTC = std::make_unique( - *this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None); - for (StringRef Arch : - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true)) - Archs.insert(Arch); - } - if (AMDTriple) { - auto TempTC = std::make_unique( - *this, *AMDTriple, *HostTC, C.getInputArgs()); - for (StringRef Arch : - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true)) - Archs.insert(Arch); - } - if (!AMDTriple && !NVPTXTriple) { - for (StringRef Arch : - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true)) - Archs.insert(Arch); - } - for (StringRef Arch : Archs) { - if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( - getProcessorFromTargetID(*NVPTXTriple, Arch)))) { - DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); - } else if (AMDTriple && - IsSYCLSupportedAMDGPUArch(StringToOffloadArch( - getProcessorFromTargetID(*AMDTriple, Arch)))) { - DerivedArchs[AMDTriple->getTriple()].insert(Arch); - } else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) { - DerivedArchs[getSYCLDeviceTriple("spir64_x86_64").getTriple()].insert( - Arch); - } else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) { - StringRef IntelGPUArch; - // For Intel Graphics AOT target, valid values for '--offload-arch' - // are mapped to valid device names accepted by OCLOC (the Intel GPU AOT - // compiler) via the '-device' option. The mapIntelGPUArchName - // function maps the accepted values for '--offload-arch' to enable SYCL - // offloading to Intel GPUs and the corresponding '-device' value passed - // to OCLOC. 
- IntelGPUArch = mapIntelGPUArchName(Arch).data(); - DerivedArchs[getSYCLDeviceTriple("spir64_gen").getTriple()].insert( - IntelGPUArch); - } else { + + for (StringRef Arch : + C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { + bool IsNVPTX = IsSYCLSupportedNVidiaGPUArch( + StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); + bool IsAMDGPU = IsSYCLSupportedAMDGPUArch( + StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); + bool IsIntelGPU = IsIntelGPUOffloadArch( + StringToOffloadArch(getProcessorFromTargetID(IntelGPUTriple, Arch))); + bool IsIntelCPU = IsIntelCPUOffloadArch( + StringToOffloadArch(getProcessorFromTargetID(IntelCPUTriple, Arch))); + + if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !IsIntelGPU && + !IsIntelCPU && !Arch.equals_insensitive("native")) { Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; return; } } - // Emit an error if architecture value is not provided - // to --offload-arch. - if (Archs.empty()) { - Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); - return; - } - - for (const auto &TripleAndArchs : DerivedArchs) - SYCLTriples.insert(TripleAndArchs.first()); - for (const auto &Val : SYCLTriples) { - llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val.getKey())); - std::string NormalizedName = SYCLTargetTriple.normalize(); + for (const llvm::Triple &TT : + {AMDTriple, NVPTXTriple, IntelGPUTriple, IntelCPUTriple}) { + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + C.getDefaultToolChain().getTriple()); - // Make sure we don't have a duplicate triple. 
- auto Duplicate = FoundNormalizedTriples.find(NormalizedName); - if (Duplicate != FoundNormalizedTriples.end()) { - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Val.getKey() << Duplicate->second; - continue; + llvm::SmallVector Archs = + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/false); + if (!Archs.empty()) { + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + OffloadArchs[&TC] = Archs; } - - // Store the current triple so that we can check for duplicates in the - // following iterations. - FoundNormalizedTriples[NormalizedName] = Val.getKey(); - UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); } - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); + auto TCRange = C.getOffloadToolChains(Action::OFK_SYCL); + if (TCRange.first == TCRange.second) { + Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); + return; + } } else { // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V. @@ -1590,13 +1527,17 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // We'll need to use the SYCL and host triples as the key into // getOffloadingDeviceToolChain, because the device toolchains we're // going to create will depend on both. 
- const ToolChain *HostTC = C.getSingleOffloadToolChain(); - for (const auto &TT : UniqueSYCLTriplesVec) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - HostTC->getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); + if ((IsSYCL && !C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && + !HasSYCLTargetsOption) { + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + for (const auto &TT : UniqueSYCLTriplesVec) { + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + HostTC->getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + + OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/true); + } } // @@ -7547,8 +7488,21 @@ static StringRef getCanonicalArchString(Compilation &C, C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "HIP" << ArchStr; return StringRef(); + } else if (Triple.isSPIRAOT() && + Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && + (Arch == OffloadArch::UNKNOWN || !IsIntelGPUOffloadArch(Arch))) { + if (SpecificToolchain) + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_gen" << ArchStr; + return StringRef(); + } else if (Triple.isSPIRAOT() && + Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64 && + (Arch == OffloadArch::UNKNOWN || !IsIntelCPUOffloadArch(Arch))) { + if (SpecificToolchain) + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_x86_64" << ArchStr; + return StringRef(); } - if (IsNVIDIAOffloadArch(Arch)) return Args.MakeArgStringRef(OffloadArchToString(Arch)); @@ -7561,6 +7515,13 @@ static StringRef getCanonicalArchString(Compilation &C, } return Args.MakeArgStringRef(getCanonicalTargetID(*Arch, Features)); } + if (IsIntelGPUOffloadArch(Arch)) { + return Args.MakeArgStringRef(ArchStr); + } + + if 
(IsIntelCPUOffloadArch(Arch)) { + return Args.MakeArgStringRef(ArchStr); + } // If the input isn't CUDA or HIP just return the architecture. return ArchStr; @@ -7598,19 +7559,52 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet Archs; + StringRef Arch; for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. std::unique_ptr ExtractedArg = nullptr; if (Kind == Action::OFK_SYCL) { + // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" + // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" + if (TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen && + Arg->getOption().matches(options::OPT_Xsycl_backend_EQ)) { + const ToolChain *HostTC = + C.getSingleOffloadToolChain(); + auto DeviceTC = std::make_unique( + *this, TC->getTriple(), *HostTC, C.getInputArgs()); + assert(DeviceTC && "Device toolchain not defined."); + ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), TargetArgs); + // Look for -device and use that as the known + // arch to be associated with the current spir64_gen entry. Grab + // the right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + Arch = TargetArgs[i + 1]; + if (!Arch.empty()) + Archs.insert(Arch); + break; + } + } + } // For SYCL based offloading, we allow for -Xsycl-target-backend - // and -Xsycl-target-backend= for specifying options. + // and -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908 for + // specifying options. 
+ if (!(TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) && + Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { Arg->claim(); unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); - } else if (Arg->getOption().matches(options::OPT_Xsycl_backend)) { + // -Xsycl-target-backend --offload-arch=gfx1150 + } else if (!(TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == + llvm::Triple::SPIRSubArch_gen) && + Arg->getOption().matches(options::OPT_Xsycl_backend)) { unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); @@ -7631,6 +7625,50 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } } + if (Kind == Action::OFK_SYCL && + Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { + for (StringRef SYCLTargetValue : Arg->getValues()) { + if (auto Device = + tools::SYCL::gen::isGPUTarget( + SYCLTargetValue)) { + if (SpecificToolchain && + !(TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) + continue; + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; + continue; + } + if (IsIntelGPUOffloadArch(StringToOffloadArch( + getProcessorFromTargetID(TC->getTriple(), Device->data())))) + Arch = Device->data(); + } else if (auto Device = tools::SYCL::gen::isGPUTarget< + tools::SYCL::gen::NvidiaGPU>(SYCLTargetValue)) { + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; + continue; + } + if (IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( + getProcessorFromTargetID(TC->getTriple(), Device->data())))) + Arch = Device->data(); + } else if (auto Device = tools::SYCL::gen::isGPUTarget< + clang::driver::tools::SYCL::gen::AmdGPU>( + 
SYCLTargetValue)) { + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; + continue; + } + if (IsSYCLSupportedAMDGPUArch(StringToOffloadArch( + getProcessorFromTargetID(TC->getTriple(), Device->data())))) + Arch = Device->data(); + } else { + Arch = StringRef(); + } + if (!Arch.empty()) + Archs.insert(Arch); + } + } + // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0285bf5873ff..785162203dcc 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10294,9 +10294,17 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, if (Input.getType() == types::TY_Tempfilelist) File = C.getArgs().MakeArgString("@" + File); - StringRef Arch = OffloadAction->getOffloadingArch() - ? OffloadAction->getOffloadingArch() - : TCArgs.getLastArgValue(options::OPT_march_EQ); + StringRef Arch; + if (OffloadAction->getOffloadingArch()) { + if (TC->getTripleString() == "spir64_gen-unknown-unknown") { + Arch = mapIntelGPUArchName(OffloadAction->getOffloadingArch()); + } else { + Arch = OffloadAction->getOffloadingArch(); + } + } else { + Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); + } + StringRef Kind = Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind()); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 9570f81dfa96..cb16c9ce6f99 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -533,7 +533,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, // If requested, use a custom linker script to handle very large device code // sections. 
- if (Args.hasArg(options::OPT_fsycl, options::OPT_fopenmp_targets_EQ) && + if (Args.hasArg(options::OPT_fsycl, options::OPT_offload_targets_EQ) && Args.hasFlag(options::OPT_flink_huge_device_code, options::OPT_fno_link_huge_device_code, false)) { // Create temporary linker script. Keep it if save-temps is enabled. diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index e70c35a4f526..8082e8c88060 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -21,125 +21,6 @@ using namespace clang::driver::tools; using namespace clang; using namespace llvm::opt; -// Struct that relates an AOT target value with -// Intel CPUs and Intel GPUs. -struct StringToOffloadArchSYCLMap { - const char *ArchName; - SYCLSupportedIntelArchs IntelArch; -}; - -// Mapping of supported SYCL offloading architectures. -static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = { - // Intel CPU mapping. - {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512}, - {"core-avx2", SYCLSupportedIntelArchs::COREAVX2}, - {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX}, - {"corei7", SYCLSupportedIntelArchs::COREI7}, - {"westmere", SYCLSupportedIntelArchs::WESTMERE}, - {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE}, - {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE}, - {"broadwell", SYCLSupportedIntelArchs::BROADWELL}, - {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE}, - {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE}, - {"skylake", SYCLSupportedIntelArchs::SKYLAKE}, - {"skx", SYCLSupportedIntelArchs::SKX}, - {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE}, - {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT}, - {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER}, - {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS}, - {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS}, - // Intel GPU mapping. 
- {"bdw", SYCLSupportedIntelArchs::BDW}, - {"skl", SYCLSupportedIntelArchs::SKL}, - {"kbl", SYCLSupportedIntelArchs::KBL}, - {"cfl", SYCLSupportedIntelArchs::CFL}, - {"apl", SYCLSupportedIntelArchs::APL}, - {"bxt", SYCLSupportedIntelArchs::BXT}, - {"glk", SYCLSupportedIntelArchs::GLK}, - {"whl", SYCLSupportedIntelArchs::WHL}, - {"aml", SYCLSupportedIntelArchs::AML}, - {"cml", SYCLSupportedIntelArchs::CML}, - {"icllp", SYCLSupportedIntelArchs::ICLLP}, - {"icl", SYCLSupportedIntelArchs::ICL}, - {"ehl", SYCLSupportedIntelArchs::EHL}, - {"jsl", SYCLSupportedIntelArchs::JSL}, - {"tgllp", SYCLSupportedIntelArchs::TGLLP}, - {"tgl", SYCLSupportedIntelArchs::TGL}, - {"rkl", SYCLSupportedIntelArchs::RKL}, - {"adl_s", SYCLSupportedIntelArchs::ADL_S}, - {"rpl_s", SYCLSupportedIntelArchs::RPL_S}, - {"adl_p", SYCLSupportedIntelArchs::ADL_P}, - {"adl_n", SYCLSupportedIntelArchs::ADL_N}, - {"dg1", SYCLSupportedIntelArchs::DG1}, - {"acm_g10", SYCLSupportedIntelArchs::ACM_G10}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"acm_g11", SYCLSupportedIntelArchs::ACM_G11}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11}, - {"acm_g12", SYCLSupportedIntelArchs::ACM_G12}, - {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12}, - {"pvc", SYCLSupportedIntelArchs::PVC}, - {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG}, - {"mtl_u", SYCLSupportedIntelArchs::MTL_U}, - {"mtl_s", SYCLSupportedIntelArchs::MTL_S}, - {"arl_u", SYCLSupportedIntelArchs::ARL_U}, - {"arl_s", SYCLSupportedIntelArchs::ARL_S}, - {"mtl_h", SYCLSupportedIntelArchs::MTL_H}, - {"arl_h", SYCLSupportedIntelArchs::ARL_H}, - {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21}, - {"lnl_m", SYCLSupportedIntelArchs::LNL_M}}; - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. 
-SYCLSupportedIntelArchs -clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) { - auto result = std::find_if( - std::begin(StringToArchNamesMap), std::end(StringToArchNamesMap), - [ArchNameAsString](const StringToOffloadArchSYCLMap &map) { - return ArchNameAsString == map.ArchName; - }); - if (result == std::end(StringToArchNamesMap)) - return SYCLSupportedIntelArchs::UNKNOWN; - return result->IntelArch; -} - -// This is a mapping between the user provided --offload-arch value for Intel -// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU -// AOT compiler). -StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) { - StringRef Arch; - Arch = llvm::StringSwitch(ArchName) - .Case("bdw", "bdw") - .Case("skl", "skl") - .Case("kbl", "kbl") - .Case("cfl", "cfl") - .Cases("apl", "bxt", "apl") - .Case("glk", "glk") - .Case("whl", "whl") - .Case("aml", "aml") - .Case("cml", "cml") - .Cases("icllp", "icl", "icllp") - .Cases("ehl", "jsl", "ehl") - .Cases("tgllp", "tgl", "tgllp") - .Case("rkl", "rkl") - .Cases("adl_s", "rpl_s", "adl_s") - .Case("adl_p", "adl_p") - .Case("adl_n", "adl_n") - .Case("dg1", "dg1") - .Cases("acm_g10", "dg2_g10", "acm_g10") - .Cases("acm_g11", "dg2_g11", "acm_g11") - .Cases("acm_g12", "dg2_g12", "acm_g12") - .Case("pvc", "pvc") - .Case("pvc_vg", "pvc_vg") - .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") - .Case("mtl_h", "mtl_h") - .Case("arl_h", "arl_h") - .Case("bmg_g21", "bmg_g21") - .Case("lnl_m", "lnl_m") - .Default(""); - return Arch; -} - SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D) : D(D), InstallationCandidates() { InstallationCandidates.emplace_back(D.Dir + "/.."); @@ -1392,97 +1273,135 @@ StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) { return Device; } +// This is a mapping between the user provided --offload-arch value for Intel +// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU +// AOT compiler). 
+StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) { + StringRef Arch; + Arch = llvm::StringSwitch(ArchName) + .Case("bdw", "bdw") + .Case("skl", "skl") + .Case("kbl", "kbl") + .Case("cfl", "cfl") + .Cases("apl", "bxt", "apl") + .Case("glk", "glk") + .Case("whl", "whl") + .Case("aml", "aml") + .Case("cml", "cml") + .Cases("icllp", "icl", "icllp") + .Cases("ehl", "jsl", "ehl") + .Cases("tgllp", "tgl", "tgllp") + .Case("rkl", "rkl") + .Cases("adl_s", "rpl_s", "adl_s") + .Case("adl_p", "adl_p") + .Case("adl_n", "adl_n") + .Case("dg1", "dg1") + .Cases("acm_g10", "dg2_g10", "acm_g10") + .Cases("acm_g11", "dg2_g11", "acm_g11") + .Cases("acm_g12", "dg2_g12", "acm_g12") + .Case("pvc", "pvc") + .Case("pvc_vg", "pvc_vg") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") + .Case("mtl_h", "mtl_h") + .Case("arl_h", "arl_h") + .Case("bmg_g21", "bmg_g21") + .Case("lnl_m", "lnl_m") + .Default(ArchName); + return Arch; +} + SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) { SmallString<64> Macro; - StringRef Ext = llvm::StringSwitch(DeviceName) - .Case("bdw", "INTEL_GPU_BDW") - .Case("skl", "INTEL_GPU_SKL") - .Case("kbl", "INTEL_GPU_KBL") - .Case("cfl", "INTEL_GPU_CFL") - .Case("apl", "INTEL_GPU_APL") - .Case("glk", "INTEL_GPU_GLK") - .Case("whl", "INTEL_GPU_WHL") - .Case("aml", "INTEL_GPU_AML") - .Case("cml", "INTEL_GPU_CML") - .Case("icllp", "INTEL_GPU_ICLLP") - .Case("ehl", "INTEL_GPU_EHL") - .Case("tgllp", "INTEL_GPU_TGLLP") - .Case("rkl", "INTEL_GPU_RKL") - .Case("adl_s", "INTEL_GPU_ADL_S") - .Case("adl_p", "INTEL_GPU_ADL_P") - .Case("adl_n", "INTEL_GPU_ADL_N") - .Case("dg1", "INTEL_GPU_DG1") - .Case("acm_g10", "INTEL_GPU_ACM_G10") - .Case("acm_g11", "INTEL_GPU_ACM_G11") - .Case("acm_g12", "INTEL_GPU_ACM_G12") - .Case("pvc", "INTEL_GPU_PVC") - .Case("pvc_vg", "INTEL_GPU_PVC_VG") - .Case("mtl_u", "INTEL_GPU_MTL_U") - .Case("mtl_h", "INTEL_GPU_MTL_H") - .Case("arl_h", "INTEL_GPU_ARL_H") - .Case("bmg_g21", "INTEL_GPU_BMG_G21") - 
.Case("bmg_g31", "INTEL_GPU_BMG_G31") - .Case("lnl_m", "INTEL_GPU_LNL_M") - .Case("ptl_h", "INTEL_GPU_PTL_H") - .Case("ptl_u", "INTEL_GPU_PTL_U") - .Case("wcl", "INTEL_GPU_WCL") - .Case("sm_50", "NVIDIA_GPU_SM_50") - .Case("sm_52", "NVIDIA_GPU_SM_52") - .Case("sm_53", "NVIDIA_GPU_SM_53") - .Case("sm_60", "NVIDIA_GPU_SM_60") - .Case("sm_61", "NVIDIA_GPU_SM_61") - .Case("sm_62", "NVIDIA_GPU_SM_62") - .Case("sm_70", "NVIDIA_GPU_SM_70") - .Case("sm_72", "NVIDIA_GPU_SM_72") - .Case("sm_75", "NVIDIA_GPU_SM_75") - .Case("sm_80", "NVIDIA_GPU_SM_80") - .Case("sm_86", "NVIDIA_GPU_SM_86") - .Case("sm_87", "NVIDIA_GPU_SM_87") - .Case("sm_89", "NVIDIA_GPU_SM_89") - .Case("sm_90", "NVIDIA_GPU_SM_90") - .Case("sm_90a", "NVIDIA_GPU_SM_90A") - .Case("gfx700", "AMD_GPU_GFX700") - .Case("gfx701", "AMD_GPU_GFX701") - .Case("gfx702", "AMD_GPU_GFX702") - .Case("gfx703", "AMD_GPU_GFX703") - .Case("gfx704", "AMD_GPU_GFX704") - .Case("gfx705", "AMD_GPU_GFX705") - .Case("gfx801", "AMD_GPU_GFX801") - .Case("gfx802", "AMD_GPU_GFX802") - .Case("gfx803", "AMD_GPU_GFX803") - .Case("gfx805", "AMD_GPU_GFX805") - .Case("gfx810", "AMD_GPU_GFX810") - .Case("gfx900", "AMD_GPU_GFX900") - .Case("gfx902", "AMD_GPU_GFX902") - .Case("gfx904", "AMD_GPU_GFX904") - .Case("gfx906", "AMD_GPU_GFX906") - .Case("gfx908", "AMD_GPU_GFX908") - .Case("gfx909", "AMD_GPU_GFX909") - .Case("gfx90a", "AMD_GPU_GFX90A") - .Case("gfx90c", "AMD_GPU_GFX90C") - .Case("gfx940", "AMD_GPU_GFX940") - .Case("gfx941", "AMD_GPU_GFX941") - .Case("gfx942", "AMD_GPU_GFX942") - .Case("gfx1010", "AMD_GPU_GFX1010") - .Case("gfx1011", "AMD_GPU_GFX1011") - .Case("gfx1012", "AMD_GPU_GFX1012") - .Case("gfx1013", "AMD_GPU_GFX1013") - .Case("gfx1030", "AMD_GPU_GFX1030") - .Case("gfx1031", "AMD_GPU_GFX1031") - .Case("gfx1032", "AMD_GPU_GFX1032") - .Case("gfx1033", "AMD_GPU_GFX1033") - .Case("gfx1034", "AMD_GPU_GFX1034") - .Case("gfx1035", "AMD_GPU_GFX1035") - .Case("gfx1036", "AMD_GPU_GFX1036") - .Case("gfx1100", "AMD_GPU_GFX1100") - 
.Case("gfx1101", "AMD_GPU_GFX1101") - .Case("gfx1102", "AMD_GPU_GFX1102") - .Case("gfx1103", "AMD_GPU_GFX1103") - .Case("gfx1150", "AMD_GPU_GFX1150") - .Case("gfx1151", "AMD_GPU_GFX1151") - .Case("gfx1200", "AMD_GPU_GFX1200") - .Case("gfx1201", "AMD_GPU_GFX1201") - .Default(""); + StringRef Ext = + llvm::StringSwitch<StringRef>(DeviceName) + .Case("bdw", "INTEL_GPU_BDW") + .Case("skl", "INTEL_GPU_SKL") + .Case("kbl", "INTEL_GPU_KBL") + .Case("cfl", "INTEL_GPU_CFL") + .Cases("apl", "bxt", "INTEL_GPU_APL") + .Case("glk", "INTEL_GPU_GLK") + .Case("whl", "INTEL_GPU_WHL") + .Case("aml", "INTEL_GPU_AML") + .Case("cml", "INTEL_GPU_CML") + .Cases("icllp", "icl", "INTEL_GPU_ICLLP") + .Cases("ehl", "jsl", "INTEL_GPU_EHL") + .Cases("tgllp", "tgl", "INTEL_GPU_TGLLP") + .Case("rkl", "INTEL_GPU_RKL") + .Cases("adl_s", "rpl_s", "INTEL_GPU_ADL_S") + .Case("adl_p", "INTEL_GPU_ADL_P") + .Case("adl_n", "INTEL_GPU_ADL_N") + .Case("dg1", "INTEL_GPU_DG1") + .Cases("acm_g10", "dg2_g10", "INTEL_GPU_ACM_G10") + .Cases("acm_g11", "dg2_g11", "INTEL_GPU_ACM_G11") + .Cases("acm_g12", "dg2_g12", "INTEL_GPU_ACM_G12") + .Case("pvc", "INTEL_GPU_PVC") + .Case("pvc_vg", "INTEL_GPU_PVC_VG") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "INTEL_GPU_MTL_U") + .Case("mtl_h", "INTEL_GPU_MTL_H") + .Case("arl_h", "INTEL_GPU_ARL_H") + .Case("bmg_g21", "INTEL_GPU_BMG_G21") + .Case("bmg_g31", "INTEL_GPU_BMG_G31") + .Case("lnl_m", "INTEL_GPU_LNL_M") + .Case("ptl_h", "INTEL_GPU_PTL_H") + .Case("ptl_u", "INTEL_GPU_PTL_U") + .Case("wcl", "INTEL_GPU_WCL") + .Case("sm_50", "NVIDIA_GPU_SM_50") + .Case("sm_52", "NVIDIA_GPU_SM_52") + .Case("sm_53", "NVIDIA_GPU_SM_53") + .Case("sm_60", "NVIDIA_GPU_SM_60") + .Case("sm_61", "NVIDIA_GPU_SM_61") + .Case("sm_62", "NVIDIA_GPU_SM_62") + .Case("sm_70", "NVIDIA_GPU_SM_70") + .Case("sm_72", "NVIDIA_GPU_SM_72") + .Case("sm_75", "NVIDIA_GPU_SM_75") + .Case("sm_80", "NVIDIA_GPU_SM_80") + .Case("sm_86", "NVIDIA_GPU_SM_86") + .Case("sm_87", "NVIDIA_GPU_SM_87") + .Case("sm_89", "NVIDIA_GPU_SM_89") 
+ .Case("sm_90", "NVIDIA_GPU_SM_90") + .Case("sm_90a", "NVIDIA_GPU_SM_90A") + .Case("gfx700", "AMD_GPU_GFX700") + .Case("gfx701", "AMD_GPU_GFX701") + .Case("gfx702", "AMD_GPU_GFX702") + .Case("gfx703", "AMD_GPU_GFX703") + .Case("gfx704", "AMD_GPU_GFX704") + .Case("gfx705", "AMD_GPU_GFX705") + .Case("gfx801", "AMD_GPU_GFX801") + .Case("gfx802", "AMD_GPU_GFX802") + .Case("gfx803", "AMD_GPU_GFX803") + .Case("gfx805", "AMD_GPU_GFX805") + .Case("gfx810", "AMD_GPU_GFX810") + .Case("gfx900", "AMD_GPU_GFX900") + .Case("gfx902", "AMD_GPU_GFX902") + .Case("gfx904", "AMD_GPU_GFX904") + .Case("gfx906", "AMD_GPU_GFX906") + .Case("gfx908", "AMD_GPU_GFX908") + .Case("gfx909", "AMD_GPU_GFX909") + .Case("gfx90a", "AMD_GPU_GFX90A") + .Case("gfx90c", "AMD_GPU_GFX90C") + .Case("gfx940", "AMD_GPU_GFX940") + .Case("gfx941", "AMD_GPU_GFX941") + .Case("gfx942", "AMD_GPU_GFX942") + .Case("gfx1010", "AMD_GPU_GFX1010") + .Case("gfx1011", "AMD_GPU_GFX1011") + .Case("gfx1012", "AMD_GPU_GFX1012") + .Case("gfx1013", "AMD_GPU_GFX1013") + .Case("gfx1030", "AMD_GPU_GFX1030") + .Case("gfx1031", "AMD_GPU_GFX1031") + .Case("gfx1032", "AMD_GPU_GFX1032") + .Case("gfx1033", "AMD_GPU_GFX1033") + .Case("gfx1034", "AMD_GPU_GFX1034") + .Case("gfx1035", "AMD_GPU_GFX1035") + .Case("gfx1036", "AMD_GPU_GFX1036") + .Case("gfx1100", "AMD_GPU_GFX1100") + .Case("gfx1101", "AMD_GPU_GFX1101") + .Case("gfx1102", "AMD_GPU_GFX1102") + .Case("gfx1103", "AMD_GPU_GFX1103") + .Case("gfx1150", "AMD_GPU_GFX1150") + .Case("gfx1151", "AMD_GPU_GFX1151") + .Case("gfx1200", "AMD_GPU_GFX1200") + .Case("gfx1201", "AMD_GPU_GFX1201") + .Default(""); if (!Ext.empty()) { Macro = "__SYCL_TARGET_"; Macro += Ext; diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 7e3b4d0aca5b..bc95910dcf34 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -17,109 +17,6 @@ namespace clang { namespace driver { -// List of architectures (Intel CPUs and Intel GPUs) -// that 
support SYCL offloading. -enum class SYCLSupportedIntelArchs { - // Intel CPUs - UNKNOWN, - SKYLAKEAVX512, - COREAVX2, - COREI7AVX, - COREI7, - WESTMERE, - SANDYBRIDGE, - IVYBRIDGE, - BROADWELL, - COFFEELAKE, - ALDERLAKE, - SKYLAKE, - SKX, - CASCADELAKE, - ICELAKECLIENT, - ICELAKESERVER, - SAPPHIRERAPIDS, - GRANITERAPIDS, - // Intel GPUs - BDW, - SKL, - KBL, - CFL, - APL, - BXT, - GLK, - WHL, - AML, - CML, - ICLLP, - ICL, - EHL, - JSL, - TGLLP, - TGL, - RKL, - ADL_S, - RPL_S, - ADL_P, - ADL_N, - DG1, - ACM_G10, - DG2_G10, - ACM_G11, - DG2_G11, - ACM_G12, - DG2_G12, - PVC, - PVC_VG, - MTL_U, - MTL_S, - ARL_U, - ARL_S, - MTL_H, - ARL_H, - BMG_G21, - LNL_M, -}; - -// Check if the given Arch value is a Generic AMD GPU. -// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. -// This list is used to filter out GFX*_GENERIC AMD GPUs in -// `IsSYCLSupportedAMDGPUArch`. -static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { - return Arch == OffloadArch::GFX9_GENERIC || - Arch == OffloadArch::GFX10_1_GENERIC || - Arch == OffloadArch::GFX10_3_GENERIC || - Arch == OffloadArch::GFX11_GENERIC || - Arch == OffloadArch::GFX12_GENERIC; -} - -// Check if the given Arch value is a valid SYCL supported AMD GPU. -static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && - !IsAMDGenericGPUArch(Arch); -} - -// Check if the given Arch value is a valid SYCL supported NVidia GPU. -static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; -} - -// Check if the given Arch value is a valid SYCL supported Intel CPU. -static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 && - Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS; -} - -// Check if the given Arch value is a valid SYCL supported Intel GPU. 
-static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::BDW && - Arch <= SYCLSupportedIntelArchs::LNL_M; -} - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. -SYCLSupportedIntelArchs -StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString); - // This is a mapping between the user provided --offload-arch value for Intel // GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU // AOT compiler). diff --git a/clang/test/Driver/sycl-linker-wrapper.cpp b/clang/test/Driver/sycl-linker-wrapper.cpp index 8652314e055e..0a307f124b5e 100644 --- a/clang/test/Driver/sycl-linker-wrapper.cpp +++ b/clang/test/Driver/sycl-linker-wrapper.cpp @@ -206,7 +206,7 @@ // ------- // Generate .o file as linker wrapper input. // -// RUN: %clang %s -fsycl -fsycl-targets=native_cpu -c --offload-new-driver -o %t6.o +// RUN: %clang %s -fsycl -fsycl-targets=native_cpu -fno-sycl-libspirv -c --offload-new-driver -o %t6.o // // RUN: clang-linker-wrapper "--host-triple=x86_64-unknown-linux-gnu" "-sycl-device-library-location=%S/Inputs/native_cpu" "--sycl-post-link-options=SYCL_POST_LINK_OPTIONS" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" %t6.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-CMDS-NATIVE-CPU %s // CHK-CMDS-NATIVE-CPU: "{{.*}}/spirv-to-ir-wrapper" {{.*}} --llvm-spirv-opts --spirv-preserve-auxdata --spirv-target-env=SPV-IR --spirv-builtin-format=global diff --git a/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp index df7873fade58..f04e82a4ffb5 100644 --- a/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp +++ b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp @@ -121,4 +121,3 @@ // TARGET-TRIPLE-GPU: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" // CLANG-OFFLOAD-PACKAGER-GPU: clang-offload-packager{{.*}} 
"--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl" // CLANG-OFFLOAD-PACKAGER-GPU-OPTS: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl{{.*}}" - diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index f29fa402716e..d53e8dde56fe 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -415,16 +415,16 @@ // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: compiler, {1}, ir, (host-sycl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {6}, ir -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {11}, ir +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl 
(nvptx64-nvidia-cuda:sm_50)" {6}, ir +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {11}, ir // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: clang-offload-packager, {7, 12}, image, (device-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {13}, ir // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (host-sycl) diff --git a/clang/test/Driver/sycl-unique-prefix.cpp b/clang/test/Driver/sycl-unique-prefix.cpp index 6f39f9633702..73d874474c79 100644 --- a/clang/test/Driver/sycl-unique-prefix.cpp +++ b/clang/test/Driver/sycl-unique-prefix.cpp @@ -3,11 +3,11 @@ // RUN: touch %t_file2.cpp // RUN: %clangxx -fsycl --offload-new-driver -fsycl-targets=spir64-unknown-unknown,spir64_gen-unknown-unknown -c %t_file1.cpp %t_file2.cpp -### 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK_PREFIX %s -// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file1.cpp" -// CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1]]"{{.*}} "{{.*}}_file1.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file1.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1]]"{{.*}} "{{.*}}_file1.cpp" // CHECK_PREFIX: clang{{.*}} "-fsycl-is-host"{{.*}} 
"-fsycl-unique-prefix=[[PREFIX1]]"{{.*}} "{{.*}}_file1.cpp" -// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file2.cpp" -// CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2]]"{{.*}} "{{.*}}_file2.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file2.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2]]"{{.*}} "{{.*}}_file2.cpp" // CHECK_PREFIX: clang{{.*}} "-fsycl-is-host"{{.*}} "-fsycl-unique-prefix=[[PREFIX2]]"{{.*}} "{{.*}}_file2.cpp" /// Check for prefix with preprocessed input