Skip to content

[HIP][Clang][Driver] Move BC preference logic into ROCm detection #149294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/Driver/ToolChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,8 @@ class ToolChain {

/// Get paths for device libraries.
virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
getDeviceLibs(const llvm::opt::ArgList &Args) const;
getDeviceLibs(const llvm::opt::ArgList &Args,
const Action::OffloadKind DeviceOffloadingKind) const;

/// Add the system specific linker arguments to use
/// for the given HIP runtime library type.
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1631,7 +1631,8 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
ToolChain::getDeviceLibs(const ArgList &DriverArgs) const {
ToolChain::getDeviceLibs(const ArgList &DriverArgs,
const Action::OffloadKind DeviceOffloadingKind) const {
return {};
}

Expand Down
147 changes: 84 additions & 63 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;

/// Compute the set of preferences that decide which common ROCm bitcode
/// libraries get linked, from the driver arguments, the target GPU
/// architecture, the offloading kind and whether the ASan runtime is needed.
///
/// \param D                    Driver used to query the code object version.
/// \param DriverArgs           Command line arguments inspected for the
///                             OpenCL (-cl-*) and generic/HIP spellings of
///                             each option.
/// \param GPUArch              Architecture name parsed with parseArchAMDGCN.
/// \param DeviceOffloadingKind OFK_HIP and OFK_OpenMP select the offloading
///                             defaults; any other kind uses the remaining
///                             (OpenCL-style) defaults.
/// \param NeedsASanRT          Whether the address sanitizer runtime is
///                             required; gates \c GPUSan.
RocmInstallationDetector::CommonBitcodeLibsPreferences::
    CommonBitcodeLibsPreferences(const Driver &D,
                                 const llvm::opt::ArgList &DriverArgs,
                                 StringRef GPUArch,
                                 const Action::OffloadKind DeviceOffloadingKind,
                                 const bool NeedsASanRT)
    : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
          tools::getAMDGPUCodeObjectVersion(D, DriverArgs))) {
  const auto ArchKind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
  const unsigned ArchFeatures = llvm::AMDGPU::getArchAttrAMDGCN(ArchKind);

  IsOpenMP = (DeviceOffloadingKind == Action::OFK_OpenMP);
  const bool IsHIPOrOpenMP =
      IsOpenMP || DeviceOffloadingKind == Action::OFK_HIP;

  // Targets without wave32 support are always wave64. Otherwise wave64 has
  // to be requested explicitly; the flag is only queried in that case.
  const bool SupportsWave32 =
      (ArchFeatures & llvm::AMDGPU::FEATURE_WAVE32) != 0;
  Wave64 = !SupportsWave32 ||
           DriverArgs.hasFlag(options::OPT_mwavefrontsize64,
                              options::OPT_mno_wavefrontsize64, false);

  // Denormals are flushed by default unless the subtarget has both fast f32
  // FMA and fast f32 denormals. With no known target nothing is flushed by
  // default.
  const bool FastFMAWithDenormals =
      (ArchFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
      (ArchFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
  const bool FlushDenormsByDefault =
      ArchKind != llvm::AMDGPU::GK_NONE && !FastFMAWithDenormals;

  // TODO: There are way too many flags that change this. Do we need to
  // check them all?
  if (IsHIPOrOpenMP)
    DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
                             options::OPT_fno_gpu_flush_denormals_to_zero,
                             FlushDenormsByDefault);
  else
    DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
          FlushDenormsByDefault;

  // Each math preference accepts the OpenCL spelling first and then falls
  // back to the generic positive/negative flag pair, defaulting to off.
  FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only) ||
               DriverArgs.hasFlag(options::OPT_ffinite_math_only,
                                  options::OPT_fno_finite_math_only, false);

  UnsafeMathOpt =
      DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations) ||
      DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
                         options::OPT_fno_unsafe_math_optimizations, false);

  FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math) ||
                    DriverArgs.hasFlag(options::OPT_ffast_math,
                                       options::OPT_fno_fast_math, false);

  // Correctly rounded divide/sqrt defaults to on for HIP and OpenMP
  // offloading and to off for every other offloading kind.
  CorrectSqrt =
      DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
      DriverArgs.hasFlag(
          options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
          options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt,
          IsHIPOrOpenMP);

  // GPU Sanitizer currently only supports ASan and is enabled through host
  // ASan. The flag is queried before NeedsASanRT is consulted.
  const bool GPUSanitizeRequested = DriverArgs.hasFlag(
      options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, true);
  GPUSan = GPUSanitizeRequested && NeedsASanRT;
}

void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
assert(!Path.empty());

Expand Down Expand Up @@ -884,33 +946,14 @@ void ROCMToolChain::addClangTargetOptions(
ABIVer))
return;

bool Wave64 = isWave64(DriverArgs, Kind);
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
getDefaultDenormsAreZeroForTarget(Kind);
bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);

bool UnsafeMathOpt =
DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
bool CorrectSqrt =
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);

// GPU Sanitizer currently only supports ASan and is enabled through host
// ASan.
bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, true) &&
getSanitizerArgs(DriverArgs).needsAsanRt();

// Add the OpenCL specific bitcode library.
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
BCLibs.emplace_back(RocmInstallation->getOpenCLPath().str());

// Add the generic set of libraries.
BCLibs.append(RocmInstallation->getCommonBitcodeLibs(
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false));
DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind,
getSanitizerArgs(DriverArgs).needsAsanRt()));

for (auto [BCFile, Internalize] : BCLibs) {
if (Internalize)
Expand Down Expand Up @@ -947,45 +990,47 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
RocmInstallationDetector::getCommonBitcodeLibs(
const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
bool isOpenMP) const {
const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
const bool NeedsASanRT) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;

CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
DeviceOffloadingKind, NeedsASanRT};

auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
bool Internalize = true) {
BCLib.ShouldInternalize = Internalize;
BCLibs.emplace_back(BCLib);
};
auto AddSanBCLibs = [&]() {
if (GPUSan)
if (Pref.GPUSan)
AddBCLib(getAsanRTLPath(), false);
};

AddSanBCLibs();
AddBCLib(getOCMLPath());
if (!isOpenMP)
if (!Pref.IsOpenMP)
AddBCLib(getOCKLPath());
else if (GPUSan && isOpenMP)
else if (Pref.GPUSan && Pref.IsOpenMP)
AddBCLib(getOCKLPath(), false);
AddBCLib(getDenormalsAreZeroPath(DAZ));
AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
AddBCLib(getWavefrontSize64Path(Wave64));
AddBCLib(getDenormalsAreZeroPath(Pref.DAZ));
AddBCLib(getUnsafeMathPath(Pref.UnsafeMathOpt || Pref.FastRelaxedMath));
AddBCLib(getFiniteOnlyPath(Pref.FiniteOnly || Pref.FastRelaxedMath));
AddBCLib(getCorrectlyRoundedSqrtPath(Pref.CorrectSqrt));
AddBCLib(getWavefrontSize64Path(Pref.Wave64));
AddBCLib(LibDeviceFile);
auto ABIVerPath = getABIVersionPath(ABIVer);
auto ABIVerPath = getABIVersionPath(Pref.ABIVer);
if (!ABIVerPath.empty())
AddBCLib(ABIVerPath);

return BCLibs;
}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
const std::string &GPUArch,
bool isOpenMP) const {
ROCMToolChain::getCommonDeviceLibNames(
const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
Action::OffloadKind DeviceOffloadingKind) const {
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);

Expand All @@ -996,33 +1041,9 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
ABIVer))
return {};

// If --hip-device-lib is not set, add the default bitcode libraries.
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
options::OPT_fno_gpu_flush_denormals_to_zero,
getDefaultDenormsAreZeroForTarget(Kind));
bool FiniteOnly = DriverArgs.hasFlag(
options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
bool UnsafeMathOpt =
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
options::OPT_fno_unsafe_math_optimizations, false);
bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
options::OPT_fno_fast_math, false);
bool CorrectSqrt = DriverArgs.hasFlag(
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
bool Wave64 = isWave64(DriverArgs, Kind);

// GPU Sanitizer currently only supports ASan and is enabled through host
// ASan.
bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, true) &&
getSanitizerArgs(DriverArgs).needsAsanRt();

return RocmInstallation->getCommonBitcodeLibs(
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP);
DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
getSanitizerArgs(DriverArgs).needsAsanRt());
}

bool AMDGPUToolChain::shouldSkipSanitizeOption(
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
llvm::SmallVector<BitCodeLibraryInfo, 12>
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
const std::string &GPUArch,
bool isOpenMP = false) const;
Action::OffloadKind DeviceOffloadingKind) const;

SanitizerMask getSupportedSanitizers() const override {
return SanitizerKind::Address;
Expand Down
10 changes: 6 additions & 4 deletions clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
true))
return;

for (auto BCFile : getDeviceLibs(DriverArgs)) {
for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
Expand Down Expand Up @@ -132,16 +132,18 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const {
AMDGPUOpenMPToolChain::getDeviceLibs(
const llvm::opt::ArgList &Args,
const Action::OffloadKind DeviceOffloadingKind) const {
if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true))
return {};

StringRef GpuArch = getProcessorFromTargetID(
getTriple(), Args.getLastArgValue(options::OPT_march_EQ));

SmallVector<BitCodeLibraryInfo, 12> BCLibs;
for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(),
/*IsOpenMP=*/true))
for (auto BCLib :
getCommonDeviceLibNames(Args, GpuArch.str(), DeviceOffloadingKind))
BCLibs.emplace_back(BCLib);

return BCLibs;
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
const llvm::opt::ArgList &Args) const override;

llvm::SmallVector<BitCodeLibraryInfo, 12>
getDeviceLibs(const llvm::opt::ArgList &Args) const override;
getDeviceLibs(const llvm::opt::ArgList &Args,
const Action::OffloadKind DeviceOffloadKind) const override;

const ToolChain &HostTC;
};
Expand Down
8 changes: 5 additions & 3 deletions clang/lib/Driver/ToolChains/HIPAMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ void HIPAMDToolChain::addClangTargetOptions(
return; // No DeviceLibs for SPIR-V.
}

for (auto BCFile : getDeviceLibs(DriverArgs)) {
for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
Expand Down Expand Up @@ -355,7 +355,8 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
true) ||
Expand Down Expand Up @@ -397,7 +398,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");

// Add common device libraries like ocml etc.
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(),
DeviceOffloadingKind))
BCLibs.emplace_back(N);

// Add instrument lib.
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/HIPAMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain {
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
getDeviceLibs(const llvm::opt::ArgList &Args) const override;
getDeviceLibs(const llvm::opt::ArgList &Args,
Action::OffloadKind DeviceOffloadKind) const override;

SanitizerMask getSupportedSanitizers() const override;

Expand Down
7 changes: 5 additions & 2 deletions clang/lib/Driver/ToolChains/HIPSPV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ void HIPSPVToolChain::addClangTargetOptions(
CC1Args.append(
{"-fvisibility=hidden", "-fapply-global-visibility-to-externs"});

for (const BitCodeLibraryInfo &BCFile : getDeviceLibs(DriverArgs))
for (const BitCodeLibraryInfo &BCFile :
getDeviceLibs(DriverArgs, DeviceOffloadingKind))
CC1Args.append(
{"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)});
}
Expand Down Expand Up @@ -200,7 +201,9 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPSPVToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
HIPSPVToolChain::getDeviceLibs(
const llvm::opt::ArgList &DriverArgs,
const Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
true))
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/HIPSPV.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain {
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
getDeviceLibs(const llvm::opt::ArgList &Args) const override;
getDeviceLibs(const llvm::opt::ArgList &Args,
const Action::OffloadKind DeviceOffloadKind) const override;

SanitizerMask getSupportedSanitizers() const override;

Expand Down
Loading
Loading