Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ enum {
ELFOSABI_FENIXOS = 16, // FenixOS
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
Expand All @@ -385,6 +386,12 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};

// CUDA OS ABI Version identification.
// These values are compared against e_ident[EI_ABIVERSION] of an EM_CUDA
// object to decide how the NVPTX-specific e_flags are decoded.
enum {
// The processor value is read from the EF_CUDA_SM field of e_flags and the
// accelerator variant is signalled by EF_CUDA_ACCELERATORS_V1.
ELFABIVERSION_CUDA_V1 = 7,
// The processor value is read from the EF_CUDA_SM_MASK field of e_flags and
// the accelerator variant is signalled by EF_CUDA_ACCELERATORS.
ELFABIVERSION_CUDA_V2 = 8,
};

// Expands one relocation entry into an enumerator definition `name = value,`
// so that a list of ELF_RELOC(...) entries can populate an enum body.
#define ELF_RELOC(name, value) name = value,

// X86_64 relocations.
Expand Down Expand Up @@ -921,7 +928,7 @@ enum {

// NVPTX specific e_flags.
enum : unsigned {
// Processor selection mask for EF_CUDA_SM* values.
// Processor selection mask for EF_CUDA_SM* values prior to Blackwell.
EF_CUDA_SM = 0xff,

// SM based processor values.
Expand Down Expand Up @@ -954,12 +961,22 @@ enum : unsigned {
// The target is using 64-bit addressing.
EF_CUDA_64BIT_ADDRESS = 0x400,
// Set when using the sm_90a processor.
EF_CUDA_ACCELERATORS = 0x800,
EF_CUDA_ACCELERATORS_V1 = 0x800,
// Undocumented software feature.
EF_CUDA_SW_FLAG_V2 = 0x1000,

// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
EF_CUDA_VIRTUAL_SM = 0xff0000,

// Processor selection mask for EF_CUDA_SM* values from Blackwell onward.
EF_CUDA_SM_MASK = 0xff00,

// SM based processor values.
EF_CUDA_SM100 = 0x6400,
EF_CUDA_SM120 = 0x7800,

// Set when using an accelerator variant like sm_100a.
EF_CUDA_ACCELERATORS = 0x8,
};

// ELF Relocation types for BPF
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Object/ELFObjectFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
case ELF::ELFOSABI_OPENBSD:
return Triple::OpenBSD;
case ELF::ELFOSABI_CUDA:
case ELF::ELFOSABI_CUDA_V2:
return Triple::CUDA;
case ELF::ELFOSABI_AMDGPU_HSA:
return Triple::AMDHSA;
Expand Down
17 changes: 15 additions & 2 deletions llvm/lib/Object/ELFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,9 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {

StringRef ELFObjectFileBase::getNVPTXCPUName() const {
assert(getEMachine() == ELF::EM_CUDA);
unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
? getPlatformFlags() & ELF::EF_CUDA_SM
: getPlatformFlags() & ELF::EF_CUDA_SM_MASK;

switch (SM) {
// Fermi architecture.
Expand Down Expand Up @@ -679,7 +681,18 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {

// Hopper architecture.
case ELF::EF_CUDA_SM90:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
: "sm_90";

// Blackwell architecture.
case ELF::EF_CUDA_SM100:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
: "sm_100";

// Blackwell architecture (compute capability 12.0).
case ELF::EF_CUDA_SM120:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
: "sm_120";
default:
llvm_unreachable("Unknown EF_CUDA_SM value");
}
Expand Down
70 changes: 38 additions & 32 deletions llvm/tools/llvm-readobj/ELFDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1083,26 +1083,25 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
};

const EnumEntry<unsigned> ElfOSABI[] = {
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
{"AROS", "AROS", ELF::ELFOSABI_AROS},
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
};
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
{"AROS", "AROS", ELF::ELFOSABI_AROS},
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};

const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
Expand Down Expand Up @@ -1667,16 +1666,17 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
};

const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM120, "sm_120"),
};

const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
Expand Down Expand Up @@ -3651,10 +3651,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
else if (e.e_machine == EM_XTENSA)
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
unsigned(ELF::EF_XTENSA_MACH));
else if (e.e_machine == EM_CUDA)
else if (e.e_machine == EM_CUDA) {
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
unsigned(ELF::EF_CUDA_SM));
else if (e.e_machine == EM_AMDGPU) {
if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
(e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
ElfFlags += "a";
else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
(e.e_flags & ELF::EF_CUDA_ACCELERATORS))
ElfFlags += "a";
} else if (e.e_machine == EM_AMDGPU) {
switch (e.e_ident[ELF::EI_ABIVERSION]) {
default:
break;
Expand Down
23 changes: 15 additions & 8 deletions offload/plugins-nextgen/common/src/Utils/ELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,23 +60,30 @@ static Expected<bool>
checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
const auto Header = ELFObj.getELFFile().getHeader();
if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
return createError("Only executable ELF files are supported");
return createError("only executable ELF files are supported");

if (Header.e_machine == EM_AMDGPU) {
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
return createError("invalid AMD OS/ABI, must be AMDGPU_HSA");
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
return createError("Invalid AMD ABI version, must be version 5 or above");
return createError("invalid AMD ABI version, must be version 5 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
(Header.e_flags & EF_AMDGPU_MACH) >
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
return createError("Unsupported AMDGPU architecture");
return createError("unsupported AMDGPU architecture");
} else if (Header.e_machine == EM_CUDA) {
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
return createError("Invalid CUDA addressing mode");
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
return createError("Unsupported NVPTX architecture");
if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) {
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
return createError("invalid CUDA addressing mode");
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
return createError("unsupported NVPTX architecture");
} else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) {
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM100)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just want to point this out again so that it does not get lost in the resolved thread.

Suggested change
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM100)
if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, missed that one.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jhuber6 Perfect, all good now from my side and from testing with sm_120 (sorry for mixing up accounts).

return createError("unsupported NVPTX architecture");
} else {
return createError("invalid CUDA ABI version");
}
}

return Header.e_machine == EMachine;
Expand Down
6 changes: 5 additions & 1 deletion offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1444,7 +1444,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
return ElfOrErr.takeError();

// Get the numeric value for the image's `sm_` value.
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
const auto Header = ElfOrErr->getELFFile().getHeader();
unsigned SM =
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
? Header.e_flags & ELF::EF_CUDA_SM
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 8;

CUdevice Device;
CUresult Res = cuDeviceGet(&Device, DeviceId);
Expand Down
Loading