Skip to content

Commit 421d233

Browse files
committed
[LLVM] Update CUDA ELF flags for their new ABI
Summary: We rely on these flags to do things in the runtime and to print the contents of binaries correctly. CUDA updated their ABI encoding recently and we didn't handle that. It's an entirely new ABI, so we just select on it when it shows up. Fixes: #148703
1 parent d35931c commit 421d233

File tree

8 files changed

+94
-42
lines changed

8 files changed

+94
-42
lines changed

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ enum {
362362
ELFOSABI_FENIXOS = 16, // FenixOS
363363
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
364364
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
365+
ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
365366
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
366367
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
367368
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
385386
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
386387
};
387388

389+
// CUDA OS ABI Version identification.
390+
enum {
391+
ELFABIVERSION_CUDA_V1 = 7,
392+
ELFABIVERSION_CUDA_V2 = 8,
393+
};
394+
388395
#define ELF_RELOC(name, value) name = value,
389396

390397
// X86_64 relocations.
@@ -921,7 +928,7 @@ enum {
921928

922929
// NVPTX specific e_flags.
923930
enum : unsigned {
924-
// Processor selection mask for EF_CUDA_SM* values.
931+
// Processor selection mask for EF_CUDA_SM* values prior to blackwell.
925932
EF_CUDA_SM = 0xff,
926933

927934
// SM based processor values.
@@ -954,12 +961,22 @@ enum : unsigned {
954961
// The target is using 64-bit addressing.
955962
EF_CUDA_64BIT_ADDRESS = 0x400,
956963
// Set when using the sm_90a processor.
957-
EF_CUDA_ACCELERATORS = 0x800,
964+
EF_CUDA_ACCELERATORS_V1 = 0x800,
958965
// Undocumented software feature.
959966
EF_CUDA_SW_FLAG_V2 = 0x1000,
960967

961968
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
962969
EF_CUDA_VIRTUAL_SM = 0xff0000,
970+
971+
// Processor selection mask for EF_CUDA_SM* values following blackwell.
972+
EF_CUDA_SM_MASK = 0xff00,
973+
974+
// SM based processor values.
975+
EF_CUDA_SM100 = 0x6400,
976+
EF_CUDA_SM120 = 0x7800,
977+
978+
// Set when using an accelerator variant like sm_100a.
979+
EF_CUDA_ACCELERATORS = 0x8,
963980
};
964981

965982
// ELF Relocation types for BPF

llvm/include/llvm/Object/ELFObjectFile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
14791479
case ELF::ELFOSABI_OPENBSD:
14801480
return Triple::OpenBSD;
14811481
case ELF::ELFOSABI_CUDA:
1482+
case ELF::ELFOSABI_CUDA_V2:
14821483
return Triple::CUDA;
14831484
case ELF::ELFOSABI_AMDGPU_HSA:
14841485
return Triple::AMDHSA;

llvm/lib/BinaryFormat/ELF.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,7 @@ uint8_t ELF::convertNameToOSABI(StringRef Name) {
652652
.StartsWith("fenixos", ELFOSABI_FENIXOS)
653653
.StartsWith("cloudabi", ELFOSABI_CLOUDABI)
654654
.StartsWith("cuda", ELFOSABI_CUDA)
655+
.StartsWith("cuda", ELFOSABI_CUDA_V2)
655656
.StartsWith("amdhsa", ELFOSABI_AMDGPU_HSA)
656657
.StartsWith("amdpal", ELFOSABI_AMDGPU_PAL)
657658
.StartsWith("mesa3d", ELFOSABI_AMDGPU_MESA3D)
@@ -696,6 +697,7 @@ StringRef ELF::convertOSABIToName(uint8_t OSABI) {
696697
case ELFOSABI_CLOUDABI:
697698
return "cloudabi";
698699
case ELFOSABI_CUDA:
700+
case ELFOSABI_CUDA_V2:
699701
return "cuda";
700702
case ELFOSABI_AMDGPU_HSA:
701703
return "amdhsa";

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,9 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
620620

621621
StringRef ELFObjectFileBase::getNVPTXCPUName() const {
622622
assert(getEMachine() == ELF::EM_CUDA);
623-
unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
623+
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
624+
? getPlatformFlags() & ELF::EF_CUDA_SM
625+
: getPlatformFlags() & ELF::EF_CUDA_SM_MASK;
624626

625627
switch (SM) {
626628
// Fermi architecture.
@@ -679,7 +681,18 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
679681

680682
// Hopper architecture.
681683
case ELF::EF_CUDA_SM90:
682-
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
684+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
685+
: "sm_90";
686+
687+
// Blackwell architecture.
688+
case ELF::EF_CUDA_SM100:
689+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
690+
: "sm_100";
691+
692+
// Blackwell architecture (sm_120).
693+
case ELF::EF_CUDA_SM120:
694+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
695+
: "sm_120";
683696
default:
684697
llvm_unreachable("Unknown EF_CUDA_SM value");
685698
}

llvm/tools/llvm-readobj/ELFDumper.cpp

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,26 +1083,26 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
10831083
};
10841084

10851085
const EnumEntry<unsigned> ElfOSABI[] = {
1086-
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1087-
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1088-
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1089-
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1090-
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1091-
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1092-
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1093-
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1094-
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1095-
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1096-
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1097-
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1098-
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1099-
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1100-
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1101-
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1102-
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1103-
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1104-
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
1105-
};
1086+
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1087+
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1088+
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1089+
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1090+
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1091+
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1092+
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1093+
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1094+
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1095+
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1096+
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1097+
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1098+
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1099+
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1100+
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1101+
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1102+
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1103+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1104+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
1105+
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
11061106

11071107
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
11081108
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
@@ -1667,16 +1667,17 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
16671667
};
16681668

16691669
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
1670-
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1671-
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1672-
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1673-
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1674-
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1675-
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1676-
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1677-
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1678-
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1679-
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1670+
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1671+
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1672+
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1673+
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1674+
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1675+
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1676+
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1677+
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1678+
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1679+
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1680+
ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM120, "sm_120"),
16801681
};
16811682

16821683
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@@ -3651,10 +3652,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
36513652
else if (e.e_machine == EM_XTENSA)
36523653
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
36533654
unsigned(ELF::EF_XTENSA_MACH));
3654-
else if (e.e_machine == EM_CUDA)
3655+
else if (e.e_machine == EM_CUDA) {
36553656
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
36563657
unsigned(ELF::EF_CUDA_SM));
3657-
else if (e.e_machine == EM_AMDGPU) {
3658+
if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
3659+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
3660+
ElfFlags += "a";
3661+
else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
3662+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS))
3663+
ElfFlags += "a";
3664+
} else if (e.e_machine == EM_AMDGPU) {
36583665
switch (e.e_ident[ELF::EI_ABIVERSION]) {
36593666
default:
36603667
break;

llvm/unittests/Object/ELFObjectFileTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,8 @@ TEST(ELFObjectFileTest, CheckOSAndTriple) {
295295
{ELF::EM_X86_64, ELF::ELFOSABI_AIX, "x86_64--aix"},
296296
{ELF::EM_X86_64, ELF::ELFOSABI_FREEBSD, "x86_64--freebsd"},
297297
{ELF::EM_X86_64, ELF::ELFOSABI_OPENBSD, "x86_64--openbsd"},
298-
{ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}};
298+
{ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"},
299+
{ELF::EM_CUDA, ELF::ELFOSABI_CUDA_V2, "nvptx64-nvidia-cuda"}};
299300
for (auto [Machine, OS, Triple] : Formats) {
300301
const DataForTest D(ELF::ELFCLASS64, ELF::ELFDATA2LSB, Machine, OS,
301302
ELF::EF_AMDGPU_MACH_AMDGCN_LAST);

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,17 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
7373
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
7474
return createError("Unsupported AMDGPU architecture");
7575
} else if (Header.e_machine == EM_CUDA) {
76-
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
77-
return createError("Invalid CUDA addressing mode");
78-
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
79-
return createError("Unsupported NVPTX architecture");
76+
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V1) {
77+
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
78+
return createError("Invalid CUDA addressing mode");
79+
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
80+
return createError("Unsupported NVPTX architecture");
81+
} else if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V2) {
82+
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM100)
83+
return createError("Unsupported NVPTX architecture");
84+
} else {
85+
return createError("Invalid CUDA ABI version");
86+
}
8087
}
8188

8289
return Header.e_machine == EMachine;

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
14421442
return ElfOrErr.takeError();
14431443

14441444
// Get the numeric value for the image's `sm_` value.
1445-
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
1445+
const auto Header = ElfOrErr->getELFFile().getHeader();
1446+
unsigned SM =
1447+
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
1448+
? Header.e_flags & ELF::EF_CUDA_SM
1449+
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 4;
14461450

14471451
CUdevice Device;
14481452
CUresult Res = cuDeviceGet(&Device, DeviceId);

0 commit comments

Comments
 (0)