Skip to content

Commit b53be5f

Browse files
authored
[LLVM] Update CUDA ELF flags for their new ABI (#149534)
Summary: We rely on these flags to do things in the runtime and to print the contents of binaries correctly. CUDA recently updated their ABI encoding and we didn't handle that. It's an entirely new ABI, so we just select on it when it shows up. Fixes: #148703
1 parent 8f9ed78 commit b53be5f

File tree

6 files changed

+93
-45
lines changed

6 files changed

+93
-45
lines changed

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ enum {
362362
ELFOSABI_FENIXOS = 16, // FenixOS
363363
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
364364
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
365+
ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
365366
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
366367
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
367368
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
385386
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
386387
};
387388

389+
// CUDA OS ABI Version identification.
390+
enum {
391+
ELFABIVERSION_CUDA_V1 = 7,
392+
ELFABIVERSION_CUDA_V2 = 8,
393+
};
394+
388395
#define ELF_RELOC(name, value) name = value,
389396

390397
// X86_64 relocations.
@@ -921,7 +928,7 @@ enum {
921928

922929
// NVPTX specific e_flags.
923930
enum : unsigned {
924-
// Processor selection mask for EF_CUDA_SM* values.
931+
// Processor selection mask for EF_CUDA_SM* values prior to Blackwell.
925932
EF_CUDA_SM = 0xff,
926933

927934
// SM based processor values.
@@ -954,12 +961,22 @@ enum : unsigned {
954961
// The target is using 64-bit addressing.
955962
EF_CUDA_64BIT_ADDRESS = 0x400,
956963
// Set when using the sm_90a processor.
957-
EF_CUDA_ACCELERATORS = 0x800,
964+
EF_CUDA_ACCELERATORS_V1 = 0x800,
958965
// Undocumented software feature.
959966
EF_CUDA_SW_FLAG_V2 = 0x1000,
960967

961968
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
962969
EF_CUDA_VIRTUAL_SM = 0xff0000,
970+
971+
// Processor selection mask for EF_CUDA_SM* values from Blackwell onward.
972+
EF_CUDA_SM_MASK = 0xff00,
973+
974+
// SM based processor values.
975+
EF_CUDA_SM100 = 0x6400,
976+
EF_CUDA_SM120 = 0x7800,
977+
978+
// Set when using an accelerator variant like sm_100a.
979+
EF_CUDA_ACCELERATORS = 0x8,
963980
};
964981

965982
// ELF Relocation types for BPF

llvm/include/llvm/Object/ELFObjectFile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
14791479
case ELF::ELFOSABI_OPENBSD:
14801480
return Triple::OpenBSD;
14811481
case ELF::ELFOSABI_CUDA:
1482+
case ELF::ELFOSABI_CUDA_V2:
14821483
return Triple::CUDA;
14831484
case ELF::ELFOSABI_AMDGPU_HSA:
14841485
return Triple::AMDHSA;

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,9 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
620620

621621
StringRef ELFObjectFileBase::getNVPTXCPUName() const {
622622
assert(getEMachine() == ELF::EM_CUDA);
623-
unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
623+
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
624+
? getPlatformFlags() & ELF::EF_CUDA_SM
625+
: getPlatformFlags() & ELF::EF_CUDA_SM_MASK;
624626

625627
switch (SM) {
626628
// Fermi architecture.
@@ -679,7 +681,18 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
679681

680682
// Hopper architecture.
681683
case ELF::EF_CUDA_SM90:
682-
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
684+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
685+
: "sm_90";
686+
687+
// Blackwell architecture.
688+
case ELF::EF_CUDA_SM100:
689+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
690+
: "sm_100";
691+
692+
// Blackwell architecture (sm_120 variants).
693+
case ELF::EF_CUDA_SM120:
694+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
695+
: "sm_120";
683696
default:
684697
llvm_unreachable("Unknown EF_CUDA_SM value");
685698
}

llvm/tools/llvm-readobj/ELFDumper.cpp

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,26 +1087,25 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
10871087
};
10881088

10891089
const EnumEntry<unsigned> ElfOSABI[] = {
1090-
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1091-
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1092-
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1093-
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1094-
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1095-
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1096-
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1097-
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1098-
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1099-
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1100-
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1101-
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1102-
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1103-
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1104-
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1105-
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1106-
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1107-
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1108-
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
1109-
};
1090+
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1091+
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1092+
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1093+
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1094+
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1095+
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1096+
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1097+
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1098+
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1099+
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1100+
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1101+
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1102+
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1103+
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1104+
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1105+
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1106+
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1107+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1108+
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
11101109

11111110
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
11121111
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
@@ -1671,16 +1670,17 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
16711670
};
16721671

16731672
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
1674-
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1675-
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1676-
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1677-
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1678-
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1679-
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1680-
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1681-
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1682-
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1683-
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1673+
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1674+
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1675+
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1676+
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1677+
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1678+
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1679+
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1680+
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1681+
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1682+
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1683+
ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM120, "sm_120"),
16841684
};
16851685

16861686
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@@ -3655,10 +3655,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
36553655
else if (e.e_machine == EM_XTENSA)
36563656
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
36573657
unsigned(ELF::EF_XTENSA_MACH));
3658-
else if (e.e_machine == EM_CUDA)
3658+
else if (e.e_machine == EM_CUDA) {
36593659
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
36603660
unsigned(ELF::EF_CUDA_SM));
3661-
else if (e.e_machine == EM_AMDGPU) {
3661+
if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
3662+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
3663+
ElfFlags += "a";
3664+
else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
3665+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS))
3666+
ElfFlags += "a";
3667+
} else if (e.e_machine == EM_AMDGPU) {
36623668
switch (e.e_ident[ELF::EI_ABIVERSION]) {
36633669
default:
36643670
break;

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,23 +60,30 @@ static Expected<bool>
6060
checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
6161
const auto Header = ELFObj.getELFFile().getHeader();
6262
if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
63-
return createError("Only executable ELF files are supported");
63+
return createError("only executable ELF files are supported");
6464

6565
if (Header.e_machine == EM_AMDGPU) {
6666
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
67-
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
67+
return createError("invalid AMD OS/ABI, must be AMDGPU_HSA");
6868
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
6969
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
70-
return createError("Invalid AMD ABI version, must be version 5 or above");
70+
return createError("invalid AMD ABI version, must be version 5 or above");
7171
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
7272
(Header.e_flags & EF_AMDGPU_MACH) >
7373
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
74-
return createError("Unsupported AMDGPU architecture");
74+
return createError("unsupported AMDGPU architecture");
7575
} else if (Header.e_machine == EM_CUDA) {
76-
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
77-
return createError("Invalid CUDA addressing mode");
78-
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
79-
return createError("Unsupported NVPTX architecture");
76+
if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) {
77+
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
78+
return createError("invalid CUDA addressing mode");
79+
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
80+
return createError("unsupported NVPTX architecture");
81+
} else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) {
82+
if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100)
83+
return createError("unsupported NVPTX architecture");
84+
} else {
85+
return createError("invalid CUDA ABI version");
86+
}
8087
}
8188

8289
return Header.e_machine == EMachine;

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1444,7 +1444,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
14441444
return ElfOrErr.takeError();
14451445

14461446
// Get the numeric value for the image's `sm_` value.
1447-
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
1447+
const auto Header = ElfOrErr->getELFFile().getHeader();
1448+
unsigned SM =
1449+
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
1450+
? Header.e_flags & ELF::EF_CUDA_SM
1451+
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 8;
14481452

14491453
CUdevice Device;
14501454
CUresult Res = cuDeviceGet(&Device, DeviceId);

0 commit comments

Comments
 (0)