From 15cb18c7d5f1c8e761e2cb65afa798d65529b2c2 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 18 Jul 2025 10:25:46 -0500 Subject: [PATCH 1/3] [LLVM] Update CUDA ELF flags for their new ABI Summary: We rely on these flags to do things in the runtime and print the contents of binaries correctly. CUDA updated their ABI encoding recently and we didn't handle that. It's a new ABI entirely, so we just select on it when it shows up. Fixes: https://github.com/llvm/llvm-project/issues/148703 --- llvm/include/llvm/BinaryFormat/ELF.h | 21 +++++- llvm/include/llvm/Object/ELFObjectFile.h | 1 + llvm/lib/BinaryFormat/ELF.cpp | 2 + llvm/lib/Object/ELFObjectFile.cpp | 17 ++++- llvm/tools/llvm-readobj/ELFDumper.cpp | 71 ++++++++++--------- llvm/unittests/Object/ELFObjectFileTest.cpp | 3 +- .../plugins-nextgen/common/src/Utils/ELF.cpp | 15 ++-- offload/plugins-nextgen/cuda/src/rtl.cpp | 6 +- 8 files changed, 94 insertions(+), 42 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index e4f82ad96a084..ad35d7f05d5da 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -362,6 +362,7 @@ enum { ELFOSABI_FENIXOS = 16, // FenixOS ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture. + ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture. ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime @@ -385,6 +386,12 @@ enum { ELFABIVERSION_AMDGPU_HSA_V6 = 4, }; +// CUDA OS ABI Version identification. +enum { + ELFABIVERSION_CUDA_V1 = 7, + ELFABIVERSION_CUDA_V2 = 8, +}; + #define ELF_RELOC(name, value) name = value, // X86_64 relocations. @@ -921,7 +928,7 @@ enum { // NVPTX specific e_flags. enum : unsigned { - // Processor selection mask for EF_CUDA_SM* values. + // Processor selection mask for EF_CUDA_SM* values prior to blackwell. 
EF_CUDA_SM = 0xff, // SM based processor values. @@ -954,12 +961,22 @@ enum : unsigned { // The target is using 64-bit addressing. EF_CUDA_64BIT_ADDRESS = 0x400, // Set when using the sm_90a processor. - EF_CUDA_ACCELERATORS = 0x800, + EF_CUDA_ACCELERATORS_V1 = 0x800, // Undocumented software feature. EF_CUDA_SW_FLAG_V2 = 0x1000, // Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values. EF_CUDA_VIRTUAL_SM = 0xff0000, + + // Processor selection mask for EF_CUDA_SM* values following blackwell. + EF_CUDA_SM_MASK = 0xff00, + + // SM based processor values. + EF_CUDA_SM100 = 0x6400, + EF_CUDA_SM120 = 0x7800, + + // Set when using an accelerator variant like sm_100a. + EF_CUDA_ACCELERATORS = 0x8, }; // ELF Relocation types for BPF diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index a3aa0d9c137a2..ced1afdd4cc6a 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1479,6 +1479,7 @@ template Triple::OSType ELFObjectFile::getOS() const { case ELF::ELFOSABI_OPENBSD: return Triple::OpenBSD; case ELF::ELFOSABI_CUDA: + case ELF::ELFOSABI_CUDA_V2: return Triple::CUDA; case ELF::ELFOSABI_AMDGPU_HSA: return Triple::AMDHSA; diff --git a/llvm/lib/BinaryFormat/ELF.cpp b/llvm/lib/BinaryFormat/ELF.cpp index 0ad1a09429e7c..4c96deb0655e2 100644 --- a/llvm/lib/BinaryFormat/ELF.cpp +++ b/llvm/lib/BinaryFormat/ELF.cpp @@ -652,6 +652,7 @@ uint8_t ELF::convertNameToOSABI(StringRef Name) { .StartsWith("fenixos", ELFOSABI_FENIXOS) .StartsWith("cloudabi", ELFOSABI_CLOUDABI) .StartsWith("cuda", ELFOSABI_CUDA) + .StartsWith("cuda", ELFOSABI_CUDA_V2) .StartsWith("amdhsa", ELFOSABI_AMDGPU_HSA) .StartsWith("amdpal", ELFOSABI_AMDGPU_PAL) .StartsWith("mesa3d", ELFOSABI_AMDGPU_MESA3D) @@ -696,6 +697,7 @@ StringRef ELF::convertOSABIToName(uint8_t OSABI) { case ELFOSABI_CLOUDABI: return "cloudabi"; case ELFOSABI_CUDA: + case ELFOSABI_CUDA_V2: return "cuda"; case ELFOSABI_AMDGPU_HSA: return 
"amdhsa"; diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 5597d7db6426d..0919c6aad74f2 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -620,7 +620,9 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { StringRef ELFObjectFileBase::getNVPTXCPUName() const { assert(getEMachine() == ELF::EM_CUDA); - unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM; + unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1 + ? getPlatformFlags() & ELF::EF_CUDA_SM + : getPlatformFlags() & ELF::EF_CUDA_SM_MASK; switch (SM) { // Fermi architecture. @@ -679,7 +681,18 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const { // Hopper architecture. case ELF::EF_CUDA_SM90: - return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90"; + return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a" + : "sm_90"; + + // Blackwell architecture. + case ELF::EF_CUDA_SM100: + return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a" + : "sm_100"; + + // Rubin architecture. + case ELF::EF_CUDA_SM120: + return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? 
"sm_120a" + : "sm_120"; default: llvm_unreachable("Unknown EF_CUDA_SM value"); } diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ccc64fec12958..ea1b4e690c1c2 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1083,26 +1083,26 @@ const EnumEntry ElfObjectFileType[] = { }; const EnumEntry ElfOSABI[] = { - {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE}, - {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX}, - {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD}, - {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX}, - {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD}, - {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS}, - {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX}, - {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX}, - {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD}, - {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64}, - {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO}, - {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD}, - {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS}, - {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK}, - {"AROS", "AROS", ELF::ELFOSABI_AROS}, - {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, - {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, - {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, - {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE} -}; + {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE}, + {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX}, + {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD}, + {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX}, + {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD}, + {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS}, + {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX}, + {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX}, + {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD}, + {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64}, + {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO}, + {"OpenBSD", "UNIX - OpenBSD", 
ELF::ELFOSABI_OPENBSD}, + {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS}, + {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK}, + {"AROS", "AROS", ELF::ELFOSABI_AROS}, + {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, + {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, + {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, + {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2}, + {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}}; const EnumEntry AMDGPUElfOSABI[] = { {"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA}, @@ -1667,16 +1667,17 @@ const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { }; const EnumEntry ElfHeaderNVPTXFlags[] = { - ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"), - ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"), - ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"), - ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"), - ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"), - ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"), - ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"), - ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"), - ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"), - ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"), + ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"), + ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"), + ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"), + ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"), + ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"), + ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"), + ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"), + ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"), + ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"), + ENUM_ENT(EF_CUDA_SM89, "sm_89"), 
ENUM_ENT(EF_CUDA_SM90, "sm_90"), + ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM120, "sm_120"), }; const EnumEntry ElfHeaderRISCVFlags[] = { @@ -3651,10 +3652,16 @@ template void GNUELFDumper::printFileHeaders() { else if (e.e_machine == EM_XTENSA) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags), unsigned(ELF::EF_XTENSA_MACH)); - else if (e.e_machine == EM_CUDA) + else if (e.e_machine == EM_CUDA) { ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags), unsigned(ELF::EF_CUDA_SM)); - else if (e.e_machine == EM_AMDGPU) { + if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 && + (e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1)) + ElfFlags += "a"; + else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 && + (e.e_flags & ELF::EF_CUDA_ACCELERATORS)) + ElfFlags += "a"; + } else if (e.e_machine == EM_AMDGPU) { switch (e.e_ident[ELF::EI_ABIVERSION]) { default: break; diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index 25b390758f172..7f933dac18689 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -295,7 +295,8 @@ TEST(ELFObjectFileTest, CheckOSAndTriple) { {ELF::EM_X86_64, ELF::ELFOSABI_AIX, "x86_64--aix"}, {ELF::EM_X86_64, ELF::ELFOSABI_FREEBSD, "x86_64--freebsd"}, {ELF::EM_X86_64, ELF::ELFOSABI_OPENBSD, "x86_64--openbsd"}, - {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}}; + {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}, + {ELF::EM_CUDA, ELF::ELFOSABI_CUDA_V2, "nvptx64-nvidia-cuda"}}; for (auto [Machine, OS, Triple] : Formats) { const DataForTest D(ELF::ELFCLASS64, ELF::ELFDATA2LSB, Machine, OS, ELF::EF_AMDGPU_MACH_AMDGCN_LAST); diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index dfec55432f202..d97b6c630929c 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ 
b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -73,10 +73,17 @@ checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC) return createError("Unsupported AMDGPU architecture"); } else if (Header.e_machine == EM_CUDA) { - if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) - return createError("Invalid CUDA addressing mode"); - if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) - return createError("Unsupported NVPTX architecture"); + if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V1) { + if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) + return createError("Invalid CUDA addressing mode"); + if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) + return createError("Unsupported NVPTX architecture"); + } else if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V2) { + if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM100) + return createError("Unsupported NVPTX architecture"); + } else { + return createError("Invalid CUDA ABI version"); + } } return Header.e_machine == EMachine; diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 728bf07c572bb..883dea263fc53 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1444,7 +1444,11 @@ struct CUDAPluginTy final : public GenericPluginTy { return ElfOrErr.takeError(); // Get the numeric value for the image's `sm_` value. - auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM; + const auto Header = ElfOrErr->getELFFile().getHeader(); + unsigned SM = + Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 + ? 
Header.e_flags & ELF::EF_CUDA_SM + : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 4; CUdevice Device; CUresult Res = cuDeviceGet(&Device, DeviceId); From a59408f06c6ec69c3d9cedbac3f4c1a40d3a0251 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 21 Jul 2025 08:51:44 -0500 Subject: [PATCH 2/3] fixes --- llvm/lib/BinaryFormat/ELF.cpp | 2 -- llvm/tools/llvm-readobj/ELFDumper.cpp | 1 - llvm/unittests/Object/ELFObjectFileTest.cpp | 3 +-- .../plugins-nextgen/common/src/Utils/ELF.cpp | 20 +++++++++---------- offload/plugins-nextgen/cuda/src/rtl.cpp | 2 +- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/lib/BinaryFormat/ELF.cpp b/llvm/lib/BinaryFormat/ELF.cpp index 4c96deb0655e2..0ad1a09429e7c 100644 --- a/llvm/lib/BinaryFormat/ELF.cpp +++ b/llvm/lib/BinaryFormat/ELF.cpp @@ -652,7 +652,6 @@ uint8_t ELF::convertNameToOSABI(StringRef Name) { .StartsWith("fenixos", ELFOSABI_FENIXOS) .StartsWith("cloudabi", ELFOSABI_CLOUDABI) .StartsWith("cuda", ELFOSABI_CUDA) - .StartsWith("cuda", ELFOSABI_CUDA_V2) .StartsWith("amdhsa", ELFOSABI_AMDGPU_HSA) .StartsWith("amdpal", ELFOSABI_AMDGPU_PAL) .StartsWith("mesa3d", ELFOSABI_AMDGPU_MESA3D) @@ -697,7 +696,6 @@ StringRef ELF::convertOSABIToName(uint8_t OSABI) { case ELFOSABI_CLOUDABI: return "cloudabi"; case ELFOSABI_CUDA: - case ELFOSABI_CUDA_V2: return "cuda"; case ELFOSABI_AMDGPU_HSA: return "amdhsa"; diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ea1b4e690c1c2..2d681a39461c0 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1101,7 +1101,6 @@ const EnumEntry ElfOSABI[] = { {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, - {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2}, {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}}; const EnumEntry AMDGPUElfOSABI[] = { diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp 
b/llvm/unittests/Object/ELFObjectFileTest.cpp index 7f933dac18689..25b390758f172 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -295,8 +295,7 @@ TEST(ELFObjectFileTest, CheckOSAndTriple) { {ELF::EM_X86_64, ELF::ELFOSABI_AIX, "x86_64--aix"}, {ELF::EM_X86_64, ELF::ELFOSABI_FREEBSD, "x86_64--freebsd"}, {ELF::EM_X86_64, ELF::ELFOSABI_OPENBSD, "x86_64--openbsd"}, - {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}, - {ELF::EM_CUDA, ELF::ELFOSABI_CUDA_V2, "nvptx64-nvidia-cuda"}}; + {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}}; for (auto [Machine, OS, Triple] : Formats) { const DataForTest D(ELF::ELFCLASS64, ELF::ELFDATA2LSB, Machine, OS, ELF::EF_AMDGPU_MACH_AMDGCN_LAST); diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index d97b6c630929c..87b3cbfcf88a9 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -60,29 +60,29 @@ static Expected checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { const auto Header = ELFObj.getELFFile().getHeader(); if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN) - return createError("Only executable ELF files are supported"); + return createError("only executable ELF files are supported"); if (Header.e_machine == EM_AMDGPU) { if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) - return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA"); + return createError("invalid AMD OS/ABI, must be AMDGPU_HSA"); if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 && Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) - return createError("Invalid AMD ABI version, must be version 5 or above"); + return createError("invalid AMD ABI version, must be version 5 or above"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || (Header.e_flags & EF_AMDGPU_MACH) > 
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC) - return createError("Unsupported AMDGPU architecture"); + return createError("unsupported AMDGPU architecture"); } else if (Header.e_machine == EM_CUDA) { - if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V1) { + if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) { if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) - return createError("Invalid CUDA addressing mode"); + return createError("invalid CUDA addressing mode"); if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) - return createError("Unsupported NVPTX architecture"); - } else if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V2) { + return createError("unsupported NVPTX architecture"); + } else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) { if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM100) - return createError("Unsupported NVPTX architecture"); + return createError("unsupported NVPTX architecture"); } else { - return createError("Invalid CUDA ABI version"); + return createError("invalid CUDA ABI version"); } } diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 883dea263fc53..5a391a4d36006 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1448,7 +1448,7 @@ struct CUDAPluginTy final : public GenericPluginTy { unsigned SM = Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 ? 
Header.e_flags & ELF::EF_CUDA_SM - : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 4; + : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 8; CUdevice Device; CUresult Res = cuDeviceGet(&Device, DeviceId); From 765d22d86a68946bc19cc7aaa896070cbc29c30c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 21 Jul 2025 09:40:47 -0500 Subject: [PATCH 3/3] fix --- offload/plugins-nextgen/common/src/Utils/ELF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index 87b3cbfcf88a9..b0ee1984c42ce 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -79,7 +79,7 @@ checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) return createError("unsupported NVPTX architecture"); } else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) { - if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM100) + if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100) return createError("unsupported NVPTX architecture"); } else { return createError("invalid CUDA ABI version");