-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[CUDA] add support for targeting sm_103/sm_121 with CUDA-12.9 #151587
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang-driver
Author: Artem Belevich (Artem-B)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/151587.diff
9 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 6e531eff6dd1d..2d6fa1771014d 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,13 +21,17 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
!strconcat(f, "|", newer.Features));
}
+let Features = "sm_121a" in def SM_121a : SMFeatures;
let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_103a" in def SM_103a : SMFeatures;
let Features = "sm_101a" in def SM_101a : SMFeatures;
let Features = "sm_100a" in def SM_100a : SMFeatures;
let Features = "sm_90a" in def SM_90a : SMFeatures;
-def SM_120 : SM<"120", [SM_120a]>;
-def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_121 : SM<"121", [SM_121a]>;
+def SM_120 : SM<"120", [SM_120a, SM_121]>;
+def SM_103 : SM<"103", [SM_103a, SM_120]>;
+def SM_101 : SM<"101", [SM_101a, SM_103]>;
def SM_100 : SM<"100", [SM_100a, SM_101]>;
def SM_90 : SM<"90", [SM_90a, SM_100]>;
def SM_89 : SM<"89", [SM_90]>;
@@ -50,8 +54,9 @@ class PTX<string version, PTXFeatures newer> : PTXFeatures {
let Features = !strconcat("ptx", version, "|", newer.Features);
}
-let Features = "ptx87" in def PTX87 : PTXFeatures;
+let Features = "ptx88" in def PTX88 : PTXFeatures;
+def PTX87 : PTX<"87", PTX88>;
def PTX86 : PTX<"86", PTX87>;
def PTX85 : PTX<"85", PTX86>;
def PTX84 : PTX<"84", PTX85>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index d6a22a7af559b..81a792d3776aa 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -47,9 +47,10 @@ enum class CudaVersion {
CUDA_125,
CUDA_126,
CUDA_128,
- FULLY_SUPPORTED = CUDA_123,
+ CUDA_129,
+ FULLY_SUPPORTED = CUDA_128,
PARTIALLY_SUPPORTED =
- CUDA_128, // Partially supported. Proceed with a warning.
+ CUDA_129, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h
index 4dda3ec2216fa..387a68452ea0c 100644
--- a/clang/include/clang/Basic/OffloadArch.h
+++ b/clang/include/clang/Basic/OffloadArch.h
@@ -45,8 +45,12 @@ enum class OffloadArch {
SM_100a,
SM_101,
SM_101a,
+ SM_103,
+ SM_103a,
SM_120,
SM_120a,
+ SM_121,
+ SM_121a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 53b36d338fcec..dc81b71414b92 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -44,6 +44,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 5),
CUDA_ENTRY(12, 6),
CUDA_ENTRY(12, 8),
+ CUDA_ENTRY(12, 9),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
@@ -119,6 +120,11 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
return CudaVersion::CUDA_128;
+ case OffloadArch::SM_103:
+ case OffloadArch::SM_103a:
+ case OffloadArch::SM_121:
+ case OffloadArch::SM_121a:
+ return CudaVersion::CUDA_129;
default:
llvm_unreachable("invalid enum");
}
diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp
index dce9ffaedb905..434817813c470 100644
--- a/clang/lib/Basic/OffloadArch.cpp
+++ b/clang/lib/Basic/OffloadArch.cpp
@@ -33,8 +33,12 @@ static const OffloadArchToStringMap ArchNames[] = {
SM(100a), // Blackwell
SM(101), // Blackwell
SM(101a), // Blackwell
+ SM(103), // Blackwell
+ SM(103a), // Blackwell
SM(120), // Blackwell
SM(120a), // Blackwell
+ SM(121), // Blackwell
+ SM(121a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 54b39fd072a89..ce1e33b5ddf07 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -296,9 +296,15 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
return "1010";
+ case OffloadArch::SM_103:
+ case OffloadArch::SM_103a:
+ return "1030";
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
return "1200";
+ case OffloadArch::SM_121:
+ case OffloadArch::SM_121a:
+ return "1210";
}
llvm_unreachable("unhandled OffloadArch");
}();
@@ -307,7 +313,9 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_90a:
case OffloadArch::SM_100a:
case OffloadArch::SM_101a:
+ case OffloadArch::SM_103a:
case OffloadArch::SM_120a:
+ case OffloadArch::SM_121a:
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
break;
default:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index e25b6948d30f8..04c91920c9b7e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,8 +2278,12 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_100a:
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
+ case OffloadArch::SM_103:
+ case OffloadArch::SM_103a:
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
+ case OffloadArch::SM_121:
+ case OffloadArch::SM_121a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 7d803beb7aa3c..1f0b478c02b25 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -88,6 +88,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_126;
if (raw_version < 12090)
return CudaVersion::CUDA_128;
+ if (raw_version < 13000)
+ return CudaVersion::CUDA_129;
return CudaVersion::NEW;
}
@@ -683,6 +685,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
+ CASE_CUDA_VERSION(129, 88);
CASE_CUDA_VERSION(128, 87);
CASE_CUDA_VERSION(126, 85);
CASE_CUDA_VERSION(125, 85);
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index b5209ffc5f0a0..b90f26e8b348d 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -30,8 +30,12 @@
// CHECK-SAME: {{^}}, sm_100a
// CHECK-SAME: {{^}}, sm_101
// CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_103
+// CHECK-SAME: {{^}}, sm_103a
// CHECK-SAME: {{^}}, sm_120
// CHECK-SAME: {{^}}, sm_120a
+// CHECK-SAME: {{^}}, sm_121
+// CHECK-SAME: {{^}}, sm_121a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
|
@llvm/pr-subscribers-clang
Author: Artem Belevich (Artem-B)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/151587.diff
9 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 6e531eff6dd1d..2d6fa1771014d 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,13 +21,17 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
!strconcat(f, "|", newer.Features));
}
+let Features = "sm_121a" in def SM_121a : SMFeatures;
let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_103a" in def SM_103a : SMFeatures;
let Features = "sm_101a" in def SM_101a : SMFeatures;
let Features = "sm_100a" in def SM_100a : SMFeatures;
let Features = "sm_90a" in def SM_90a : SMFeatures;
-def SM_120 : SM<"120", [SM_120a]>;
-def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_121 : SM<"121", [SM_121a]>;
+def SM_120 : SM<"120", [SM_120a, SM_121]>;
+def SM_103 : SM<"103", [SM_103a, SM_120]>;
+def SM_101 : SM<"101", [SM_101a, SM_103]>;
def SM_100 : SM<"100", [SM_100a, SM_101]>;
def SM_90 : SM<"90", [SM_90a, SM_100]>;
def SM_89 : SM<"89", [SM_90]>;
@@ -50,8 +54,9 @@ class PTX<string version, PTXFeatures newer> : PTXFeatures {
let Features = !strconcat("ptx", version, "|", newer.Features);
}
-let Features = "ptx87" in def PTX87 : PTXFeatures;
+let Features = "ptx88" in def PTX88 : PTXFeatures;
+def PTX87 : PTX<"87", PTX88>;
def PTX86 : PTX<"86", PTX87>;
def PTX85 : PTX<"85", PTX86>;
def PTX84 : PTX<"84", PTX85>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index d6a22a7af559b..81a792d3776aa 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -47,9 +47,10 @@ enum class CudaVersion {
CUDA_125,
CUDA_126,
CUDA_128,
- FULLY_SUPPORTED = CUDA_123,
+ CUDA_129,
+ FULLY_SUPPORTED = CUDA_128,
PARTIALLY_SUPPORTED =
- CUDA_128, // Partially supported. Proceed with a warning.
+ CUDA_129, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h
index 4dda3ec2216fa..387a68452ea0c 100644
--- a/clang/include/clang/Basic/OffloadArch.h
+++ b/clang/include/clang/Basic/OffloadArch.h
@@ -45,8 +45,12 @@ enum class OffloadArch {
SM_100a,
SM_101,
SM_101a,
+ SM_103,
+ SM_103a,
SM_120,
SM_120a,
+ SM_121,
+ SM_121a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 53b36d338fcec..dc81b71414b92 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -44,6 +44,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 5),
CUDA_ENTRY(12, 6),
CUDA_ENTRY(12, 8),
+ CUDA_ENTRY(12, 9),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
@@ -119,6 +120,11 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
return CudaVersion::CUDA_128;
+ case OffloadArch::SM_103:
+ case OffloadArch::SM_103a:
+ case OffloadArch::SM_121:
+ case OffloadArch::SM_121a:
+ return CudaVersion::CUDA_129;
default:
llvm_unreachable("invalid enum");
}
diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp
index dce9ffaedb905..434817813c470 100644
--- a/clang/lib/Basic/OffloadArch.cpp
+++ b/clang/lib/Basic/OffloadArch.cpp
@@ -33,8 +33,12 @@ static const OffloadArchToStringMap ArchNames[] = {
SM(100a), // Blackwell
SM(101), // Blackwell
SM(101a), // Blackwell
+ SM(103), // Blackwell
+ SM(103a), // Blackwell
SM(120), // Blackwell
SM(120a), // Blackwell
+ SM(121), // Blackwell
+ SM(121a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 54b39fd072a89..ce1e33b5ddf07 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -296,9 +296,15 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
return "1010";
+ case OffloadArch::SM_103:
+ case OffloadArch::SM_103a:
+ return "1030";
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
return "1200";
+ case OffloadArch::SM_121:
+ case OffloadArch::SM_121a:
+ return "1210";
}
llvm_unreachable("unhandled OffloadArch");
}();
@@ -307,7 +313,9 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_90a:
case OffloadArch::SM_100a:
case OffloadArch::SM_101a:
+ case OffloadArch::SM_103a:
case OffloadArch::SM_120a:
+ case OffloadArch::SM_121a:
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
break;
default:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index e25b6948d30f8..04c91920c9b7e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,8 +2278,12 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_100a:
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
+ case OffloadArch::SM_103:
+ case OffloadArch::SM_103a:
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
+ case OffloadArch::SM_121:
+ case OffloadArch::SM_121a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 7d803beb7aa3c..1f0b478c02b25 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -88,6 +88,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_126;
if (raw_version < 12090)
return CudaVersion::CUDA_128;
+ if (raw_version < 13000)
+ return CudaVersion::CUDA_129;
return CudaVersion::NEW;
}
@@ -683,6 +685,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
+ CASE_CUDA_VERSION(129, 88);
CASE_CUDA_VERSION(128, 87);
CASE_CUDA_VERSION(126, 85);
CASE_CUDA_VERSION(125, 85);
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index b5209ffc5f0a0..b90f26e8b348d 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -30,8 +30,12 @@
// CHECK-SAME: {{^}}, sm_100a
// CHECK-SAME: {{^}}, sm_101
// CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_103
+// CHECK-SAME: {{^}}, sm_103a
// CHECK-SAME: {{^}}, sm_120
// CHECK-SAME: {{^}}, sm_120a
+// CHECK-SAME: {{^}}, sm_121
+// CHECK-SAME: {{^}}, sm_121a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, could you do a quick check to verify that this prints the appropriate arch? I want to make sure they didn't break it yet again.
clang foo.c --target=nvptx64-nvidia-cuda -march=sm_121a && llvm-readelf -h a.out
✅ With the latest revision this PR passed the C/C++ code formatter. |
It seems to work, but for some reason reports the binary as |
#149534 should be from this PR. You'll probably need to add those new flags to the enum list and then handle them in the getCPUName function. That can be a separate PR if you don't want to do it here. |
Looks like sm_101/sm_103 are also mishandled in a similar way -- both are reported as sm_100. I think this should be handled separately from this patch. |
Yup, guess I missed those, but they all need their own enum values. Should be an easy follow-up. |
No description provided.