From c82bff38f45c65d8fbac77bd4ef3369a16b768d9 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Wed, 16 Jul 2025 17:45:04 -0500 Subject: [PATCH 1/6] Exposed some AMDGCN raw buffer atomic intrinsics to clang. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 9 +++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 46 +++++++++++ ...cn-raw-buffer-atomics-gfx908-target-err.cl | 16 ++++ ...ns-amdgcn-raw-buffer-atomics-gfx90a-err.cl | 16 ++++ ...iltins-amdgcn-raw-buffer-atomics-gfx90a.cl | 76 +++++++++++++++++++ 5 files changed, 163 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 313c0e640d240..58e26f0a2c458 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -252,6 +252,15 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64, "ddQbiiIi", "t", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts") + +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "gfx90a-insts") 
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts") + TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts") diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index dcfdea648e93c..4ed6439c0d2d4 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -1439,6 +1439,52 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); } + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: { + llvm::Type *RetTy; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: + RetTy = Int32Ty; + break; + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: + RetTy = FloatTy; + break; + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: + RetTy = DoubleTy; + break; + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: + RetTy = llvm::FixedVectorType::get(HalfTy, 2); + } + unsigned IID; + 
switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: + IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add; + break; + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: + IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: + IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax; + break; + } + llvm::Function *F = CGM.getIntrinsic(IID, RetTy); + return Builder.CreateCall( + F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), + EmitScalarExpr(E->getArg(4))}); + } case AMDGPU::BI__builtin_amdgcn_s_prefetch_data: return emitBuiltinWithOneOverloadedType<2>( *this, E, Intrinsic::amdgcn_s_prefetch_data); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl new file mode 100644 index 0000000000000..3e72ef98de2c1 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s + +// REQUIRES: amdgpu-registered-target + +typedef half __attribute__((ext_vector_type(2))) float16x2_t; + +void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset) { + i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' needs target feature gfx90a-insts}} + f32 = 
__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature gfx90a-insts}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' needs target feature gfx90a-insts}} + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature gfx90a-insts}} + + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature gfx90a-insts}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature gfx90a-insts}} + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' needs target feature gfx90a-insts}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl new file mode 100644 index 0000000000000..b32c9cc620896 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s + +// REQUIRES: amdgpu-registered-target + +typedef half __attribute__((ext_vector_type(2))) float16x2_t; + +void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset, int x) { + i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to 
'__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}} + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' must be a constant integer}} + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}} + + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}} + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' must be a constant integer}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl new file mode 100644 index 0000000000000..f13d4f3459d58 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: amdgpu-registered-target + +typedef half __attribute__((ext_vector_type(2))) float16x2_t; + +// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32( +// CHECK-SAME: ptr 
addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local double @test_atomic_fadd_f64( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret double [[TMP0]] +// +double test_atomic_fadd_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16( +// CHECK-SAME: ptr 
addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret <2 x half> [[TMP0]] +// +float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret double [[TMP0]] +// +double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local <2 x half> 
@test_atomic_fmax_v2f16( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret <2 x half> [[TMP0]] +// +float16x2_t test_atomic_fmax_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(x, rsrc, offset, soffset, 0); +} From 59e1b303486a6c051deeeec6a91773bdfb82aa3e Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Thu, 17 Jul 2025 12:29:01 -0500 Subject: [PATCH 2/6] Refined choice of target features. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 16 ++-- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 9 +-- ...iltins-amdgcn-raw-buffer-atomic-add-err.cl | 10 +++ ...amdgcn-raw-buffer-atomic-add-target-err.cl | 9 +++ .../builtins-amdgcn-raw-buffer-atomic-add.cl | 36 +++++++++ ...iltins-amdgcn-raw-buffer-atomic-max-err.cl | 7 ++ ...amdgcn-raw-buffer-atomic-max-target-err.cl | 7 ++ .../builtins-amdgcn-raw-buffer-atomic-max.cl | 24 ++++++ ...cn-raw-buffer-atomics-gfx908-target-err.cl | 16 ---- ...ns-amdgcn-raw-buffer-atomics-gfx90a-err.cl | 16 ---- ...iltins-amdgcn-raw-buffer-atomics-gfx90a.cl | 76 ------------------- 11 files changed, 102 insertions(+), 124 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl create mode 
100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 58e26f0a2c458..9b1e618a490a5 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -163,6 +163,13 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n") +BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "atomic-fadd-rtn-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "atomic-buffer-global-pk-add-f16-insts") + +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64") + TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts") TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts") @@ -252,15 +259,6 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts") -TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t", "gfx90a-insts") 
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64, "ddQbiiIi", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts") - -TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts") - TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts") diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 4ed6439c0d2d4..0c9b8fafc8cd2 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -1441,11 +1441,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: { + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: { llvm::Type *RetTy; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: @@ -1455,13 +1453,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case 
AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: RetTy = FloatTy; break; - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: RetTy = DoubleTy; break; case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: RetTy = llvm::FixedVectorType::get(HalfTy, 2); + break; } unsigned IID; switch (BuiltinID) { @@ -1469,13 +1466,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add; break; case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd; break; case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax; break; } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl new file mode 100644 index 0000000000000..5c5ae937797e8 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s +// REQUIRES: amdgpu-registered-target + +typedef half __attribute__((ext_vector_type(2))) float16x2_t; + +void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset, int x) { + i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}} + f32 = 
__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}} + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl new file mode 100644 index 0000000000000..509a498af5efa --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s +// REQUIRES: amdgpu-registered-target + +typedef half __attribute__((ext_vector_type(2))) float16x2_t; + +void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, float16x2_t v2f16, int offset, int soffset) { + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts}} + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl new file mode 100644 index 0000000000000..7641e10c89c04 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck 
%s + +// REQUIRES: amdgpu-registered-target + +typedef half __attribute__((ext_vector_type(2))) float16x2_t; + +// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret <2 x half> [[TMP0]] +// +float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t 
rsrc, float16x2_t x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl new file mode 100644 index 0000000000000..aeb7426177d6d --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) { + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl new file mode 100644 index 0000000000000..3f1ad416d132f --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) { + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature atomic-fmin-fmax-global-f32}} + f64 = 
__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature atomic-fmin-fmax-global-f64}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl new file mode 100644 index 0000000000000..d2a9314906a65 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl @@ -0,0 +1,24 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: amdgpu-registered-target + +// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0); +} + +// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64( +// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) +// CHECK-NEXT: ret double [[TMP0]] +// +double 
test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl deleted file mode 100644 index 3e72ef98de2c1..0000000000000 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s - -// REQUIRES: amdgpu-registered-target - -typedef half __attribute__((ext_vector_type(2))) float16x2_t; - -void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset) { - i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' needs target feature gfx90a-insts}} - f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature gfx90a-insts}} - f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' needs target feature gfx90a-insts}} - v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature gfx90a-insts}} - - f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature gfx90a-insts}} - f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // 
expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature gfx90a-insts}} - v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' needs target feature gfx90a-insts}} -} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl deleted file mode 100644 index b32c9cc620896..0000000000000 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s - -// REQUIRES: amdgpu-registered-target - -typedef half __attribute__((ext_vector_type(2))) float16x2_t; - -void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset, int x) { - i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}} - f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}} - f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' must be a constant integer}} - v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}} - - f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}} - f64 = 
__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}} - v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' must be a constant integer}} -} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl deleted file mode 100644 index f13d4f3459d58..0000000000000 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl +++ /dev/null @@ -1,76 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s - -// REQUIRES: amdgpu-registered-target - -typedef half __attribute__((ext_vector_type(2))) float16x2_t; - -// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32( -// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) -// CHECK-NEXT: ret i32 [[TMP0]] -// -int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) { - return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0); -} - -// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32( -// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) -// CHECK-NEXT: ret float [[TMP0]] -// -float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { - return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0); -} - -// CHECK-LABEL: define dso_local double @test_atomic_fadd_f64( -// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) -// CHECK-NEXT: ret double [[TMP0]] -// -double test_atomic_fadd_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) { - return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(x, rsrc, offset, soffset, 0); -} - -// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16( -// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) -// CHECK-NEXT: ret <2 x half> [[TMP0]] -// -float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) { - return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0); -} - -// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32( -// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) -// CHECK-NEXT: ret float [[TMP0]] -// -float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { - return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0); -} - -// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64( -// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) -// CHECK-NEXT: ret double [[TMP0]] -// -double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) { - return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0); -} - -// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fmax_v2f16( -// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0) -// CHECK-NEXT: ret <2 x half> [[TMP0]] -// -float16x2_t test_atomic_fmax_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) { - return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(x, rsrc, offset, soffset, 0); -} From 16fcd493d898399e484f000fa9682555410a4edd Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Fri, 18 Jul 2025 11:56:35 -0500 Subject: [PATCH 3/6] Addressed reviewer feedback. 
--- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 44 +++---------------- .../builtins-amdgcn-raw-buffer-atomic-add.cl | 2 + ...iltins-amdgcn-raw-buffer-atomic-max-err.cl | 7 --- .../builtins-amdgcn-raw-buffer-atomic-max.cl | 5 ++- ...iltins-amdgcn-raw-buffer-atomic-add-err.cl | 0 ...amdgcn-raw-buffer-atomic-add-target-err.cl | 0 ...iltins-amdgcn-raw-buffer-atomic-max-err.cl | 9 ++++ ...amdgcn-raw-buffer-atomic-max-target-err.cl | 0 8 files changed, 21 insertions(+), 46 deletions(-) delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl rename clang/test/{CodeGenOpenCL => SemaOpenCL}/builtins-amdgcn-raw-buffer-atomic-add-err.cl (100%) rename clang/test/{CodeGenOpenCL => SemaOpenCL}/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl (100%) create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl rename clang/test/{CodeGenOpenCL => SemaOpenCL}/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl (100%) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 0c9b8fafc8cd2..0374d858160ce 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -1440,46 +1440,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); } case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: + return emitBuiltinWithOneOverloadedType<5>( + *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add); case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: + return emitBuiltinWithOneOverloadedType<5>( + *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd); case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: { - llvm::Type *RetTy; - switch (BuiltinID) { - case 
AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: - RetTy = Int32Ty; - break; - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: - RetTy = FloatTy; - break; - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: - RetTy = DoubleTy; - break; - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: - RetTy = llvm::FixedVectorType::get(HalfTy, 2); - break; - } - unsigned IID; - switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: - IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add; - break; - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: - IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd; - break; - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: - IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax; - break; - } - llvm::Function *F = CGM.getIntrinsic(IID, RetTy); - return Builder.CreateCall( - F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), - EmitScalarExpr(E->getArg(4))}); - } + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: + return emitBuiltinWithOneOverloadedType<5>( + *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax); case AMDGPU::BI__builtin_amdgcn_s_prefetch_data: return emitBuiltinWithOneOverloadedType<2>( *this, E, Intrinsic::amdgcn_s_prefetch_data); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl index 7641e10c89c04..689046ace4479 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl @@ -1,5 +1,7 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s // REQUIRES: amdgpu-registered-target diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl deleted file mode 100644 index aeb7426177d6d..0000000000000 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl +++ /dev/null @@ -1,7 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s -// REQUIRES: amdgpu-registered-target - -void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) { - f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}} - f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}} -} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl index d2a9314906a65..2b88ca2e3a4e6 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl @@ -1,6 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 
-target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s - +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s // REQUIRES: amdgpu-registered-target // CHECK-LABEL: define dso_local float @test_atomic_fmax_f32( diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl similarity index 100% rename from clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl similarity index 100% rename from clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl new file mode 100644 index 0000000000000..4ad4eb50dbbd4 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple 
amdgcn-unknown-unknown -target-cpu gfx942 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) { + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl similarity index 100% rename from clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl From 877c00182080363ac114b24a0416f768be232cd5 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Fri, 1 Aug 2025 13:10:29 -0500 Subject: [PATCH 4/6] Added `atomic-fmin-fmax-global-f*` features to TargetParser. 
--- clang/test/CodeGen/link-builtin-bitcode.c | 8 +-- clang/test/CodeGenOpenCL/amdgpu-features.cl | 54 +++++++++---------- .../builtins-amdgcn-raw-buffer-atomic-max.cl | 6 +-- llvm/lib/TargetParser/TargetParser.cpp | 10 ++++ 4 files changed, 44 insertions(+), 34 deletions(-) diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c index 963a3956ff808..9a5b6de3c3b38 100644 --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in // CHECK-LABEL: @attr_incompatible // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] { -// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } -// CHECK: attributes 
#[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } +// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 42768ac8def1f..690471e4d1083 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -82,34 +82,34 @@ // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90C: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" -// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1011: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1035: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1152: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32" +// GFX942: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1011: 
"target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// 
GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1103: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1201: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32" -// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" +// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" kernel void test() {} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl index 2b88ca2e3a4e6..5cfb099bf8acc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl @@ -1,7 +1,7 @@ // NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx10-1-generic -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx10-3-generic -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s // REQUIRES: amdgpu-registered-target // CHECK-LABEL: define dso_local float @test_atomic_fmax_f32( diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index d7e206ef8cd4f..a13e4f8695459 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -456,6 +456,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["atomic-global-pk-add-bf16-inst"] = true; Features["atomic-ds-pk-add-16-insts"] = true; Features["setprio-inc-wg-inst"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; break; case GK_GFX1201: case GK_GFX1200: @@ -484,6 +486,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["atomic-fadd-rtn-insts"] = true; Features["image-insts"] = true; Features["fp8-conversion-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; break; case GK_GFX1153: case GK_GFX1152: @@ -512,6 +515,7 @@ void 
AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["atomic-fadd-rtn-insts"] = true; Features["image-insts"] = true; Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; break; case GK_GFX1036: case GK_GFX1035: @@ -540,6 +544,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memtime-inst"] = true; Features["gws"] = true; Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; break; case GK_GFX1012: case GK_GFX1011: @@ -565,6 +571,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memtime-inst"] = true; Features["gws"] = true; Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; break; case GK_GFX950: Features["bitop3-insts"] = true; @@ -616,11 +624,13 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memtime-inst"] = true; Features["gws"] = true; Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; break; case GK_GFX90A: Features["gfx90a-insts"] = true; Features["atomic-buffer-global-pk-add-f16-insts"] = true; Features["atomic-fadd-rtn-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; [[fallthrough]]; case GK_GFX908: Features["dot3-insts"] = true; From 1cdb6bc60c8836a7d7f4ba252032f876f03b2447 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Fri, 1 Aug 2025 13:39:37 -0500 Subject: [PATCH 5/6] Updated tests affected by merge --- clang/test/CodeGenOpenCL/amdgpu-features.cl | 2 +- .../amdgpu-readonly-features-written-with-no-target.cl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 18748e815d203..db8bc976a52ba 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ 
b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -108,7 +108,7 @@ // GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32" +// GFX1250:
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl index 1542efa14efb9..1a0a30ca0b51e 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl @@ -11,6 +11,6 @@ __attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {} // NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" -// GFX1100:
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" From dbd12bbb18058994125c5c888061f29c12ef1221 Mon Sep 17
00:00:00 2001 From: Zach Goldthorpe Date: Tue, 5 Aug 2025 09:52:07 -0500 Subject: [PATCH 6/6] Added `fmin`, and enabled features for SI/CI archs --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 3 +++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 4 ++++ clang/test/CodeGenOpenCL/amdgpu-features.cl | 18 +++++++++--------- ...-amdgcn-raw-buffer-atomic-fmin-fmax-err.cl} | 9 ++++++--- ...-raw-buffer-atomic-fmin-fmax-target-err.cl} | 2 ++ llvm/lib/TargetParser/TargetParser.cpp | 8 +++++++- 6 files changed, 31 insertions(+), 13 deletions(-) rename clang/test/SemaOpenCL/{builtins-amdgcn-raw-buffer-atomic-max-err.cl => builtins-amdgcn-raw-buffer-atomic-fmin-fmax-err.cl} (58%) rename clang/test/SemaOpenCL/{builtins-amdgcn-raw-buffer-atomic-max-target-err.cl => builtins-amdgcn-raw-buffer-atomic-fmin-fmax-target-err.cl} (60%) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index fced35aa6bc4b..80e1d1da8becd 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -164,10 +164,13 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t") + TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "atomic-fadd-rtn-insts") TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "atomic-buffer-global-pk-add-f16-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32") TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64") TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64") 
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts") diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 281162f4b2ecf..27ba0e0ec59f4 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -1494,6 +1494,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: return emitBuiltinWithOneOverloadedType<5>( *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd); + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32: + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64: + return emitBuiltinWithOneOverloadedType<5>( + *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin); case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: return emitBuiltinWithOneOverloadedType<5>( diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index db8bc976a52ba..e8d5b209ee10e 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -62,15 +62,15 @@ // NOCPU-WAVE32: "target-features"="+wavefrontsize32" // NOCPU-WAVE64: "target-features"="+wavefrontsize64" -// GFX600: "target-features"="+s-memtime-inst,+wavefrontsize64" -// GFX601: "target-features"="+s-memtime-inst,+wavefrontsize64" -// GFX602: "target-features"="+s-memtime-inst,+wavefrontsize64" -// GFX700: "target-features"="+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX701: "target-features"="+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX702: "target-features"="+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX703: "target-features"="+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX704: "target-features"="+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX705:
"target-features"="+ci-insts,+s-memtime-inst,+wavefrontsize64" +// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64" +// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64" +// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64" +// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" +// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" +// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" +// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" +// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" +// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" // GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-fmin-fmax-err.cl similarity index 58% rename from clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-fmin-fmax-err.cl index 4ad4eb50dbbd4..6442bd559d95f 100644 ---
a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-fmin-fmax-err.cl @@ -1,9 +1,12 @@ -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx10-1-generic -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx10-3-generic -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -S -verify=expected -o - %s + // REQUIRES: amdgpu-registered-target void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) { + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32' must be a constant integer}} f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64' must be a constant integer}} f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}} } diff --git 
a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-fmin-fmax-target-err.cl similarity index 60% rename from clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-fmin-fmax-target-err.cl index 3f1ad416d132f..9b1701fb2a0c3 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-fmin-fmax-target-err.cl @@ -2,6 +2,8 @@ // REQUIRES: amdgpu-registered-target void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) { + f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32' needs target feature atomic-fmin-fmax-global-f32}} f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature atomic-fmin-fmax-global-f32}} + f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64' needs target feature atomic-fmin-fmax-global-f64}} f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature atomic-fmin-fmax-global-f64}} } diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index c80bca6b5f256..87e21a55ce121 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -667,7 +667,11 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["16-bit-insts"] = true; Features["dpp"] = true; Features["s-memrealtime"] = true; - [[fallthrough]]; + Features["ci-insts"] = 
true; + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + break; case GK_GFX705: case GK_GFX704: case GK_GFX703: @@ -682,6 +686,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["image-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; break; case GK_NONE: break;