Skip to content

Commit 9b605ec

Browse files
committed
Add builtins for wave reduction intrinsics
1 parent 02d5851 commit 9b605ec

File tree

3 files changed

+461
-0
lines changed

3 files changed

+461
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
351351
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
352352
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
353353

354+
//===----------------------------------------------------------------------===//
355+
356+
// Wave Reduction builtins.
357+
358+
//===----------------------------------------------------------------------===//
359+
360+
BUILTIN(__builtin_amdgcn_wave_reduce_add_i32, "iii", "nc")
361+
BUILTIN(__builtin_amdgcn_wave_reduce_sub_i32, "iii", "nc")
362+
BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "iii", "nc")
363+
BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "UiUii", "nc")
364+
BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "iii", "nc")
365+
BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "UiUii", "nc")
366+
BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "iii", "nc")
367+
BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "iii", "nc")
368+
BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "iii", "nc")
369+
BUILTIN(__builtin_amdgcn_wave_reduce_add_i64, "WiWii", "nc")
370+
BUILTIN(__builtin_amdgcn_wave_reduce_sub_i64, "WiWii", "nc")
371+
BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWii", "nc")
372+
BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUii", "nc")
373+
BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWii", "nc")
374+
BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUii", "nc")
375+
BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWii", "nc")
376+
BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWii", "nc")
377+
BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWii", "nc")
378+
354379
//===----------------------------------------------------------------------===//
355380
// R600-NI only builtins.
356381
//===----------------------------------------------------------------------===//

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
295295
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
296296
}
297297

298+
static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
299+
switch (BuiltinID) {
300+
default:
301+
llvm_unreachable("Unknown BuiltinID for wave reduction");
302+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32:
303+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64:
304+
return Intrinsic::amdgcn_wave_reduce_add;
305+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32:
306+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64:
307+
return Intrinsic::amdgcn_wave_reduce_sub;
308+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
309+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
310+
return Intrinsic::amdgcn_wave_reduce_min;
311+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
312+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
313+
return Intrinsic::amdgcn_wave_reduce_umin;
314+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
315+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
316+
return Intrinsic::amdgcn_wave_reduce_max;
317+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
318+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
319+
return Intrinsic::amdgcn_wave_reduce_umax;
320+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
321+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
322+
return Intrinsic::amdgcn_wave_reduce_and;
323+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
324+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
325+
return Intrinsic::amdgcn_wave_reduce_or;
326+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
327+
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
328+
return Intrinsic::amdgcn_wave_reduce_xor;
329+
}
330+
}
331+
298332
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
299333
const CallExpr *E) {
300334
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
301335
llvm::SyncScope::ID SSID;
302336
switch (BuiltinID) {
337+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32:
338+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32:
339+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
340+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
341+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
342+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
343+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
344+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
345+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
346+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64:
347+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64:
348+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
349+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
350+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
351+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
352+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
353+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
354+
case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
355+
Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
356+
llvm::Value *Value = EmitScalarExpr(E->getArg(0));
357+
llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
358+
llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
359+
return Builder.CreateCall(F, {Value, Strategy});
360+
}
303361
case AMDGPU::BI__builtin_amdgcn_div_scale:
304362
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
305363
// Translate from the intrinsics's struct return to the builtin's out

0 commit comments

Comments
 (0)