@@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+    llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64:
+    return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64:
+    return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+    return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+    return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+    return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+    return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+    return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+    return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+    return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
+    Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
+    llvm::Value *Value = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
+    llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
+    return Builder.CreateCall(F, {Value, Strategy});
+  }
   case AMDGPU::BI__builtin_amdgcn_div_scale:
   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
     // Translate from the intrinsics's struct return to the builtin's out
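For context, a minimal usage sketch of the new builtins follows. It is not part of the patch: the HIP kernel, the buffer names, and the assumption that a strategy operand of 0 requests the target's default reduction strategy are illustrative only. Only the builtin spellings and their two-argument (value, strategy) form, which the CodeGen above lowers to the matching llvm.amdgcn.wave.reduce.* intrinsic overloaded on the value type, come from the diff.

#include <hip/hip_runtime.h>

// Illustrative device code, not taken from the patch. The strategy argument
// of 0 is assumed here to select the default reduction strategy.
__global__ void wave_reduce_demo(const int *in, int *out) {
  int gid = blockIdx.x * blockDim.x + threadIdx.x;
  int v = in[gid];

  // Two-operand form as handled in EmitAMDGPUBuiltinExpr: (value, strategy).
  // Clang emits a call to llvm.amdgcn.wave.reduce.add overloaded on i32.
  int sum = __builtin_amdgcn_wave_reduce_add_i32(v, /*strategy=*/0);

  // Unsigned and bitwise variants use the _u32/_u64 and _b32/_b64 spellings,
  // e.g. __builtin_amdgcn_wave_reduce_max_u32 or
  // __builtin_amdgcn_wave_reduce_xor_b64.
  out[gid] = sum;
}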