Skip to content

Commit 2685c0e

Browse files
committed
Extending wave reduction intrinsics to support i64 types.
Supported Operations: `min`, `max`, `umin`, `umax`, `and`, `or`, `xor`, `add`, `sub`
1 parent 2edc730 commit 2685c0e

11 files changed

+9792
-42
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 326 additions & 32 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -304,28 +304,57 @@ def : GCNPat<(i32 (int_amdgcn_set_inactive_chain_arg i32:$src, i32:$inactive)),
304304
(V_SET_INACTIVE_B32 0, VGPR_32:$src, 0, VGPR_32:$inactive, (IMPLICIT_DEF))>;
305305

306306
// clang-format off
307-
defvar int_amdgcn_wave_reduce_ = "int_amdgcn_wave_reduce_";
307+
308308
multiclass
309-
AMDGPUWaveReducePseudoGenerator<string Op, string DataType> {
309+
AMDGPUWaveReducePseudoGenerator<string Op, string DataType, ValueType ty, RegisterClass RetReg, SrcRegOrImm9 Reg> {
310310
let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
311311
def !toupper(Op) #"_PSEUDO_" #DataType
312-
: VPseudoInstSI<(outs SGPR_32 : $sdst),
313-
(ins VSrc_b32 : $src, VSrc_b32 : $strategy),
314-
[(set i32 : $sdst, (!cast<AMDGPUWaveReduce>(int_amdgcn_wave_reduce_ #Op) i32 : $src, i32 : $strategy))]> {}
312+
: VPseudoInstSI<(outs RetReg : $sdst),
313+
(ins Reg : $src, VSrc_b32 : $strategy),
314+
[(set ty : $sdst, (!cast<AMDGPUWaveReduce>("int_amdgcn_wave_reduce_" #Op) ty : $src, i32 : $strategy))]> {}
315315
}
316316
}
317317
// clang-format on
318318

319+
class WaveReduceOp<string OpName, string TypeStr, ValueType Ty,
320+
RegisterClass ReturnRegisterClass, SrcRegOrImm9 RC> {
321+
string Name = OpName;
322+
string TypeString = TypeStr;
323+
ValueType VT = Ty;
324+
RegisterClass RetReg = ReturnRegisterClass;
325+
SrcRegOrImm9 Reg = RC;
326+
}
327+
319328
// Input list : [Operation_name,
320-
// type - Signed(I)/Unsigned(U)/Float(F)/Bitwise(B)]
329+
// type - Signed(I)/Unsigned(U)/Float(F)/Bitwise(B),
330+
// bit-width (as a ValueType: i32 or i64),
331+
// output register class,
332+
// input register class]
321333
defvar Operations = [
322-
["umin", "U32"], ["min", "I32"], ["umax", "U32"], ["max", "I32"],
323-
["add", "I32"], ["sub", "I32"], ["and", "B32"], ["or", "B32"],
324-
["xor", "B32"]
334+
WaveReduceOp<"umin", "U32", i32, SGPR_32, VSrc_b32>,
335+
WaveReduceOp<"min", "I32", i32, SGPR_32, VSrc_b32>,
336+
WaveReduceOp<"umax", "U32", i32, SGPR_32, VSrc_b32>,
337+
WaveReduceOp<"max", "I32", i32, SGPR_32, VSrc_b32>,
338+
WaveReduceOp<"add", "I32", i32, SGPR_32, VSrc_b32>,
339+
WaveReduceOp<"sub", "I32", i32, SGPR_32, VSrc_b32>,
340+
WaveReduceOp<"and", "B32", i32, SGPR_32, VSrc_b32>,
341+
WaveReduceOp<"or", "B32", i32, SGPR_32, VSrc_b32>,
342+
WaveReduceOp<"xor", "B32", i32, SGPR_32, VSrc_b32>,
343+
344+
WaveReduceOp<"umin", "U64", i64, SGPR_64, VSrc_b64>,
345+
WaveReduceOp<"min", "I64", i64, SGPR_64, VSrc_b64>,
346+
WaveReduceOp<"umax", "U64", i64, SGPR_64, VSrc_b64>,
347+
WaveReduceOp<"max", "I64", i64, SGPR_64, VSrc_b64>,
348+
WaveReduceOp<"add", "I64", i64, SGPR_64, VSrc_b64>,
349+
WaveReduceOp<"sub", "I64", i64, SGPR_64, VSrc_b64>,
350+
WaveReduceOp<"and", "B64", i64, SGPR_64, VSrc_b64>,
351+
WaveReduceOp<"or", "B64", i64, SGPR_64, VSrc_b64>,
352+
WaveReduceOp<"xor", "B64", i64, SGPR_64, VSrc_b64>,
325353
];
326354

327355
foreach Op = Operations in {
328-
defm WAVE_REDUCE_ : AMDGPUWaveReducePseudoGenerator<Op[0], Op[1]>;
356+
defm WAVE_REDUCE_ : AMDGPUWaveReducePseudoGenerator<Op.Name, Op.TypeString,
357+
Op.VT, Op.RetReg, Op.Reg>;
329358
}
330359

331360
let usesCustomInserter = 1, Defs = [VCC] in {

0 commit comments

Comments
 (0)