@@ -304,28 +304,57 @@ def : GCNPat<(i32 (int_amdgcn_set_inactive_chain_arg i32:$src, i32:$inactive)),
304
304
(V_SET_INACTIVE_B32 0, VGPR_32:$src, 0, VGPR_32:$inactive, (IMPLICIT_DEF))>;
305
305
306
306
// clang-format off
307
- defvar int_amdgcn_wave_reduce_ = "int_amdgcn_wave_reduce_";
307
+
308
308
+ // Generates one wave-reduce pseudo instruction per instantiation. The record is
+ // named !toupper(Op) # "_PSEUDO_" # DataType (appended to the defm prefix) and
+ // its pattern selects the intrinsic record "int_amdgcn_wave_reduce_" # Op on
+ // value type ty, taking $src as a Reg operand and defining $sdst in RetReg.
+ // The string handed to !cast must match the intrinsic record name exactly, so
+ // the prefix literal must not contain any leading or trailing whitespace.
multiclass
309
- AMDGPUWaveReducePseudoGenerator<string Op, string DataType> {
309
+ AMDGPUWaveReducePseudoGenerator<string Op, string DataType, ValueType ty, RegisterClass RetReg, SrcRegOrImm9 Reg > {
310
310
let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
311
311
def !toupper(Op) #"_PSEUDO_" #DataType
312
- : VPseudoInstSI<(outs SGPR_32 : $sdst),
313
- (ins VSrc_b32 : $src, VSrc_b32 : $strategy),
314
- [(set i32 : $sdst, (!cast<AMDGPUWaveReduce>(int_amdgcn_wave_reduce_ #Op) i32 : $src, i32 : $strategy))]> {}
312
+ : VPseudoInstSI<(outs RetReg : $sdst),
313
+ (ins Reg : $src, VSrc_b32 : $strategy),
314
+ [(set ty : $sdst, (!cast<AMDGPUWaveReduce>("int_amdgcn_wave_reduce_" #Op) ty : $src, i32 : $strategy))]> {}
315
315
}
316
316
}
317
317
// clang-format on
318
318
319
+ class WaveReduceOp<string OpName, string TypeStr, ValueType Ty, // bundles the parameters describing one wave-reduce pseudo
320
+ RegisterClass ReturnRegisterClass, SrcRegOrImm9 RC> {
321
+ string Name = OpName; // operation name, e.g. "umin"; passed as Op to the pseudo generator
322
+ string TypeString = TypeStr; // type tag, e.g. "U32" / "I64" / "B32"; passed as DataType
323
+ ValueType VT = Ty; // value type used for the source and result in the pattern
324
+ RegisterClass RetReg = ReturnRegisterClass; // register class of the result operand
325
+ SrcRegOrImm9 Reg = RC; // operand class of the source operand
326
+ }
327
+
319
328
// Input list : [Operation_name,
320
- // type - Signed(I)/Unsigned(U)/Float(F)/Bitwise(B)]
329
+ // type - Signed(I)/Unsigned(U)/Float(F)/Bitwise(B),
330
+ // value type carrying the bit-width (i32/i64),
331
+ // output register class,
332
+ // input register class]
321
333
defvar Operations = [ // one WaveReduceOp entry per pseudo to generate
322
- ["umin", "U32"], ["min", "I32"], ["umax", "U32"], ["max", "I32"],
323
- ["add", "I32"], ["sub", "I32"], ["and", "B32"], ["or", "B32"],
324
- ["xor", "B32"]
334
+ WaveReduceOp<"umin", "U32", i32, SGPR_32, VSrc_b32>, // 32-bit variants: i32 value, SGPR_32 result, VSrc_b32 source
335
+ WaveReduceOp<"min", "I32", i32, SGPR_32, VSrc_b32>,
336
+ WaveReduceOp<"umax", "U32", i32, SGPR_32, VSrc_b32>,
337
+ WaveReduceOp<"max", "I32", i32, SGPR_32, VSrc_b32>,
338
+ WaveReduceOp<"add", "I32", i32, SGPR_32, VSrc_b32>,
339
+ WaveReduceOp<"sub", "I32", i32, SGPR_32, VSrc_b32>,
340
+ WaveReduceOp<"and", "B32", i32, SGPR_32, VSrc_b32>,
341
+ WaveReduceOp<"or", "B32", i32, SGPR_32, VSrc_b32>,
342
+ WaveReduceOp<"xor", "B32", i32, SGPR_32, VSrc_b32>,
343
+
344
+ WaveReduceOp<"umin", "U64", i64, SGPR_64, VSrc_b64>, // 64-bit variants: i64 value, SGPR_64 result, VSrc_b64 source
345
+ WaveReduceOp<"min", "I64", i64, SGPR_64, VSrc_b64>,
346
+ WaveReduceOp<"umax", "U64", i64, SGPR_64, VSrc_b64>,
347
+ WaveReduceOp<"max", "I64", i64, SGPR_64, VSrc_b64>,
348
+ WaveReduceOp<"add", "I64", i64, SGPR_64, VSrc_b64>,
349
+ WaveReduceOp<"sub", "I64", i64, SGPR_64, VSrc_b64>,
350
+ WaveReduceOp<"and", "B64", i64, SGPR_64, VSrc_b64>,
351
+ WaveReduceOp<"or", "B64", i64, SGPR_64, VSrc_b64>,
352
+ WaveReduceOp<"xor", "B64", i64, SGPR_64, VSrc_b64>,
325
353
];
326
354
327
355
foreach Op = Operations in { // instantiate the pseudo generator once per Operations entry
328
- defm WAVE_REDUCE_ : AMDGPUWaveReducePseudoGenerator<Op[0], Op[1]>;
356
+ defm WAVE_REDUCE_ : AMDGPUWaveReducePseudoGenerator<Op.Name, Op.TypeString, // fields of the WaveReduceOp entry map to the multiclass arguments in order
357
+ Op.VT, Op.RetReg, Op.Reg>;
329
358
}
330
359
331
360
let usesCustomInserter = 1, Defs = [VCC] in {
0 commit comments