From f9556e26c551617c3c9a61c6d8e67543f0390dec Mon Sep 17 00:00:00 2001 From: Kyle Wang Date: Mon, 18 Nov 2024 02:15:49 -0800 Subject: [PATCH 1/2] Add description to amdgpu.dpp and rocdl.update.dpp to explain their behaviors --- mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 5 +++++ mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 69745addfd748..cb25655cc29eb 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -437,6 +437,11 @@ def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result", let summary = "AMDGPU DPP operation"; let description = [{ This operation represents DPP functionality in a GPU program. + + The behavior should be equivalent to: + v_mov_b32 `result` `old` + v_mov_b32 `result` `src` `kind` `row_mask` `bank_mask` `bound_ctrl` + DPP provides the following operations: - Full crossbar in a group of four (`quad_perm`) - Wavefront shift left by one lane (`wave_shl`) diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index 3695708439d91..a2a86a3a8a2cf 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -621,6 +621,15 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0], Arguments<(ins LLVM_Type:$old, LLVM_Type:$src, I32Attr:$dppCtrl, I32Attr:$rowMask, I32Attr:$bankMask, I1Attr:$boundCtrl)> { let results = (outs LLVM_Type:$res); + let summary = "Represent the DPP(Data-Parallel Primitives) functionality"; + let description = [{ + Represent the DPP(Data-Parallel Primitives) functionality, + which supports cross-lane data operations. 
+ + The behavior should be equivalent to: + v_mov_b32 `res` `old` + v_mov_b32 `res` `src` `dppCtrl` `rowMask` `bankMask` `boundCtrl` + }]; let assemblyFormat = [{ attr-dict $old `,` $src `with` $dppCtrl `,` $rowMask `,` $bankMask `,` $boundCtrl `:` type($src) }]; From cd75ea2e4c4ab790875b57d06322a5d22a41e523 Mon Sep 17 00:00:00 2001 From: Kyle Wang Date: Tue, 19 Nov 2024 02:01:30 -0800 Subject: [PATCH 2/2] rephrased the descriptions --- mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 15 +++++++++----- mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 20 ++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index cb25655cc29eb..61a62e9cc9111 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -436,13 +436,18 @@ def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result", DefaultValuedAttr:$bound_ctrl)> { let summary = "AMDGPU DPP operation"; let description = [{ - This operation represents DPP functionality in a GPU program. + This operation represents DPP (Data-Parallel Primitives) functionality in a GPU program. - The behavior should be equivalent to: - v_mov_b32 `result` `old` - v_mov_b32 `result` `src` `kind` `row_mask` `bank_mask` `bound_ctrl` + The result contains the values of `src`, permuted according to + the DPP attributes (`kind`, `permArgument`, `row_mask`, `bank_mask` and `bound_ctrl`). - DPP provides the following operations: + If a lane is masked off (by the EXEC mask, `row_mask`, `bank_mask`, etc.) or + has no value to receive (for example, if the source of a lane is out of range), + this operation will instead produce the value of `old`. + + For a detailed explanation of the attributes, please refer to the ISA Reference Guide.
+ + DPP provides the following operations: - Full crossbar in a group of four (`quad_perm`) - Wavefront shift left by one lane (`wave_shl`) - Wavefront shift right by one lane (`wave_shr`) diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index a2a86a3a8a2cf..e64f2b271aada 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -621,14 +621,20 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0], Arguments<(ins LLVM_Type:$old, LLVM_Type:$src, I32Attr:$dppCtrl, I32Attr:$rowMask, I32Attr:$bankMask, I1Attr:$boundCtrl)> { let results = (outs LLVM_Type:$res); - let summary = "Represent the DPP(Data-Parallel Primitives) functionality"; + let summary = "Exchange data among lanes in a warp"; let description = [{ - Represent the DPP(Data-Parallel Primitives) functionality, - which supports cross-lane data operations. - - The behavior should be equivalent to: - v_mov_b32 `res` `old` - v_mov_b32 `res` `src` `dppCtrl` `rowMask` `bankMask` `boundCtrl` + This operation maps to the DPP (Data-Parallel Primitives) intrinsics, + which allow lanes in the same warp to exchange data in their registers + directly, following some very specific patterns. + + The result contains the values of `src`, permuted according to + the DPP attributes (`dppCtrl`, `rowMask`, `bankMask` and `boundCtrl`). + + If a lane is masked off (by the EXEC mask, `rowMask`, `bankMask`, etc.) or + has no value to receive (for example, if the source of a lane is out of range), + this operation will instead produce the value of `old`. + + For a detailed explanation of the attributes, please refer to the ISA Reference Guide. }]; let assemblyFormat = [{ attr-dict $old `,` $src `with` $dppCtrl `,` $rowMask `,` $bankMask `,` $boundCtrl `:` type($src)