Skip to content

Commit 92425cb

Browse files
committed
[flang][OMPIRBuilder][MLIR][llvm] Backend support for atomic control options
1 parent a6532c2 commit 92425cb

File tree

5 files changed

+117
-15
lines changed

5 files changed

+117
-15
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
! REQUIRES: amdgpu-registered-target
2+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -munsafe-fp-atomics %s -o -|FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
3+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-ignore-denormal-mode %s -o -|FileCheck -check-prefix=IGNORE-DENORMAL-MODE %s
4+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-fine-grained-memory %s -o -|FileCheck -check-prefix=FINE-GRAINED-MEMORY %s
5+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-remote-memory %s -o -|FileCheck -check-prefix=REMOTE-MEMORY %s
6+
program test
7+
implicit none
8+
integer :: A, threads
9+
threads = 128
10+
A = 0
11+
!$omp target parallel num_threads(threads)
12+
!$omp atomic
13+
A = A + 1
14+
!$omp end target parallel
15+
end program test
16+
17+
!UNSAFE-FP-ATOMICS: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
18+
!IGNORE-DENORMAL-MODE: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
19+
!FINE-GRAINED-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
20+
!REMOTE-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.fine.grained.memory !{{.*}}

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3286,7 +3286,8 @@ class OpenMPIRBuilder {
32863286
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
32873287
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
32883288
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
3289-
bool IsXBinopExpr);
3289+
bool IsXBinopExpr, bool IsIgnoreDenormalMode,
3290+
bool IsFineGrainedMemory, bool IsRemoteMemory);
32903291

32913292
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
32923293
///
@@ -3359,7 +3360,9 @@ class OpenMPIRBuilder {
33593360
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(
33603361
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33613362
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3362-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr);
3363+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
3364+
bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
3365+
bool IsRemoteMemory = false);
33633366

33643367
/// Emit atomic update for constructs: --- Only Scalar data types
33653368
/// V = X; X = X BinOp Expr ,
@@ -3394,7 +3397,9 @@ class OpenMPIRBuilder {
33943397
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33953398
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
33963399
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3397-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr);
3400+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
3401+
bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
3402+
bool IsRemoteMemory = false);
33983403

33993404
/// Emit atomic compare for constructs: --- Only scalar data types
34003405
/// cond-expr-stmt:

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8957,7 +8957,8 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
89578957
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
89588958
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
89598959
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
8960-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
8960+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
8961+
bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
89618962
assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
89628963
if (!updateToLocation(Loc))
89638964
return Loc.IP;
@@ -8975,9 +8976,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
89758976
"OpenMP atomic does not support LT or GT operations");
89768977
});
89778978

8978-
Expected<std::pair<Value *, Value *>> AtomicResult =
8979-
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
8980-
X.IsVolatile, IsXBinopExpr);
8979+
Expected<std::pair<Value *, Value *>> AtomicResult = emitAtomicUpdate(
8980+
AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
8981+
IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
89818982
if (!AtomicResult)
89828983
return AtomicResult.takeError();
89838984
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
@@ -9024,7 +9025,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
90249025
Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
90259026
InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
90269027
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
9027-
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
9028+
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr,
9029+
bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
90289030
// TODO: handle the case where XElemTy is not byte-sized or not a power of 2
90299031
// or a complex datatype.
90309032
bool emitRMWOp = false;
@@ -9047,7 +9049,20 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
90479049

90489050
std::pair<Value *, Value *> Res;
90499051
if (emitRMWOp) {
9050-
Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
9052+
AtomicRMWInst *AtomicRMWInst =
9053+
Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
9054+
if (T.isAMDGPU()) {
9055+
if (IsIgnoreDenormalMode)
9056+
AtomicRMWInst->setMetadata("amdgpu.ignore.denormal.mode",
9057+
llvm::MDNode::get(Builder.getContext(), {}));
9058+
if (!IsFineGrainedMemory)
9059+
AtomicRMWInst->setMetadata("amdgpu.no.fine.grained.memory",
9060+
llvm::MDNode::get(Builder.getContext(), {}));
9061+
if (!IsRemoteMemory)
9062+
AtomicRMWInst->setMetadata("amdgpu.no.remote.memory",
9063+
llvm::MDNode::get(Builder.getContext(), {}));
9064+
}
9065+
Res.first = AtomicRMWInst;
90519066
// not needed except in case of postfix captures. Generate anyway for
90529067
// consistency with the else part. Will be removed with any DCE pass.
90539068
// AtomicRMWInst::Xchg does not have a corresponding instruction.
@@ -9179,7 +9194,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
91799194
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
91809195
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
91819196
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
9182-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
9197+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
9198+
bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
91839199
if (!updateToLocation(Loc))
91849200
return Loc.IP;
91859201

@@ -9198,9 +9214,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
91989214
// If UpdateExpr is 'x' updated with some `expr` not based on 'x',
91999215
// 'x' is simply atomically rewritten with 'expr'.
92009216
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
9201-
Expected<std::pair<Value *, Value *>> AtomicResult =
9202-
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
9203-
X.IsVolatile, IsXBinopExpr);
9217+
Expected<std::pair<Value *, Value *>> AtomicResult = emitAtomicUpdate(
9218+
AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile,
9219+
IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
92049220
if (!AtomicResult)
92059221
return AtomicResult.takeError();
92069222
Value *CapturedVal =

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3269,13 +3269,25 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
32693269
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
32703270
};
32713271

3272+
bool isIgnoreDenormalMode = false;
3273+
bool isFineGrainedMemory = false;
3274+
bool isRemoteMemory = false;
3275+
if (opInst->hasAttr(opInst.getAtomicControlAttrName())) {
3276+
mlir::omp::AtomicControlAttr atomicControlAttr =
3277+
opInst.getAtomicControlAttr();
3278+
isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
3279+
isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
3280+
isRemoteMemory = atomicControlAttr.getRemoteMemory();
3281+
}
3282+
32723283
// Handle ambiguous alloca, if any.
32733284
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
32743285
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
32753286
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
32763287
ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
32773288
atomicOrdering, binop, updateFn,
3278-
isXBinopExpr);
3289+
isXBinopExpr, isIgnoreDenormalMode,
3290+
isFineGrainedMemory, isRemoteMemory);
32793291

32803292
if (failed(handleError(afterIP, *opInst)))
32813293
return failure();
@@ -3364,13 +3376,26 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
33643376
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
33653377
};
33663378

3379+
bool isIgnoreDenormalMode = false;
3380+
bool isFineGrainedMemory = false;
3381+
bool isRemoteMemory = false;
3382+
if (atomicUpdateOp &&
3383+
atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
3384+
mlir::omp::AtomicControlAttr atomicControlAttr =
3385+
atomicUpdateOp.getAtomicControlAttr();
3386+
isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
3387+
isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
3388+
isRemoteMemory = atomicControlAttr.getRemoteMemory();
3389+
}
3390+
33673391
// Handle ambiguous alloca, if any.
33683392
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
33693393
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
33703394
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
33713395
ompBuilder->createAtomicCapture(
33723396
ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
3373-
binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);
3397+
binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr,
3398+
isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory);
33743399

33753400
if (failed(handleError(afterIP, *atomicCaptureOp)))
33763401
return failure();
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
// CHECK: atomicrmw add ptr %loadgep_, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
4+
5+
module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<1> = dense<64> : vector<4xi64>, !llvm.ptr<2> = dense<32> : vector<4xi64>, !llvm.ptr<3> = dense<32> : vector<4xi64>, !llvm.ptr<4> = dense<64> : vector<4xi64>, !llvm.ptr<5> = dense<32> : vector<4xi64>, !llvm.ptr<6> = dense<32> : vector<4xi64>, !llvm.ptr<7> = dense<[160, 256, 256, 32]> : vector<4xi64>, !llvm.ptr<8> = dense<[128, 128, 128, 48]> : vector<4xi64>, !llvm.ptr<9> = dense<[192, 256, 256, 32]> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.legal_int_widths" = array<i32: 32, 64>, "dlti.stack_alignment" = 32 : i64, "dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>, fir.atomic_ignore_denormal_mode, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", fir.target_cpu = "generic-hsa", llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.flags = #omp.flags<openmp_device_version = 31>, omp.is_gpu = true, omp.is_target_device = true, omp.requires = #omp<clause_requires none>, omp.target_triples = [], omp.version = #omp.version<version = 31>} {
6+
llvm.func @_QQmain() attributes {fir.bindc_name = "TEST", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "generic-hsa"} {
7+
%0 = llvm.mlir.constant(1 : i64) : i64
8+
%1 = llvm.alloca %0 x i32 {bindc_name = "threads"} : (i64) -> !llvm.ptr<5>
9+
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
10+
%3 = llvm.mlir.constant(1 : i64) : i64
11+
%4 = llvm.alloca %3 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr<5>
12+
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
13+
%6 = llvm.mlir.constant(0 : i32) : i32
14+
%7 = llvm.mlir.constant(128 : i32) : i32
15+
%8 = llvm.mlir.constant(1 : i64) : i64
16+
%9 = llvm.mlir.constant(1 : i64) : i64
17+
llvm.store %7, %2 : i32, !llvm.ptr
18+
llvm.store %6, %5 : i32, !llvm.ptr
19+
%10 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "threads"}
20+
%11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "a"}
21+
omp.target map_entries(%10 -> %arg0, %11 -> %arg1 : !llvm.ptr, !llvm.ptr) {
22+
%12 = llvm.mlir.constant(1 : i32) : i32
23+
%13 = llvm.load %arg0 : !llvm.ptr -> i32
24+
omp.parallel num_threads(%13 : i32) {
25+
omp.atomic.update %arg1 : !llvm.ptr {
26+
^bb0(%arg2: i32):
27+
%14 = llvm.add %arg2, %12 : i32
28+
omp.yield(%14 : i32)
29+
} {atomic_control = #omp.atomic_control<ignore_denormal_mode = true>}
30+
omp.terminator
31+
}
32+
omp.terminator
33+
}
34+
llvm.return
35+
}
36+
}

0 commit comments

Comments
 (0)