Skip to content

Commit 1b7dd6c

Browse files
committed
Address review comments
1 parent 5a73674 commit 1b7dd6c

File tree

2 files changed

+55
-33
lines changed

2 files changed

+55
-33
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,8 +1108,8 @@ struct DeferredStore {
11081108
/// target region represents a Generic (non-SPMD) kernel.
11091109
///
11101110
/// This represents a necessary but not sufficient set of conditions to use
1111-
/// device shared memory in place of regular allocas. Depending on the variable,
1112-
/// its uses or the associated OpenMP construct might also need to be taken into
1111+
/// device shared memory in place of regular allocas. For some variables, the
1112+
/// associated OpenMP construct or their uses might also need to be taken into
11131113
/// account.
11141114
static bool
11151115
mightAllocInDeviceSharedMemory(Operation &op,
@@ -1122,9 +1122,8 @@ mightAllocInDeviceSharedMemory(Operation &op,
11221122
targetOp = op.getParentOfType<omp::TargetOp>();
11231123

11241124
return targetOp &&
1125-
!bitEnumContainsAny(
1126-
targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()),
1127-
omp::TargetRegionFlags::spmd);
1125+
targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()) ==
1126+
omp::TargetExecMode::generic;
11281127
}
11291128

11301129
/// Check whether the entry block argument representing the private copy of a
@@ -1146,7 +1145,7 @@ static bool mustAllocPrivateVarInDeviceSharedMemory(BlockArgument value) {
11461145
if (llvm::is_contained(parallelOp.getReductionVars(), value))
11471146
return true;
11481147
} else if (auto parallelOp = user->getParentOfType<omp::ParallelOp>()) {
1149-
if (targetOp->isProperAncestor(parallelOp))
1148+
if (parentOp->isProperAncestor(parallelOp))
11501149
return true;
11511150
}
11521151
}

mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,38 +49,61 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
4949
}
5050
omp.terminator
5151
}
52-
llvm.return
53-
}
54-
}
55-
56-
// CHECK: call i32 @__kmpc_target_init
57-
// CHECK: call void @[[OUTLINED_TARGET:__omp_offloading_[A-Za-z0-9_.]*]]
52+
// CHECK: call i32 @__kmpc_target_init
53+
// CHECK: call void @[[OUTLINED_TARGET:__omp_offloading_[A-Za-z0-9_.]*]]
5854

55+
// CHECK: define internal void @[[OUTLINED_TARGET]]
56+
// CHECK: %[[X_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
57+
// CHECK: %[[GEP_X:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
58+
// CHECK-NEXT: store ptr %[[X_PRIV]], ptr addrspace(5) %[[GEP_X]]
59+
// CHECK-NEXT: call void @[[OUTLINED_TEAMS:__omp_offloading_[A-Za-z0-9_.]*]](ptr %structArg.ascast)
5960

60-
// CHECK: define internal void @[[OUTLINED_TARGET]]
61-
// CHECK: %[[X_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
62-
// CHECK: %[[GEP_X:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
63-
// CHECK-NEXT: store ptr %[[X_PRIV]], ptr addrspace(5) %[[GEP_X]]
64-
// CHECK-NEXT: call void @[[OUTLINED_TEAMS:__omp_offloading_[A-Za-z0-9_.]*]](ptr %structArg.ascast)
61+
// CHECK: [[REDUCE_FINALIZE_BB:reduce\.finalize.*]]:
62+
// CHECK-NEXT: %{{.*}} = call i32 @__kmpc_global_thread_num
63+
// CHECK-NEXT: call void @__kmpc_barrier
64+
// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[X_PRIV]], i64 4)
6565

66-
// CHECK: [[REDUCE_FINALIZE_BB:reduce\.finalize.*]]:
67-
// CHECK-NEXT: %{{.*}} = call i32 @__kmpc_global_thread_num
68-
// CHECK-NEXT: call void @__kmpc_barrier
69-
// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[X_PRIV]], i64 4)
66+
// CHECK: define internal void @[[OUTLINED_TEAMS]]
67+
// CHECK: %[[Y_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
68+
// CHECK: %[[Z_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
7069

70+
// %[[GEP_Y:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
71+
// store ptr %[[Y_PRIV]], ptr addrspace(5) %[[GEP_Y]], align 8
72+
// %[[GEP_Z:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
73+
// store ptr %[[Z_PRIV]], ptr addrspace(5) %[[GEP_Z]], align 8
7174

72-
// CHECK: define internal void @[[OUTLINED_TEAMS]]
73-
// CHECK: %[[Y_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
74-
// CHECK: %[[Z_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
75+
// CHECK: call void @__kmpc_free_shared(ptr %[[Y_PRIV]], i64 4)
76+
// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[Z_PRIV]], i64 4)
77+
// CHECK-NEXT: br label %[[EXIT_BB:.*]]
7578

76-
// %[[GEP_Y:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
77-
// store ptr %[[Y_PRIV]], ptr addrspace(5) %[[GEP_Y]], align 8
78-
// %[[GEP_Z:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
79-
// store ptr %[[Z_PRIV]], ptr addrspace(5) %[[GEP_Z]], align 8
79+
// CHECK: [[EXIT_BB]]:
80+
// CHECK-NEXT: ret void
8081

81-
// CHECK: call void @__kmpc_free_shared(ptr %[[Y_PRIV]], i64 4)
82-
// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[Z_PRIV]], i64 4)
83-
// CHECK-NEXT: br label %[[EXIT_BB:.*]]
82+
// Test that we don't misidentify a private `distribute` value as being
83+
// located inside of a parallel region if that parallel region is not nested
84+
// inside of `omp.distribute`.
85+
omp.parallel {
86+
%18 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"}
87+
omp.target map_entries(%18 -> %arg0 : !llvm.ptr) {
88+
%19 = llvm.mlir.constant(10000 : i32) : i32
89+
%20 = llvm.mlir.constant(1 : i32) : i32
90+
omp.teams {
91+
omp.distribute private(@privatizer %arg0 -> %arg1 : !llvm.ptr) {
92+
omp.loop_nest (%arg2) : i32 = (%20) to (%19) inclusive step (%20) {
93+
llvm.store %arg2, %arg1 : i32, !llvm.ptr
94+
omp.yield
95+
}
96+
}
97+
omp.terminator
98+
}
99+
omp.terminator
100+
}
101+
omp.terminator
102+
}
103+
// CHECK: call i32 @__kmpc_target_init
104+
// CHECK-NOT: call {{.*}} @__kmpc_alloc_shared
105+
// CHECK-NOT: call {{.*}} @__kmpc_free_shared
84106

85-
// CHECK: [[EXIT_BB]]:
86-
// CHECK-NEXT: ret void
107+
llvm.return
108+
}
109+
}

0 commit comments

Comments
 (0)