-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause #152831
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/kevinsala/omp-dyn-groupprivate-codegen-pr
Are you sure you want to change the base?
[OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause #152831
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-offload Author: Kevin Sala Penades (kevinsala) Changes: Part 2 adding offload runtime support. See #152651. Patch is 39.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152831.diff 23 Files Affected:
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index 2e5d92380f040..a43b506d6879e 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -163,4 +163,8 @@ typedef enum omp_allocator_handle_t {
///}
+enum omp_access_t {
+ omp_access_cgroup = 0,
+};
+
#endif
diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4..672afea206785 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -222,7 +222,7 @@ struct KernelEnvironmentTy;
int8_t __kmpc_is_spmd_exec_mode();
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
void __kmpc_target_deinit();
diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h
index db396dae6e445..17c3c6f2d3e42 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -116,7 +116,7 @@ extern Local<ThreadStateTy **> ThreadStates;
/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
/// Return the kernel and kernel launch environment associated with the current
/// kernel. The former is static and contains compile time information that
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index 467e44a65276c..58e9a09105a76 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -34,8 +34,8 @@ enum OMPTgtExecModeFlags : unsigned char {
};
static void
-inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+initializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
// Order is important here.
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
@@ -80,17 +80,17 @@ extern "C" {
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
if (IsSPMD) {
- inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
- KernelLaunchEnvironment);
+ initializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
+ KernelLaunchEnvironment);
synchronize::threadsAligned(atomic::relaxed);
} else {
- inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
- KernelLaunchEnvironment);
+ initializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
+ KernelLaunchEnvironment);
// No need to wait since only the main threads will execute user
// code and workers will run into a barrier right away.
}
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index 62b03e7bba720..9e2a9999167b4 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -158,6 +158,34 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
memory::freeGlobal(Ptr, "Slow path shared memory deallocation");
}
+struct DynCGroupMemTy {
+ void init(KernelLaunchEnvironmentTy *KLE, void *NativeDynCGroup) {
+ Size = 0;
+ Ptr = nullptr;
+ IsFallback = false;
+ if (KLE) {
+ Size = KLE->DynCGroupMemSize;
+ if (void *Fallback = KLE->DynCGroupMemFallback) {
+ Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
+ IsFallback = true;
+ } else {
+ Ptr = static_cast<char *>(NativeDynCGroup);
+ }
+ }
+ }
+
+ char *getPtr(size_t Offset) const { return Ptr + Offset; }
+ bool isFallback() const { return IsFallback; }
+ size_t getSize() const { return Size; }
+
+private:
+ char *Ptr;
+ size_t Size;
+ bool IsFallback;
+};
+
+[[clang::loader_uninitialized]] static Local<DynCGroupMemTy> DynCGroupMem;
+
} // namespace
void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }
@@ -246,13 +274,18 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
} // namespace
void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ KernelLaunchEnvironmentTy *KLE) {
SharedMemorySmartStack.init(IsSPMD);
+
+ if (KLE == reinterpret_cast<KernelLaunchEnvironmentTy *>(~0))
+ KLE = nullptr;
+
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
+ DynCGroupMem.init(KLE, DynamicSharedBuffer);
TeamState.init(IsSPMD);
ThreadStates = nullptr;
KernelEnvironmentPtr = &KernelEnvironment;
- KernelLaunchEnvironmentPtr = &KernelLaunchEnvironment;
+ KernelLaunchEnvironmentPtr = KLE;
}
}
@@ -430,6 +463,17 @@ int omp_get_team_num() { return mapping::getBlockIdInKernel(); }
int omp_get_initial_device(void) { return -1; }
int omp_is_initial_device(void) { return 0; }
+
+void *omp_get_dyn_groupprivate_ptr(size_t Offset, int *IsFallback,
+ omp_access_t) {
+ if (IsFallback != NULL)
+ *IsFallback = DynCGroupMem.isFallback();
+ return DynCGroupMem.getPtr(Offset);
+}
+
+size_t omp_get_dyn_groupprivate_size(omp_access_t) {
+ return DynCGroupMem.getSize();
+}
}
extern "C" {
diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h
index 978b53d5d69b9..0ef2dd162292b 100644
--- a/offload/include/Shared/APITypes.h
+++ b/offload/include/Shared/APITypes.h
@@ -97,8 +97,10 @@ struct KernelArgsTy {
struct {
uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA.
- uint64_t Unused : 62;
- } Flags = {0, 0, 0};
+ uint64_t AllowDynCGroupMemFallback : 1; // Allow fallback for dynamic cgroup
+ // mem fallback.
+ uint64_t Unused : 61;
+ } Flags = {0, 0, 0, 0};
// The number of teams (for x,y,z dimension).
uint32_t NumTeams[3] = {0, 0, 0};
// The number of threads (for x,y,z dimension).
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index 2a283bd6fa4ed..0670ac1090da4 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -93,9 +93,11 @@ struct KernelEnvironmentTy {
};
struct KernelLaunchEnvironmentTy {
+ void *ReductionBuffer = nullptr;
+ void *DynCGroupMemFallback = nullptr;
uint32_t ReductionCnt = 0;
uint32_t ReductionIterCnt = 0;
- void *ReductionBuffer = nullptr;
+ uint32_t DynCGroupMemSize = 0;
};
#endif // OMPTARGET_SHARED_ENVIRONMENT_H
diff --git a/offload/include/device.h b/offload/include/device.h
index f4b10abbaa3fd..0e93cf8ec1a8b 100644
--- a/offload/include/device.h
+++ b/offload/include/device.h
@@ -158,6 +158,9 @@ struct DeviceTy {
/// Indicate that there are pending images for this device or not.
void setHasPendingImages(bool V) { HasPendingImages = V; }
+ /// Get the maximum shared memory per team for any kernel.
+ uint64_t getMaxSharedTeamMemory();
+
private:
/// Deinitialize the device (and plugin).
void deinit();
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 6971780c7bdb5..45bb74ec367d6 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -107,7 +107,7 @@ enum TargetAllocTy : int32_t {
inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
- 0, {0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
+ 0, {0,0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
struct DeviceTy;
@@ -273,10 +273,15 @@ struct __tgt_target_non_contig {
extern "C" {
#endif
+typedef enum {
+ omp_access_cgroup = 0,
+} omp_access_t;
+
void ompx_dump_mapping_tables(void);
int omp_get_num_devices(void);
int omp_get_device_num(void);
int omp_get_initial_device(void);
+size_t omp_get_groupprivate_limit(int device_num, omp_access_t access_group = omp_access_cgroup);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 4576f9bd06121..1ed4192157fc8 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -98,6 +98,20 @@ EXTERN int omp_get_initial_device(void) {
return HostDevice;
}
+EXTERN size_t omp_get_groupprivate_limit(int DeviceNum,
+ omp_access_t AccessGroup) {
+ TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+ if (DeviceNum == omp_get_initial_device())
+ return 0;
+
+ auto DeviceOrErr = PM->getDevice(DeviceNum);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+ return DeviceOrErr->getMaxSharedTeamMemory();
+}
+
EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) +
";size=" + std::to_string(Size));
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index f88e30ae9e76b..31bfc7d092424 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -281,3 +281,9 @@ bool DeviceTy::useAutoZeroCopy() {
return false;
return RTL->use_auto_zero_copy(RTLDeviceID);
}
+
+uint64_t DeviceTy::getMaxSharedTeamMemory() {
+ using DeviceQueryKind = llvm::omp::target::plugin::DeviceQueryKind;
+ return RTL->query_device_info(
+ RTLDeviceID, DeviceQueryKind::DEVICE_QUERY_MAX_SHARED_TEAM_MEM);
+}
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2406776c1fb5f..b5a1401564d58 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -40,6 +40,7 @@ VERS1.0 {
omp_get_num_devices;
omp_get_device_num;
omp_get_initial_device;
+ omp_get_groupprivate_limit;
omp_target_alloc;
omp_target_free;
omp_target_is_present;
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
index 3117763e35896..2cf156e576c5f 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
@@ -52,6 +52,7 @@ typedef enum {
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
+ HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
} hsa_amd_memory_pool_info_t;
typedef enum {
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 12c7cc62905c9..fa373c2029f0c 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -273,7 +273,6 @@ struct AMDGPUMemoryPoolTy {
if (auto Err = getAttr(HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, GlobalFlags))
return Err;
-
return Plugin::success();
}
@@ -543,6 +542,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
return Err;
}
+ StaticBlockMemSize = GroupSize;
+
// Make sure it is a kernel symbol.
if (SymbolType != HSA_SYMBOL_KIND_KERNEL)
return Plugin::error(ErrorCode::INVALID_BINARY,
@@ -566,8 +567,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
/// Launch the AMDGPU kernel function.
Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads[3],
- uint32_t NumBlocks[3], KernelArgsTy &KernelArgs,
- KernelLaunchParamsTy LaunchParams,
+ uint32_t NumBlocks[3], uint32_t DynBlockMemSize,
+ KernelArgsTy &KernelArgs, KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
/// Print more elaborate kernel launch info for AMDGPU
@@ -2020,6 +2021,20 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = checkIfAPU())
return Err;
+ // Retrieve the size of the group memory.
+ for (const auto *Pool : AllMemoryPools) {
+ if (Pool->isGroup()) {
+ size_t Size = 0;
+ if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE, Size))
+ return Err;
+ MaxBlockSharedMemSize = Size;
+ break;
+ }
+ }
+
+ // Supports block shared memory natively.
+ HasNativeBlockSharedMem = true;
+
return Plugin::success();
}
@@ -2856,7 +2871,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
KernelArgsTy KernelArgs = {};
uint32_t NumBlocksAndThreads[3] = {1u, 1u, 1u};
if (auto Err = AMDGPUKernel.launchImpl(
- *this, NumBlocksAndThreads, NumBlocksAndThreads, KernelArgs,
+ *this, NumBlocksAndThreads, NumBlocksAndThreads, 0, KernelArgs,
KernelLaunchParamsTy{}, AsyncInfoWrapper))
return Err;
@@ -3357,6 +3372,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads[3], uint32_t NumBlocks[3],
+ uint32_t DynBlockMemSize,
KernelArgsTy &KernelArgs,
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
@@ -3374,13 +3390,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
return Err;
- // Account for user requested dynamic shared memory.
- uint32_t GroupSize = getGroupSize();
- if (uint32_t MaxDynCGroupMem = std::max(
- KernelArgs.DynCGroupMem, GenericDevice.getDynamicMemorySize())) {
- GroupSize += MaxDynCGroupMem;
- }
-
uint64_t StackSize;
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
return Err;
@@ -3434,7 +3443,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
// Push the kernel launch into the stream.
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
- GroupSize, StackSize, ArgsMemoryManager);
+ getStaticBlockMemSize() + DynBlockMemSize,
+ StackSize, ArgsMemoryManager);
}
Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 162b149ab483e..3357ccfe0c9b5 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -226,6 +226,10 @@ struct InfoTreeNode {
}
};
+enum class DeviceQueryKind {
+ DEVICE_QUERY_MAX_SHARED_TEAM_MEM = 0,
+};
+
/// Class wrapping a __tgt_device_image and its offload entry table on a
/// specific device. This class is responsible for storing and managing
/// the offload entries for an image on a device.
@@ -312,13 +316,16 @@ struct GenericKernelTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) const;
virtual Error launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads[3], uint32_t NumBlocks[3],
- KernelArgsTy &KernelArgs,
+ uint32_t DynBlockMemSize, KernelArgsTy &KernelArgs,
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0;
/// Get the kernel name.
const char *getName() const { return Name.c_str(); }
+ /// Get the size of the static per-block memory consumed by the kernel.
+ uint32_t getStaticBlockMemSize() const { return StaticBlockMemSize; };
+
/// Get the kernel image.
DeviceImageTy &getImage() const {
assert(ImagePtr && "Kernel is not initialized!");
@@ -331,9 +338,9 @@ struct GenericKernelTy {
}
/// Return a device pointer to a new kernel launch environment.
- Expected<KernelLaunchEnvironmentTy *>
- getKernelLaunchEnvironment(GenericDeviceTy &GenericDevice, uint32_t Version,
- AsyncInfoWrapperTy &AsyncInfo) const;
+ Expected<KernelLaunchEnvironmentTy *> getKernelLaunchEnvironment(
+ GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
+ void *FallbackBlockMem, AsyncInfoWrapperTy &AsyncInfo) const;
/// Indicate whether an execution mode is valid.
static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
@@ -425,6 +432,9 @@ struct GenericKernelTy {
/// The maximum number of threads which the kernel could leverage.
uint32_t MaxNumThreads;
+ /// The static memory sized per block.
+ uint32_t StaticBlockMemSize = 0;
+
/// The kernel environment, including execution flags.
KernelEnvironmentTy KernelEnvironment;
@@ -731,6 +741,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// this id is not unique between different plugins; they may overlap.
int32_t getDeviceId() const { return DeviceId; }
+ /// Get the total shared memory per block that can be used in any kernel.
+ uint32_t getMaxBlockSharedMemSize() const { return MaxBlockSharedMemSize; }
+
+ /// Indicate whether the device has native block shared memory.
+ bool hasNativeBlockSharedMem() const { return HasNativeBlockSharedMem; }
+
/// Set the context of the device if needed, before calling device-specific
/// functions. Plugins may implement this function as a no-op if not needed.
virtual Error setContext() = 0;
@@ -1132,6 +1148,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
std::atomic<bool> OmptInitialized;
#endif
+ /// The total per-block shared memory that a kernel may use.
+ uint32_t MaxBlockSharedMemSize = 0;
+
+ /// Whether the device has native block shared memory.
+ bool HasNativeBlockSharedMem = false;
+
private:
DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
@@ -1347,6 +1369,9 @@ struct GenericPluginTy {
/// Prints information about the given devices supported by the plugin.
void print_device_info(int32_t DeviceId);
+ /// Retrieve information about the given device.
+ int64_t query_device_info(int32_t DeviceId, DeviceQueryKind Query);
+
/// Creates an event in the given plugin if supported.
int32_t create_event(int32_t DeviceId, void **EventPtr);
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 81b9d423e13d8..2997585e1660f 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -477,20 +477,20 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
Expected<KernelLaunchEnvironmentTy *>
GenericKernelTy::getKernelLaunchEnvironment(
- GenericDeviceTy &GenericDevice, uint32_t Version,
- AsyncInfoWrapperTy &AsyncInfoWrapper) const {
+ GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
+ void *FallbackBlockMem, AsyncInfoWrapperTy &AsyncInfoWrapper) const {
// Ctor/Dtor have no arguments, replaying uses the original kernel launch
// environment. Older versions of the compiler do not generate a kernel
// launch environment.
if (GenericDevice.Plugin.getRecordReplay().isReplaying() ||
- Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
+ KernelArgs.Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
return nullptr;
- if (!KernelEnvironment....
[truncated]
|
Size = 0;
Ptr = nullptr;
IsFallback = false;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move to field initializers?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the `clang::loader_uninitialized` attribute on the `DynCGroupMem` variable, I can't use field initializers or a constructor.
✅ With the latest revision this PR passed the C/C++ code formatter.
20a96c6
to
f20f4ba
Compare
f20f4ba
to
f2c6f97
Compare
Part 2 adding offload runtime support. See #152651.