From 435d760c5229965f7339044a7e6995fb0bbfd8a9 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Wed, 1 Mar 2023 07:14:29 -0800 Subject: [PATCH 01/37] Adds preliminar support for atomic_fence_order_capabilities. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 12 +++++++ sycl/include/sycl/info/device_traits.def | 3 ++ sycl/include/sycl/info/info_desc.hpp | 2 ++ sycl/plugins/cuda/pi_cuda.cpp | 3 ++ .../esimd_emulator/pi_esimd_emulator.cpp | 1 + sycl/plugins/hip/pi_hip.cpp | 2 ++ sycl/plugins/opencl/pi_opencl.cpp | 33 +++++++++++++++++++ sycl/source/detail/device_info.hpp | 20 +++++++++++ 8 files changed, 76 insertions(+) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 56b8b33fae583..19a5587d378e9 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -313,6 +313,7 @@ typedef enum { PI_DEVICE_INFO_ATOMIC_64 = 0x10110, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, + PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10114, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, // Return whether bfloat16 math functions are supported by device @@ -561,6 +562,17 @@ constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10; +// CL equivalents are only available for OpenCL version 3.0 +#define PI_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 +using pi_device_atomic_capabilities = pi_bitfield; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_RELAXED = 0x01; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_ACQ_REL = 0x02; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_SEQ_CST = 0x04; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM = 0x08; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP = 0x10; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_DEVICE = 0x20; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES = 0x40; + typedef enum { PI_PROFILING_INFO_COMMAND_QUEUED = 0x1280, PI_PROFILING_INFO_COMMAND_SUBMIT = 0x1281, diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index 457889455d198..30876d918e2ae 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -112,6 +112,9 @@ __SYCL_PARAM_TRAITS_SPEC(device, host_unified_memory, bool, __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities, std::vector, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_order_capabilities, + std::vector, + PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities, std::vector, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index a2be0891799dc..2d8930600aa59 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -95,6 +95,8 @@ namespace device { // atomic_fence_order_capabilities, atomic_fence_scope_capabilities, aspects, // il_version. +struct atomic_fence_order_capabilities; + #define __SYCL_PARAM_TRAITS_DEPRECATED(Desc, Message) \ struct __SYCL2020_DEPRECATED(Message) Desc; #include diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 2d88978d87780..abcb7bc7e23f7 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1010,6 +1010,9 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } + case PI_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + // There is no way to query this in the backend + return PI_ERROR_INVALID_ARG_VALUE; case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { int major = 0; sycl::detail::pi::assertion( diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 0fc2a5a10f4f9..7f38d24111531 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -805,6 +805,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_IMAGE_SRGB) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_64) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) + CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index ddca2a872adfc..882b908056d64 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1865,6 +1865,8 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } // TODO: Investigate if this information is available on HIP. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + // There is no way to query this in the backend + case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case PI_DEVICE_INFO_DEVICE_ID: case PI_DEVICE_INFO_PCI_ADDRESS: case PI_DEVICE_INFO_GPU_EU_COUNT: diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 2c44f0cfe9eb3..1f22b98545d25 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -285,6 +285,39 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: return PI_ERROR_INVALID_VALUE; + case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { + pi_memory_order_capabilities capabilities = + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; + + OCLV::OpenCLVersion devVer; + + cl_device_id deviceID = cast(device); + cl_int ret_err = getDeviceVersion(deviceID, devVer); + if (ret_err != CL_SUCCESS) + return static_cast(ret_err); + + if (devVer >= OCLV::V3_0) { + pi_device_atomic_capabilities devCapabilities = 0; + ret_err = clGetDeviceInfo( + deviceID, PI_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(pi_device_atomic_capabilities), &devCapabilities, nullptr); + if (ret_err != CL_SUCCESS) + return static_cast(ret_err); + assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_RELAXED && "Violates minimum mandate guarantee"); + assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_ACQ_REL && "Violates minimum mandate guarantee"); + + if (devCapabilities && PI_DEVICE_ATOMIC_ORDER_SEQ_CST) { + capabilities |= PI_MEMORY_ORDER_SEQ_CST; + } + + std::memcpy(paramValue, &devCapabilities, sizeof(devCapabilities)); + return PI_SUCCESS; + } else { + // This info is only available in OpenCL version >= 3.0 + return PI_ERROR_INVALID_ARG_VALUE; + } + } case PI_DEVICE_INFO_ATOMIC_64: { cl_int ret_err = CL_SUCCESS; cl_bool result = CL_FALSE; diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 8c0417f050e32..8d5f6fdda9f42 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -275,6 +275,19 @@ struct get_device_info_impl, } }; +// Specialization for atomic_fence_order_capabilities, PI returns a bitfield +template <> +struct get_device_info_impl, + info::device::atomic_fence_order_capabilities> { + static std::vector get(RT::PiDevice dev, const plugin &Plugin) { + pi_memory_order_capabilities result; + Plugin.call_nocheck( + dev, PiInfoCode::value, + sizeof(pi_memory_order_capabilities), &result, nullptr); + return readMemoryOrderBitfield(result); + } +}; + // Specialization for atomic_memory_scope_capabilities, PI returns a bitfield template <> struct get_device_info_impl, @@ -1005,6 +1018,13 @@ get_device_info_host() { memory_order::acq_rel, memory_order::seq_cst}; } +template <> +inline std::vector +get_device_info_host() { + return {memory_order::relaxed, memory_order::acquire, memory_order::release, + memory_order::acq_rel}; +} + template <> inline std::vector get_device_info_host() { From d1269966c7cdf7637d790ba02486470dff08916c Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Wed, 8 Mar 2023 08:31:29 -0800 Subject: [PATCH 02/37] Adds support for atomic fence capabilities device queries. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 1 + sycl/include/sycl/info/device_traits.def | 3 + sycl/include/sycl/info/info_desc.hpp | 1 + sycl/plugins/cuda/pi_cuda.cpp | 1 + .../esimd_emulator/pi_esimd_emulator.cpp | 1 + sycl/plugins/hip/pi_hip.cpp | 1 + sycl/plugins/opencl/pi_opencl.cpp | 85 ++++++++++++++++--- sycl/plugins/unified_runtime/pi2ur.hpp | 4 + .../ur/adapters/level_zero/ur_level_zero.cpp | 21 +++++ sycl/plugins/unified_runtime/ur/ur.hpp | 4 + sycl/source/detail/device_info.hpp | 20 +++++ 11 files changed, 132 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 19a5587d378e9..9cd9aff81d424 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -314,6 +314,7 @@ typedef enum { PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10114, + PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10115, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, // Return whether bfloat16 math functions are supported by device diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index 30876d918e2ae..92e36e9913fe0 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -118,6 +118,9 @@ __SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_order_capabilities, __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities, std::vector, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_scope_capabilities, + std::vector, + PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, profiling_timer_resolution, size_t, PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION) __SYCL_PARAM_TRAITS_SPEC(device, is_endian_little, bool, diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index 2d8930600aa59..c1b52decb681c 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -96,6 +96,7 @@ namespace device { // il_version. struct atomic_fence_order_capabilities; +struct atomic_fence_scope_capabilities; #define __SYCL_PARAM_TRAITS_DEPRECATED(Desc, Message) \ struct __SYCL2020_DEPRECATED(Message) Desc; diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index abcb7bc7e23f7..11d62ebf2e98b 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1011,6 +1011,7 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name, capabilities); } case PI_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: // There is no way to query this in the backend return PI_ERROR_INVALID_ARG_VALUE; case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 7f38d24111531..abacdf6c71d86 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -806,6 +806,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_64) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) + CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 882b908056d64..8d610456498d0 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1867,6 +1867,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: // There is no way to query this in the backend case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: case PI_DEVICE_INFO_DEVICE_ID: case PI_DEVICE_INFO_PCI_ADDRESS: case PI_DEVICE_INFO_GPU_EU_COUNT: diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1f22b98545d25..5c8d47168c61b 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1,4 +1,4 @@ -//==---------- pi_opencl.cpp - OpenCL Plugin -------------------------------==// +//==---------- pi_ope // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -286,7 +286,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: return PI_ERROR_INVALID_VALUE; case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { - pi_memory_order_capabilities capabilities = + // Initialize result to minimum mandated capabilities according to + // SYCL2020 4.6.3.2 + pi_memory_order_capabilities result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; @@ -297,26 +299,89 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, if (ret_err != CL_SUCCESS) return static_cast(ret_err); + pi_device_atomic_capabilities devCapabilities = 0; if (devVer >= OCLV::V3_0) { - pi_device_atomic_capabilities devCapabilities = 0; ret_err = clGetDeviceInfo( deviceID, PI_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(pi_device_atomic_capabilities), &devCapabilities, nullptr); if (ret_err != CL_SUCCESS) return static_cast(ret_err); - assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_RELAXED && "Violates minimum mandate guarantee"); - assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_ACQ_REL && "Violates minimum mandate guarantee"); + assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_RELAXED && + "Violates minimum mandated guarantee"); + assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_ACQ_REL && + "Violates minimum mandated guarantee"); if (devCapabilities && PI_DEVICE_ATOMIC_ORDER_SEQ_CST) { - capabilities |= PI_MEMORY_ORDER_SEQ_CST; + result |= PI_MEMORY_ORDER_SEQ_CST; + } + + } else { + // This info is only available in OpenCL version >= 3.0 + // Just return minimum mandated capabilities for older versions. + // OpenCL 1.x minimum mandated capabilities are RELAXED | ACQ_REL, we + // already initialized using these. + if (devVer >= OCLV::V2_0) { + // OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL | + // SEQ_CST + result |= PI_MEMORY_ORDER_SEQ_CST; + } + } + std::memcpy(paramValue, &result, sizeof(result)); + return PI_SUCCESS; + } + case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + // Initialize result to minimum mandated capabilities according to + // SYCL2020 4.6.3.2 + pi_memory_scope_capabilities result = + PI_MEMORY_SCOPE_SUB_GROUP | PI_MEMORY_SCOPE_WORK_GROUP; + + OCLV::OpenCLVersion devVer; + + cl_device_id deviceID = cast(device); + cl_int ret_err = getDeviceVersion(deviceID, devVer); + if (ret_err != CL_SUCCESS) + return static_cast(ret_err); + + pi_device_atomic_capabilities devCapabilities = 0; + if (devVer >= OCLV::V3_0) { + ret_err = clGetDeviceInfo(deviceID, PI_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(pi_device_atomic_capabilities), + &devCapabilities, nullptr); + if (ret_err != CL_SUCCESS) + return static_cast(ret_err); + assert(devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP && + "Violates minimum mandated guarantee"); + + // Because scopes are hierarchical, wider scopes support all narrower + // scopes (except work_item which is a special case). SUB_GROUP was + // already included in the initialization, since WORK_GROUP is mandated + // minimum capality. + + // Special case, only enable if it is explicitly enabled in the backend + if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM) + result |= PI_MEMORY_SCOPE_WORK_ITEM; + + if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_DEVICE) { + result |= PI_MEMORY_SCOPE_DEVICE; + } + + if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + result |= PI_MEMORY_SCOPE_SYSTEM; } - std::memcpy(paramValue, &devCapabilities, sizeof(devCapabilities)); - return PI_SUCCESS; } else { - // This info is only available in OpenCL version >= 3.0 - return PI_ERROR_INVALID_ARG_VALUE; + // This info is only available in OpenCL version >= 3.0 + // Just return minimum mandated capabilities for older versions. + // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we + // already initialized using it. + if (devVer >= OCLV::V2_0) { + // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + // ALL_DEVICES + result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; + } } + std::memcpy(paramValue, &result, sizeof(result)); + return PI_SUCCESS; } case PI_DEVICE_INFO_ATOMIC_64: { cl_int ret_err = CL_SUCCESS; diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 224589f482578..2211de7a687e5 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -488,6 +488,10 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, (zer_device_info_t)ZER_EXT_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS}, {PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, + {PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, + (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES}, + {PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, + (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES}, }; auto InfoType = InfoMapping.find(ParamName); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 450a5aff1a4d8..322b57bca7a02 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===-----------------------------------------------------------------===// +#include #include #include @@ -1160,6 +1161,26 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( // bfloat16 math functions are not yet supported on Intel GPUs. return ReturnValue(bool{false}); } + case ZER_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { + // There are no explicit restrictions in L0 programming guide, so assume all + // are supported + pi_memory_order_capabilities result = + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + + return ReturnValue(result); + } + case ZER_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + // There are no explicit restrictions in L0 programming guide, so assume all + // are supported + pi_memory_scope_capabilities result = + PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | + PI_MEMORY_SCOPE_SYSTEM; + + return ReturnValue(result); + } // TODO: Implement. case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: diff --git a/sycl/plugins/unified_runtime/ur/ur.hpp b/sycl/plugins/unified_runtime/ur/ur.hpp index 2bafa14ba5898..327be2120ac2c 100644 --- a/sycl/plugins/unified_runtime/ur/ur.hpp +++ b/sycl/plugins/unified_runtime/ur/ur.hpp @@ -43,6 +43,10 @@ const int ZER_EXT_DEVICE_INFO_FREE_MEMORY = ZER_EXT_DEVICE_INFO_END - 13; const int ZER_EXT_DEVICE_INFO_DEVICE_ID = ZER_EXT_DEVICE_INFO_END - 14; const int ZER_EXT_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = ZER_DEVICE_INFO_IMAGE_MAX_ARRAR_SIZE; +const int ZER_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = + ZER_EXT_DEVICE_INFO_END - 16; +const int ZER_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = + ZER_EXT_DEVICE_INFO_END - 17; const int ZER_EXT_RESULT_END = 0x1000; const zer_result_t ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR = diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 8d5f6fdda9f42..d902f4ffa8afa 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -301,6 +301,19 @@ struct get_device_info_impl, } }; +// Specialization for atomic_fence_scope_capabilities, PI returns a bitfield +template <> +struct get_device_info_impl, + info::device::atomic_fence_scope_capabilities> { + static std::vector get(RT::PiDevice dev, const plugin &Plugin) { + pi_memory_scope_capabilities result; + Plugin.call_nocheck( + dev, PiInfoCode::value, + sizeof(pi_memory_scope_capabilities), &result, nullptr); + return readMemoryScopeBitfield(result); + } +}; + // Specialization for bf16 math functions template <> struct get_device_info_impl() { memory_scope::work_group, memory_scope::device, memory_scope::system}; } +template <> +inline std::vector +get_device_info_host() { + return {memory_scope::work_item, memory_scope::sub_group, + memory_scope::work_group, memory_scope::device, memory_scope::system}; +} + template <> inline bool get_device_info_host() { From fbb2998c748b46900d4b8baf6403f59c9376e14d Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Wed, 8 Mar 2023 08:31:50 -0800 Subject: [PATCH 03/37] Adds tests for atomic fence capabilities device queries. Signed-off-by: Maronas, Marcos --- .../SYCL2020/AtomicFenceCapabilities.cpp | 107 ++++++++++++++++++ sycl/unittests/SYCL2020/CMakeLists.txt | 1 + 2 files changed, 108 insertions(+) create mode 100644 sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp new file mode 100644 index 0000000000000..dd0ba08ec2943 --- /dev/null +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -0,0 +1,107 @@ +//==----------------- DeviceCheck.cpp --- queue unit tests -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +using namespace sycl; + +namespace { + +pi_platform PiPlatform = nullptr; + +pi_result redefinedDeviceGetInfoAfter(pi_device device, + pi_device_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { + if (param_value) { + auto *Result = + reinterpret_cast(param_value); + *Result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + } + } else if (param_name == PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { + if (param_value) { + auto *Result = + reinterpret_cast(param_value); + *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | + PI_MEMORY_SCOPE_SYSTEM; + } + } + return PI_SUCCESS; +} + +TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceOrderCapabilities) { + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + + PiPlatform = detail::getSyclObjImpl(Plt)->getHandleRef(); + context DefaultCtx = Plt.ext_oneapi_get_default_context(); + device Dev = DefaultCtx.get_devices()[0]; + + Mock.redefineAfter( + redefinedDeviceGetInfoAfter); + auto order_capabilities = + Dev.get_info(); + size_t expectedSize = 5; + EXPECT_EQ(order_capabilities.size(), expectedSize); + + auto res = std::find(order_capabilities.begin(), order_capabilities.end(), + sycl::memory_order::relaxed); + EXPECT_FALSE(res == order_capabilities.end()); + res = std::find(order_capabilities.begin(), order_capabilities.end(), + sycl::memory_order::acquire); + EXPECT_FALSE(res == order_capabilities.end()); + res = std::find(order_capabilities.begin(), order_capabilities.end(), + sycl::memory_order::release); + EXPECT_FALSE(res == order_capabilities.end()); + res = std::find(order_capabilities.begin(), order_capabilities.end(), + sycl::memory_order::acq_rel); + EXPECT_FALSE(res == order_capabilities.end()); + res = std::find(order_capabilities.begin(), order_capabilities.end(), + sycl::memory_order::seq_cst); + EXPECT_FALSE(res == order_capabilities.end()); +} + +TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceScopeCapabilities) { + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + + PiPlatform = detail::getSyclObjImpl(Plt)->getHandleRef(); + context DefaultCtx = Plt.ext_oneapi_get_default_context(); + device Dev = DefaultCtx.get_devices()[0]; + + Mock.redefineAfter( + redefinedDeviceGetInfoAfter); + auto scope_capabilities = + Dev.get_info(); + size_t expectedSize = 5; + EXPECT_EQ(scope_capabilities.size(), expectedSize); + + auto res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::work_item); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::sub_group); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::work_group); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::device); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::system); + EXPECT_FALSE(res == scope_capabilities.end()); +} +} // anonymous namespace diff --git a/sycl/unittests/SYCL2020/CMakeLists.txt b/sycl/unittests/SYCL2020/CMakeLists.txt index 9e22f73abfa00..5566081dddb36 100644 --- a/sycl/unittests/SYCL2020/CMakeLists.txt +++ b/sycl/unittests/SYCL2020/CMakeLists.txt @@ -9,5 +9,6 @@ add_sycl_unittest(SYCL2020Tests OBJECT IsCompatible.cpp DeviceGetInfoAspects.cpp DeviceAspectTraits.cpp + AtomicFenceCapabilities.cpp ) From 8e07de6e4f42d5ff856d5dea4b4c789f34e20b50 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 02:07:17 -0800 Subject: [PATCH 04/37] Updateds atomic fence capabilities unittest. Signed-off-by: Maronas, Marcos --- sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp index dd0ba08ec2943..32fc86a7ad1fe 100644 --- a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -14,6 +14,8 @@ using namespace sycl; namespace { +thread_local bool isRedefined; + pi_platform PiPlatform = nullptr; pi_result redefinedDeviceGetInfoAfter(pi_device device, @@ -21,6 +23,7 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { + isRedefined = true; if (param_name == PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { if (param_value) { auto *Result = @@ -49,10 +52,13 @@ TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceOrderCapabilities) { context DefaultCtx = Plt.ext_oneapi_get_default_context(); device Dev = DefaultCtx.get_devices()[0]; + isRedefined = false; + Mock.redefineAfter( redefinedDeviceGetInfoAfter); auto order_capabilities = Dev.get_info(); + EXPECT_TRUE(isRedefined); size_t expectedSize = 5; EXPECT_EQ(order_capabilities.size(), expectedSize); @@ -81,10 +87,13 @@ TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceScopeCapabilities) { context DefaultCtx = Plt.ext_oneapi_get_default_context(); device Dev = DefaultCtx.get_devices()[0]; + isRedefined = false; + Mock.redefineAfter( redefinedDeviceGetInfoAfter); auto scope_capabilities = Dev.get_info(); + EXPECT_TRUE(isRedefined); size_t expectedSize = 5; EXPECT_EQ(scope_capabilities.size(), expectedSize); From 8327cb08da968ed3a83fe353dd1558c6777c5174 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 02:07:34 -0800 Subject: [PATCH 05/37] Updates ABI test. Signed-off-by: Maronas, Marcos --- sycl/test/abi/sycl_symbols_linux.dump | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 21f2d70806e09..b72fcc2b3b4cb 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3,7 +3,7 @@ # DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. ################################################################################ -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libsycl.so +# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libsycl.so # REQUIRES: linux # UNSUPPORTED: libcxx @@ -4332,6 +4332,8 @@ _ZNK4sycl3_V16device8get_infoINS0_4info6device29ext_oneapi_max_work_groups_2dEEE _ZNK4sycl3_V16device8get_infoINS0_4info6device29ext_oneapi_max_work_groups_3dEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device29preferred_vector_width_doubleEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device30partition_type_affinity_domainEEENS0_6detail19is_device_info_descIT_E11return_typeEv +_ZNK4sycl3_V16device8get_infoINS0_4info6device31atomic_fence_order_capabilitiesEEENS0_6detail19is_device_info_descIT_E11return_typeEv +_ZNK4sycl3_V16device8get_infoINS0_4info6device31atomic_fence_scope_capabilitiesEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device31ext_intel_gpu_hw_threads_per_euEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device32atomic_memory_order_capabilitiesEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device32atomic_memory_scope_capabilitiesEEENS0_6detail19is_device_info_descIT_E11return_typeEv From 46e315a6f37cc683e831e4e06234a33c2ac8bd71 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 02:27:17 -0800 Subject: [PATCH 06/37] Reverts involuntary change. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 5c8d47168c61b..326d3a44441bf 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1,4 +1,4 @@ -//==---------- pi_ope +//==---------- pi_opencl.cpp - OpenCL Plugin -------------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 4bad01d6634b84ed594e5315ccfb6996e0e53249 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 02:35:47 -0800 Subject: [PATCH 07/37] Updates atomic fence capabilities unittest. Signed-off-by: Maronas, Marcos --- sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp index 32fc86a7ad1fe..86b1d61873227 100644 --- a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -14,7 +14,7 @@ using namespace sycl; namespace { -thread_local bool isRedefined; +thread_local bool deviceGetInfoCalled; pi_platform PiPlatform = nullptr; @@ -23,8 +23,8 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - isRedefined = true; if (param_name == PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { + deviceGetInfoCalled = true; if (param_value) { auto *Result = reinterpret_cast(param_value); @@ -33,6 +33,7 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, PI_MEMORY_ORDER_SEQ_CST; } } else if (param_name == PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { + deviceGetInfoCalled = true; if (param_value) { auto *Result = reinterpret_cast(param_value); @@ -52,13 +53,13 @@ TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceOrderCapabilities) { context DefaultCtx = Plt.ext_oneapi_get_default_context(); device Dev = DefaultCtx.get_devices()[0]; - isRedefined = false; + deviceGetInfoCalled = false; Mock.redefineAfter( redefinedDeviceGetInfoAfter); auto order_capabilities = Dev.get_info(); - EXPECT_TRUE(isRedefined); + EXPECT_TRUE(deviceGetInfoCalled); size_t expectedSize = 5; EXPECT_EQ(order_capabilities.size(), expectedSize); @@ -87,13 +88,13 @@ TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceScopeCapabilities) { context DefaultCtx = Plt.ext_oneapi_get_default_context(); device Dev = DefaultCtx.get_devices()[0]; - isRedefined = false; + deviceGetInfoCalled = false; Mock.redefineAfter( redefinedDeviceGetInfoAfter); auto scope_capabilities = Dev.get_info(); - EXPECT_TRUE(isRedefined); + EXPECT_TRUE(deviceGetInfoCalled); size_t expectedSize = 5; EXPECT_EQ(scope_capabilities.size(), expectedSize); From 3a2457413069516c4f81af66ef3897ed590baa63 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 02:40:37 -0800 Subject: [PATCH 08/37] Updates PI version. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 9cd9aff81d424..04edd62cdfcea 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -77,9 +77,11 @@ // 12.22 Add piGetDeviceAndHostTimer to query device wall-clock timestamp // 12.23 Added new piextEnqueueDeviceGlobalVariableWrite and // piextEnqueueDeviceGlobalVariableRead functions. +// 12.24 Added PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES and +// PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES for piDeviceGetInfo. #define _PI_H_VERSION_MAJOR 12 -#define _PI_H_VERSION_MINOR 23 +#define _PI_H_VERSION_MINOR 24 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) From eeb02e95c8bfebede2efff44eaf3f1b94d19d14a Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 03:18:42 -0800 Subject: [PATCH 09/37] Fixes compilation error in CUDA. Signed-off-by: Maronas, Marcos --- sycl/plugins/cuda/pi_cuda.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 11d62ebf2e98b..196af07373a69 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1010,10 +1010,6 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } - case PI_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: - // There is no way to query this in the backend - return PI_ERROR_INVALID_ARG_VALUE; case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { int major = 0; sycl::detail::pi::assertion( @@ -1319,6 +1315,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } + case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: + // There is no way to query this in the backend + return PI_ERROR_INVALID_ARG_VALUE; case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { int major = 0; sycl::detail::pi::assertion( From 882916992b2ea069391493e1f2ff32969f81b3b9 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 08:14:02 -0800 Subject: [PATCH 10/37] Update atomic fence capabilities unittest header. Signed-off-by: Maronas, Marcos --- sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp index 86b1d61873227..ed29efc4fa357 100644 --- a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -1,4 +1,4 @@ -//==----------------- DeviceCheck.cpp --- queue unit tests -----------------==// +//==----------- AtomicFenceCapabilities.cpp --- queue unit tests -----------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 0ce9beb9482c20ce46e92175609361cbf341933e Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 10 Mar 2023 01:47:22 -0800 Subject: [PATCH 11/37] Fixes clang-format issue. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 326d3a44441bf..d567668abaa45 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -301,9 +301,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, pi_device_atomic_capabilities devCapabilities = 0; if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo( - deviceID, PI_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(pi_device_atomic_capabilities), &devCapabilities, nullptr); + ret_err = clGetDeviceInfo(deviceID, PI_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(pi_device_atomic_capabilities), + &devCapabilities, nullptr); if (ret_err != CL_SUCCESS) return static_cast(ret_err); assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_RELAXED && From 60132b734615d39b6587f1f463db34aedaf8d1b5 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 10 Mar 2023 02:37:06 -0800 Subject: [PATCH 12/37] Fixes clang-format issues. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 24 +++++++++++++----------- sycl/plugins/hip/pi_hip.cpp | 14 +++++++------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 04edd62cdfcea..2a2b4fe711601 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -188,7 +188,7 @@ typedef enum : pi_uint64 { PI_DEVICE_TYPE_CPU = (1 << 1), ///< A PI device that is the host processor. PI_DEVICE_TYPE_GPU = (1 << 2), ///< A PI device that is a GPU. PI_DEVICE_TYPE_ACC = (1 << 3), ///< A PI device that is a - ///< dedicated accelerator. + ///< dedicated accelerator. PI_DEVICE_TYPE_CUSTOM = (1 << 4) ///< A PI device that is a custom device. } _pi_device_type; @@ -565,16 +565,18 @@ constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10; -// CL equivalents are only available for OpenCL version 3.0 +// CL equivalents are only available for OpenCL version 3.0 #define PI_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 using pi_device_atomic_capabilities = pi_bitfield; constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_RELAXED = 0x01; constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_ACQ_REL = 0x02; constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_SEQ_CST = 0x04; constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM = 0x08; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP = 0x10; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP = + 0x10; constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_DEVICE = 0x20; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES = 0x40; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES = + 0x40; typedef enum { PI_PROFILING_INFO_COMMAND_QUEUED = 0x1280, @@ -626,13 +628,13 @@ using pi_queue_properties = pi_bitfield; constexpr pi_queue_properties PI_QUEUE_FLAGS = -1; constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX = -2; // clang-format off -constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); -constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1); -constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2); -constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); + constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); + constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1); + constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2); + constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); + constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); + constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); + constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); // clang-format on using pi_result = _pi_result; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 8d610456498d0..44b2a35ea987a 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -529,9 +529,9 @@ hipStream_t _pi_queue::get_next_transfer_stream() { _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue, hipStream_t stream, pi_uint32 stream_token) : commandType_{type}, refCount_{1}, hasBeenWaitedOn_{false}, - isRecorded_{false}, isStarted_{false}, - streamToken_{stream_token}, evEnd_{nullptr}, evStart_{nullptr}, - evQueued_{nullptr}, queue_{queue}, stream_{stream}, context_{context} { + isRecorded_{false}, isStarted_{false}, streamToken_{stream_token}, + evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr}, queue_{queue}, + stream_{stream}, context_{context} { assert(type != PI_COMMAND_TYPE_USER); @@ -685,8 +685,8 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) { } _pi_program::_pi_program(pi_context ctxt) - : module_{nullptr}, binary_{}, - binarySizeInBytes_{0}, refCount_{1}, context_{ctxt} { + : module_{nullptr}, binary_{}, binarySizeInBytes_{0}, refCount_{1}, + context_{ctxt} { hip_piContextRetain(context_); } @@ -1865,7 +1865,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } // TODO: Investigate if this information is available on HIP. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - // There is no way to query this in the backend + // There is no way to query this in the backend case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: case PI_DEVICE_INFO_DEVICE_ID: @@ -5327,7 +5327,7 @@ pi_result hip_piextEnqueueDeviceGlobalVariableRead( // Windows: dynamically loaded plugins might have been unloaded already // when this is called. Sycl RT holds onto the PI plugin so it can be // called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. +// dynamically loaded a different DLL, that may have been unloaded. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result hip_piTearDown(void *PluginParameter) { From 36192eea1000bdcfbce415381086f5921c28d2bc Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 10 Mar 2023 06:01:02 -0800 Subject: [PATCH 13/37] SYCL should always return memory_scope::work_item. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index d567668abaa45..0d53ff4e310e2 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -331,9 +331,10 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, } case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { // Initialize result to minimum mandated capabilities according to - // SYCL2020 4.6.3.2 - pi_memory_scope_capabilities result = - PI_MEMORY_SCOPE_SUB_GROUP | PI_MEMORY_SCOPE_WORK_GROUP; + // SYCL2020 4.6.3.2. + pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | + PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP; OCLV::OpenCLVersion devVer; @@ -353,14 +354,8 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, "Violates minimum mandated guarantee"); // Because scopes are hierarchical, wider scopes support all narrower - // scopes (except work_item which is a special case). SUB_GROUP was - // already included in the initialization, since WORK_GROUP is mandated - // minimum capality. - - // Special case, only enable if it is explicitly enabled in the backend - if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM) - result |= PI_MEMORY_SCOPE_WORK_ITEM; - + // scopes. SUB_GROUP and WORK_ITEM was already included in the + // initialization, since WORK_GROUP is mandated minimum capality. if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } From c1cf809110e6f4a3404fc043760792d051ba7b9e Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Mon, 13 Mar 2023 10:44:29 -0700 Subject: [PATCH 14/37] Addressing code review concerncs. Signed-off-by: Maronas, Marcos --- opencl/CMakeLists.txt | 2 +- sycl/include/sycl/detail/cl.h | 2 +- sycl/include/sycl/detail/pi.h | 13 ------------ sycl/plugins/opencl/pi_opencl.cpp | 33 +++++++++++++++++++------------ 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/opencl/CMakeLists.txt b/opencl/CMakeLists.txt index 1442a1ac43075..4ed26bd5e719d 100644 --- a/opencl/CMakeLists.txt +++ b/opencl/CMakeLists.txt @@ -44,7 +44,7 @@ FetchContent_GetProperties(ocl-headers) set(OpenCL_INCLUDE_DIR ${ocl-headers_SOURCE_DIR} CACHE PATH "Path to OpenCL Headers") -target_compile_definitions(Headers INTERFACE -DCL_TARGET_OPENCL_VERSION=220) +target_compile_definitions(Headers INTERFACE -DCL_TARGET_OPENCL_VERSION=300) add_library(OpenCL-Headers ALIAS Headers) # OpenCL Library (ICD Loader) diff --git a/sycl/include/sycl/detail/cl.h b/sycl/include/sycl/detail/cl.h index 7e90fe126e40d..aa160d360563a 100644 --- a/sycl/include/sycl/detail/cl.h +++ b/sycl/include/sycl/detail/cl.h @@ -11,7 +11,7 @@ // Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION // and define all symbols up to OpenCL 2.2 #ifndef CL_TARGET_OPENCL_VERSION -#define CL_TARGET_OPENCL_VERSION 220 +#define CL_TARGET_OPENCL_VERSION 300 #endif #include diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 2a2b4fe711601..5c9596a975a92 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -565,19 +565,6 @@ constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10; -// CL equivalents are only available for OpenCL version 3.0 -#define PI_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 -using pi_device_atomic_capabilities = pi_bitfield; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_RELAXED = 0x01; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_ACQ_REL = 0x02; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_SEQ_CST = 0x04; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM = 0x08; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP = - 0x10; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_DEVICE = 0x20; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES = - 0x40; - typedef enum { PI_PROFILING_INFO_COMMAND_QUEUED = 0x1280, PI_PROFILING_INFO_COMMAND_SUBMIT = 0x1281, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 0d53ff4e310e2..5fb8b955369fa 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -299,19 +299,19 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, if (ret_err != CL_SUCCESS) return static_cast(ret_err); - pi_device_atomic_capabilities devCapabilities = 0; + cl_device_atomic_capabilities devCapabilities = 0; if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, PI_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(pi_device_atomic_capabilities), + ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &devCapabilities, nullptr); if (ret_err != CL_SUCCESS) return static_cast(ret_err); - assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_RELAXED && + assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && "Violates minimum mandated guarantee"); - assert(devCapabilities && PI_DEVICE_ATOMIC_ORDER_ACQ_REL && + assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && "Violates minimum mandated guarantee"); - if (devCapabilities && PI_DEVICE_ATOMIC_ORDER_SEQ_CST) { + if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { result |= PI_MEMORY_ORDER_SEQ_CST; } @@ -343,24 +343,24 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, if (ret_err != CL_SUCCESS) return static_cast(ret_err); - pi_device_atomic_capabilities devCapabilities = 0; + cl_device_atomic_capabilities devCapabilities = 0; if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, PI_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(pi_device_atomic_capabilities), + ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &devCapabilities, nullptr); if (ret_err != CL_SUCCESS) return static_cast(ret_err); - assert(devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP && + assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); // Because scopes are hierarchical, wider scopes support all narrower // scopes. SUB_GROUP and WORK_ITEM was already included in the // initialization, since WORK_GROUP is mandated minimum capality. - if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_DEVICE) { + if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } - if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { result |= PI_MEMORY_SCOPE_SYSTEM; } @@ -375,7 +375,14 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; } } - std::memcpy(paramValue, &result, sizeof(result)); + if (paramValue) { + if (paramValueSize < sizeof(cl_device_atomic_capabilities)) + return PI_ERROR_INVALID_VALUE; + + std::memcpy(paramValue, &result, sizeof(result)); + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(result); + } return PI_SUCCESS; } case PI_DEVICE_INFO_ATOMIC_64: { From bf12a928f37312827646fba4b0a62d4ebcb88194 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 16 Mar 2023 09:45:07 -0700 Subject: [PATCH 15/37] Reverts unrelated changes. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 5c9596a975a92..3a31574b77d94 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -188,7 +188,7 @@ typedef enum : pi_uint64 { PI_DEVICE_TYPE_CPU = (1 << 1), ///< A PI device that is the host processor. PI_DEVICE_TYPE_GPU = (1 << 2), ///< A PI device that is a GPU. PI_DEVICE_TYPE_ACC = (1 << 3), ///< A PI device that is a - ///< dedicated accelerator. + ///< dedicated accelerator. PI_DEVICE_TYPE_CUSTOM = (1 << 4) ///< A PI device that is a custom device. } _pi_device_type; @@ -615,13 +615,13 @@ using pi_queue_properties = pi_bitfield; constexpr pi_queue_properties PI_QUEUE_FLAGS = -1; constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX = -2; // clang-format off - constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); - constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1); - constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2); - constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); - constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); - constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); - constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); +constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); +constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1); +constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2); +constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); // clang-format on using pi_result = _pi_result; From 95480738e26a372d5415e772e5fa70008ef824ad Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 16 Mar 2023 09:47:28 -0700 Subject: [PATCH 16/37] Addresses code review comments. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/cl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/detail/cl.h b/sycl/include/sycl/detail/cl.h index aa160d360563a..20d640bcff59f 100644 --- a/sycl/include/sycl/detail/cl.h +++ b/sycl/include/sycl/detail/cl.h @@ -9,7 +9,7 @@ #pragma once // Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION -// and define all symbols up to OpenCL 2.2 +// and define all symbols up to OpenCL 3.0 #ifndef CL_TARGET_OPENCL_VERSION #define CL_TARGET_OPENCL_VERSION 300 #endif From 4c59edb4baf465190858f6aea9993f27fe26ef9f Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 17 Mar 2023 05:49:20 -0700 Subject: [PATCH 17/37] Reverts unrelated clang-format changes. Signed-off-by: Maronas, Marcos --- sycl/plugins/hip/pi_hip.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 44b2a35ea987a..f277f4596e1e3 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -529,10 +529,9 @@ hipStream_t _pi_queue::get_next_transfer_stream() { _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue, hipStream_t stream, pi_uint32 stream_token) : commandType_{type}, refCount_{1}, hasBeenWaitedOn_{false}, - isRecorded_{false}, isStarted_{false}, streamToken_{stream_token}, - evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr}, queue_{queue}, - stream_{stream}, context_{context} { - + isRecorded_{false}, isStarted_{false}, + streamToken_{stream_token}, evEnd_{nullptr}, evStart_{nullptr}, + evQueued_{nullptr}, queue_{queue}, stream_{stream}, context_{context} { assert(type != PI_COMMAND_TYPE_USER); bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE; @@ -685,8 +684,8 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) { } _pi_program::_pi_program(pi_context ctxt) - : module_{nullptr}, binary_{}, binarySizeInBytes_{0}, refCount_{1}, - context_{ctxt} { + : module_{nullptr}, binary_{}, + binarySizeInBytes_{0}, refCount_{1}, context_{ctxt} { hip_piContextRetain(context_); } From 3ab7991bb597903ceccfc7df71c157fb98944756 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 17 Mar 2023 05:49:56 -0700 Subject: [PATCH 18/37] Reverts unrelated clang-format changes. Signed-off-by: Maronas, Marcos --- sycl/plugins/hip/pi_hip.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index f277f4596e1e3..f1a5f1f2a8165 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -532,6 +532,7 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue, isRecorded_{false}, isStarted_{false}, streamToken_{stream_token}, evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr}, queue_{queue}, stream_{stream}, context_{context} { + assert(type != PI_COMMAND_TYPE_USER); bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE; From f19eb4c68c1b98e87019e4f5523d4a0723507078 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 17 Mar 2023 05:50:57 -0700 Subject: [PATCH 19/37] Reverts unrelated clang-format changes. Signed-off-by: Maronas, Marcos --- sycl/plugins/hip/pi_hip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index f1a5f1f2a8165..3e03bc9479354 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5327,7 +5327,7 @@ pi_result hip_piextEnqueueDeviceGlobalVariableRead( // Windows: dynamically loaded plugins might have been unloaded already // when this is called. Sycl RT holds onto the PI plugin so it can be // called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. +// dynamically loaded a different DLL, that may have been unloaded. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result hip_piTearDown(void *PluginParameter) { From 9e7947c03779e953a097b9cd59707528f515d5e1 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 17 Mar 2023 08:20:45 -0700 Subject: [PATCH 20/37] Sets return values correctly in piDeviceGetInfo for atomic fence capabilities. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 5fb8b955369fa..1130354dcb45e 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -311,6 +311,8 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && "Violates minimum mandated guarantee"); + // We already initialized to minimum mandated capabilities. Just + // check stronger orders. if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { result |= PI_MEMORY_ORDER_SEQ_CST; } @@ -326,12 +328,22 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, result |= PI_MEMORY_ORDER_SEQ_CST; } } - std::memcpy(paramValue, &result, sizeof(result)); + if (paramValue) { + if (paramValueSize < sizeof(cl_device_atomic_capabilities)) + return PI_ERROR_INVALID_VALUE; + + std::memcpy(paramValue, &result, sizeof(result)); + } + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(result); return PI_SUCCESS; } case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { // Initialize result to minimum mandated capabilities according to // SYCL2020 4.6.3.2. + // Because scopes are hierarchical, wider scopes support all narrower + // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | PI_MEMORY_SCOPE_WORK_GROUP; @@ -354,8 +366,10 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, "Violates minimum mandated guarantee"); // Because scopes are hierarchical, wider scopes support all narrower - // scopes. SUB_GROUP and WORK_ITEM was already included in the - // initialization, since WORK_GROUP is mandated minimum capality. + // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) + // We already initialized to these minimum mandated capabilities. Just + // check wider scopes. if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } @@ -380,9 +394,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, return PI_ERROR_INVALID_VALUE; std::memcpy(paramValue, &result, sizeof(result)); - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); } + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(result); return PI_SUCCESS; } case PI_DEVICE_INFO_ATOMIC_64: { From 806e05478357d2cff9d8c915005dbc87b1002ecf Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Wed, 22 Mar 2023 04:45:40 -0700 Subject: [PATCH 21/37] Addresses code review comments. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 8 +++---- sycl/include/sycl/info/device_traits.def | 4 ++-- sycl/plugins/cuda/pi_cuda.cpp | 4 ++-- .../esimd_emulator/pi_esimd_emulator.cpp | 4 ++-- sycl/plugins/hip/pi_hip.cpp | 4 ++-- sycl/plugins/opencl/pi_opencl.cpp | 4 ++-- sycl/plugins/unified_runtime/pi2ur.hpp | 4 ++-- .../ur/adapters/level_zero/ur_level_zero.cpp | 21 +++++++++++-------- .../SYCL2020/AtomicFenceCapabilities.cpp | 4 ++-- 9 files changed, 30 insertions(+), 27 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 3a31574b77d94..f83a6f8c34087 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -77,8 +77,8 @@ // 12.22 Add piGetDeviceAndHostTimer to query device wall-clock timestamp // 12.23 Added new piextEnqueueDeviceGlobalVariableWrite and // piextEnqueueDeviceGlobalVariableRead functions. -// 12.24 Added PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES and -// PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES for piDeviceGetInfo. +// 12.24 Added PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES and +// PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES for piDeviceGetInfo. #define _PI_H_VERSION_MAJOR 12 #define _PI_H_VERSION_MINOR 24 @@ -315,8 +315,6 @@ typedef enum { PI_DEVICE_INFO_ATOMIC_64 = 0x10110, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, - PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10114, - PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10115, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, // Return whether bfloat16 math functions are supported by device @@ -326,6 +324,8 @@ typedef enum { PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004, + PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x20005, + PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x20006, } _pi_device_info; typedef enum { diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index 92e36e9913fe0..b2e8b8f345055 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -114,13 +114,13 @@ __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_order_capabilities, std::vector, - PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) + PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities, std::vector, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_scope_capabilities, std::vector, - PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) + PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, profiling_timer_resolution, size_t, PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION) __SYCL_PARAM_TRAITS_SPEC(device, is_endian_little, bool, diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 196af07373a69..d687780037f76 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1315,8 +1315,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } - case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: // There is no way to query this in the backend return PI_ERROR_INVALID_ARG_VALUE; case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index abacdf6c71d86..3255abeb04a55 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -805,8 +805,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_IMAGE_SRGB) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_64) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) - CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) - CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) + CASE_PI_UNSUPPORTED(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) + CASE_PI_UNSUPPORTED(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 3e03bc9479354..1c1e579c7bd51 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1866,8 +1866,8 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, // TODO: Investigate if this information is available on HIP. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: // There is no way to query this in the backend - case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: case PI_DEVICE_INFO_DEVICE_ID: case PI_DEVICE_INFO_PCI_ADDRESS: case PI_DEVICE_INFO_GPU_EU_COUNT: diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1130354dcb45e..ce302b612af32 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -285,7 +285,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: return PI_ERROR_INVALID_VALUE; - case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { // Initialize result to minimum mandated capabilities according to // SYCL2020 4.6.3.2 pi_memory_order_capabilities result = @@ -338,7 +338,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, *paramValueSizeRet = sizeof(result); return PI_SUCCESS; } - case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { // Initialize result to minimum mandated capabilities according to // SYCL2020 4.6.3.2. // Because scopes are hierarchical, wider scopes support all narrower diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index a1fb493524d5e..742702dfe1bf2 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -475,9 +475,9 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, (ur_device_info_t)UR_DEVICE_INFO_BFLOAT16}, {PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, - {PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, + {PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES}, - {PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, + {PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES}, }; diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 499bcb8608043..cd4f0c1e23b1b 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===-----------------------------------------------------------------===// -#include #include #include @@ -1168,20 +1167,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { // There are no explicit restrictions in L0 programming guide, so assume all // are supported - pi_memory_order_capabilities result = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; + ur_memory_order_capability_flags_t result = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; return ReturnValue(result); } case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { // There are no explicit restrictions in L0 programming guide, so assume all // are supported - pi_memory_scope_capabilities result = - PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | - PI_MEMORY_SCOPE_SYSTEM; + ur_memory_scope_capability_flags_t result = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; return ReturnValue(result); } diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp index ed29efc4fa357..3b74918253b23 100644 --- a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -23,7 +23,7 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { + if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { deviceGetInfoCalled = true; if (param_value) { auto *Result = @@ -32,7 +32,7 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; } - } else if (param_name == PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { + } else if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { deviceGetInfoCalled = true; if (param_value) { auto *Result = From c55f52a47b29a39dc875c71140107e9e5992de81 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Wed, 22 Mar 2023 07:17:14 -0700 Subject: [PATCH 22/37] Addresses code review comments. Signed-off-by: Maronas, Marcos --- sycl/plugins/cuda/pi_cuda.cpp | 4 +++- sycl/plugins/hip/pi_hip.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index d687780037f76..d45e2efd8bb10 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1318,7 +1318,9 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: // There is no way to query this in the backend - return PI_ERROR_INVALID_ARG_VALUE; + setErrorMessage("This backend does not support this query", + PI_ERROR_INVALID_ARG_VALUE); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { int major = 0; sycl::detail::pi::assertion( diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 1c1e579c7bd51..bcc1d1cb2264b 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1878,7 +1878,9 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: - return PI_ERROR_INVALID_VALUE; + setErrorMessage("This backend does not support this query", + PI_ERROR_INVALID_ARG_VALUE); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); From 1571410fd0135a51628a61d3e1f3ecd5585eb707 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 07:11:32 -0700 Subject: [PATCH 23/37] Addresses code review comments. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 4 ++-- sycl/include/sycl/info/device_traits.def | 4 ++-- sycl/plugins/cuda/pi_cuda.cpp | 4 ++-- sycl/plugins/hip/pi_hip.cpp | 4 ++-- sycl/plugins/unified_runtime/pi2ur.hpp | 2 +- sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 3f2890d28df35..52188e96bdecc 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -313,8 +313,8 @@ typedef enum { // return the number of queue indices that are available for this device. PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 0x10032, PI_DEVICE_INFO_ATOMIC_64 = 0x10110, - PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, - PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, // Return whether bfloat16 math functions are supported by device diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index b2e8b8f345055..f091bb7e8f67d 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -111,13 +111,13 @@ __SYCL_PARAM_TRAITS_SPEC(device, host_unified_memory, bool, PI_DEVICE_INFO_HOST_UNIFIED_MEMORY) __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities, std::vector, - PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_order_capabilities, std::vector, PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities, std::vector, - PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_scope_capabilities, std::vector, PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index d45e2efd8bb10..2820adc261c34 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1293,14 +1293,14 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, atomic64); } - case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } - case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { int major = 0; sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index bcc1d1cb2264b..c0b844830fb77 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1856,7 +1856,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, pi_int32{1}); } - case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE; @@ -1864,7 +1864,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, capabilities); } // TODO: Investigate if this information is available on HIP. - case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: // There is no way to query this in the backend case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 4d695abbfbbd1..2d6fe8a1303a5 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -483,7 +483,7 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, (ur_device_info_t)UR_EXT_DEVICE_INFO_MAX_MEM_BANDWIDTH}, {PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS, (ur_device_info_t)UR_DEVICE_INFO_BFLOAT16}, - {PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, + {PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, {PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES}, diff --git a/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp index 23be3b2c4869c..55ab902cb3de5 100644 --- a/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp @@ -23,7 +23,7 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) { + if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) { deviceGetInfoCalled = true; if (param_value) { auto *Result = From 45dd7620db9febe569ecd8d22d93e75c06fdaf0c Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 07:33:44 -0700 Subject: [PATCH 24/37] Addresses code review comments. Signed-off-by: Maronas, Marcos --- sycl/plugins/cuda/pi_cuda.cpp | 2 +- sycl/plugins/hip/pi_hip.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 2820adc261c34..7d989e846e998 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1318,7 +1318,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: // There is no way to query this in the backend - setErrorMessage("This backend does not support this query", + setErrorMessage("CUDA backend does not support this query", PI_ERROR_INVALID_ARG_VALUE); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index c0b844830fb77..e2f28ac77738e 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1878,7 +1878,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: - setErrorMessage("This backend does not support this query", + setErrorMessage("HIP backend does not support this query", PI_ERROR_INVALID_ARG_VALUE); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; From d4a5d37eae1cb731a9e4dbf4a3197ac24e2a9222 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 08:04:17 -0700 Subject: [PATCH 25/37] Fixes compilation error in AtomicMemoryOrderCapabilities unittest. Signed-off-by: Maronas, Marcos --- sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp index 6ed291c0da3e8..b899589af63e1 100644 --- a/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp @@ -27,7 +27,7 @@ static bool has_capability(const std::vector &deviceCapabilities, pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { + if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { deviceGetInfoCalled = true; if (param_value) { pi_memory_order_capabilities *Capabilities = From 918b923e5ea8340e5ffa514b0269f361a8129415 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 09:02:52 -0700 Subject: [PATCH 26/37] Fixes compilation error in AtomicFenceCapabilities unittest. Signed-off-by: Maronas, Marcos --- sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp index 3b74918253b23..c8781577778b3 100644 --- a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -16,8 +16,6 @@ namespace { thread_local bool deviceGetInfoCalled; -pi_platform PiPlatform = nullptr; - pi_result redefinedDeviceGetInfoAfter(pi_device device, pi_device_info param_name, size_t param_value_size, @@ -48,10 +46,7 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceOrderCapabilities) { sycl::unittest::PiMock Mock; sycl::platform Plt = Mock.getPlatform(); - - PiPlatform = detail::getSyclObjImpl(Plt)->getHandleRef(); - context DefaultCtx = Plt.ext_oneapi_get_default_context(); - device Dev = DefaultCtx.get_devices()[0]; + device Dev = Plt.get_devices()[0]; deviceGetInfoCalled = false; @@ -83,10 +78,7 @@ TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceOrderCapabilities) { TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceScopeCapabilities) { sycl::unittest::PiMock Mock; sycl::platform Plt = Mock.getPlatform(); - - PiPlatform = detail::getSyclObjImpl(Plt)->getHandleRef(); - context DefaultCtx = Plt.ext_oneapi_get_default_context(); - device Dev = DefaultCtx.get_devices()[0]; + device Dev = Plt.get_devices()[0]; deviceGetInfoCalled = false; From 6490a36b6986e28fa4e5a786b4491a2cf47d8d5f Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 09:52:18 -0700 Subject: [PATCH 27/37] Updates Windows ABI. Signed-off-by: Maronas, Marcos --- patch.txt | 47 +++++++++++++++++++++++++ sycl/test/abi/sycl_symbols_windows.dump | 2 ++ 2 files changed, 49 insertions(+) create mode 100644 patch.txt diff --git a/patch.txt b/patch.txt new file mode 100644 index 0000000000000..59d8898fc9bbd --- /dev/null +++ b/patch.txt @@ -0,0 +1,47 @@ +From 99669798008dedd68398021c2d03d5bc7eb150cd Mon Sep 17 00:00:00 2001 +From: "Maronas, Marcos" +Date: Thu, 23 Mar 2023 09:16:04 -0700 +Subject: [PATCH] Updates Windows ABI. + +Signed-off-by: Maronas, Marcos +--- + sycl/test/abi/sycl_symbols_windows.dump | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump +index cec276ccf86f..4f4db1f69cd9 100644 +--- a/sycl/test/abi/sycl_symbols_windows.dump ++++ b/sycl/test/abi/sycl_symbols_windows.dump +@@ -27,6 +27,8 @@ + ??$get_info@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ + ??$get_info@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ + ??$get_info@Uatomic64@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ ++??$get_info@Uatomic_fence_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ++??$get_info@Uatomic_fence_scope_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ + ??$get_info@Uatomic_memory_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ + ??$get_info@Uatomic_memory_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ + ??$get_info@Uatomic_memory_scope_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ +@@ -1016,8 +1018,8 @@ + ?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ + ?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ + ?get_width@stream@_V1@sycl@@QEBA_KXZ +-?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ + ?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ ++?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ + ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z + ?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXPEAX@Z@std@@_K_N@Z + ?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@X@std@@_K_N@Z +@@ -1185,8 +1187,8 @@ + ?size@SYCLMemObjT@detail@_V1@sycl@@QEBA_KXZ + ?size@exception_list@_V1@sycl@@QEBA_KXZ + ?size@image_impl@detail@_V1@sycl@@QEBA_KXZ +-?size@stream_impl@detail@_V1@sycl@@QEBA_KXZ + ?size@stream@_V1@sycl@@QEBA_KXZ ++?size@stream_impl@detail@_V1@sycl@@QEBA_KXZ + ?split_string@detail@_V1@sycl@@YA?AV?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@5@D@Z + ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ + ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ +-- +2.39.1.windows.1 + + diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index cec276ccf86f6..04424816623ec 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -27,6 +27,8 @@ ??$get_info@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ ??$get_info@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic64@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ +??$get_info@Uatomic_fence_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ +??$get_info@Uatomic_fence_scope_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_memory_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_memory_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_memory_scope_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ From 924b9b777371229fcc06d49c34bbe4188759f776 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 13:37:56 -0700 Subject: [PATCH 28/37] Removes file pushed accidentally. Signed-off-by: Maronas, Marcos --- patch.txt | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 patch.txt diff --git a/patch.txt b/patch.txt deleted file mode 100644 index 59d8898fc9bbd..0000000000000 --- a/patch.txt +++ /dev/null @@ -1,47 +0,0 @@ -From 99669798008dedd68398021c2d03d5bc7eb150cd Mon Sep 17 00:00:00 2001 -From: "Maronas, Marcos" -Date: Thu, 23 Mar 2023 09:16:04 -0700 -Subject: [PATCH] Updates Windows ABI. - -Signed-off-by: Maronas, Marcos ---- - sycl/test/abi/sycl_symbols_windows.dump | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump -index cec276ccf86f..4f4db1f69cd9 100644 ---- a/sycl/test/abi/sycl_symbols_windows.dump -+++ b/sycl/test/abi/sycl_symbols_windows.dump -@@ -27,6 +27,8 @@ - ??$get_info@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ - ??$get_info@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ - ??$get_info@Uatomic64@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ -+??$get_info@Uatomic_fence_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ -+??$get_info@Uatomic_fence_scope_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ - ??$get_info@Uatomic_memory_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ - ??$get_info@Uatomic_memory_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ - ??$get_info@Uatomic_memory_scope_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ -@@ -1016,8 +1018,8 @@ - ?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ - ?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ - ?get_width@stream@_V1@sycl@@QEBA_KXZ --?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ - ?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ -+?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ - ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z - ?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXPEAX@Z@std@@_K_N@Z - ?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@X@std@@_K_N@Z -@@ -1185,8 +1187,8 @@ - ?size@SYCLMemObjT@detail@_V1@sycl@@QEBA_KXZ - ?size@exception_list@_V1@sycl@@QEBA_KXZ - ?size@image_impl@detail@_V1@sycl@@QEBA_KXZ --?size@stream_impl@detail@_V1@sycl@@QEBA_KXZ - ?size@stream@_V1@sycl@@QEBA_KXZ -+?size@stream_impl@detail@_V1@sycl@@QEBA_KXZ - ?split_string@detail@_V1@sycl@@YA?AV?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@5@D@Z - ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ - ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ --- -2.39.1.windows.1 - - From 879c096d1d582bcb178b4dbb2dcd69be7eebbe3c Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 13:55:17 -0700 Subject: [PATCH 29/37] Adds ur2pi value conversion. Signed-off-by: Maronas, Marcos --- sycl/plugins/unified_runtime/pi2ur.hpp | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index a061ceb8e3992..87a27314919c7 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -236,6 +236,56 @@ inline pi_result ur2piInfoValue(ur_device_info_t ParamName, {UR_DEVICE_LOCAL_MEM_TYPE_GLOBAL, PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL}, }; return Value.convert(Map); + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { + static std::unordered_map + Map = { + {UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED, PI_MEMORY_ORDER_RELAXED}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE, PI_MEMORY_ORDER_ACQUIRE}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE, PI_MEMORY_ORDER_RELEASE}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL, PI_MEMORY_ORDER_ACQ_REL}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST, PI_MEMORY_ORDER_SEQ_CST}, + }; + return Value.convertBitSet(Map); + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) { + static std::unordered_map + Map = { + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM, + PI_MEMORY_SCOPE_WORK_ITEM}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP, + PI_MEMORY_SCOPE_SUB_GROUP}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP, + PI_MEMORY_SCOPE_WORK_GROUP}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE, PI_MEMORY_SCOPE_DEVICE}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM, PI_MEMORY_SCOPE_SYSTEM}, + }; + return Value.convertBitSet(Map); + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { + static std::unordered_map + Map = { + {UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED, PI_MEMORY_ORDER_RELAXED}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE, PI_MEMORY_ORDER_ACQUIRE}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE, PI_MEMORY_ORDER_RELEASE}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL, PI_MEMORY_ORDER_ACQ_REL}, + {UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST, PI_MEMORY_ORDER_SEQ_CST}, + }; + return Value.convertBitSet(Map); + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { + static std::unordered_map + Map = { + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM, + PI_MEMORY_SCOPE_WORK_ITEM}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP, + PI_MEMORY_SCOPE_SUB_GROUP}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP, + PI_MEMORY_SCOPE_WORK_GROUP}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE, PI_MEMORY_SCOPE_DEVICE}, + {UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM, PI_MEMORY_SCOPE_SYSTEM}, + }; + return Value.convertBitSet(Map); } else { // TODO: what else needs a UR-PI translation? } From aea9486920303be81e05ad4bb109aad8095b4bf8 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 23 Mar 2023 14:45:12 -0700 Subject: [PATCH 30/37] Merges branches with common code. Signed-off-by: Maronas, Marcos --- sycl/plugins/unified_runtime/pi2ur.hpp | 31 ++++---------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 87a27314919c7..ac6b9ff0466a4 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -236,7 +236,8 @@ inline pi_result ur2piInfoValue(ur_device_info_t ParamName, {UR_DEVICE_LOCAL_MEM_TYPE_GLOBAL, PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL}, }; return Value.convert(Map); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES || + ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { static std::unordered_map Map = { @@ -247,32 +248,8 @@ inline pi_result ur2piInfoValue(ur_device_info_t ParamName, {UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST, PI_MEMORY_ORDER_SEQ_CST}, }; return Value.convertBitSet(Map); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) { - static std::unordered_map - Map = { - {UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM, - PI_MEMORY_SCOPE_WORK_ITEM}, - {UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP, - PI_MEMORY_SCOPE_SUB_GROUP}, - {UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP, - PI_MEMORY_SCOPE_WORK_GROUP}, - {UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE, PI_MEMORY_SCOPE_DEVICE}, - {UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM, PI_MEMORY_SCOPE_SYSTEM}, - }; - return Value.convertBitSet(Map); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { - static std::unordered_map - Map = { - {UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED, PI_MEMORY_ORDER_RELAXED}, - {UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE, PI_MEMORY_ORDER_ACQUIRE}, - {UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE, PI_MEMORY_ORDER_RELEASE}, - {UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL, PI_MEMORY_ORDER_ACQ_REL}, - {UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST, PI_MEMORY_ORDER_SEQ_CST}, - }; - return Value.convertBitSet(Map); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES || + ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { static std::unordered_map Map = { From 8d37dfb175c8bdc9a026a2206f49c34aa4e342e4 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 24 Mar 2023 02:47:45 -0700 Subject: [PATCH 31/37] Fixes merge issue. Signed-off-by: Maronas, Marcos --- sycl/plugins/hip/pi_hip.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 64ec752f14db7..147723d6a3753 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1890,10 +1890,8 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, // TODO: Investigate if this information is available on HIP. case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - // There is no way to query this in the backend case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: - case PI_DEVICE_INFO_DEVICE_ID: case PI_DEVICE_INFO_PCI_ADDRESS: case PI_DEVICE_INFO_GPU_EU_COUNT: case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: From 2e1d848c7e7b09db6bb0e4a8062f949b4c884211 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 24 Mar 2023 07:06:25 -0700 Subject: [PATCH 32/37] Addresses code review comments. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 1 + sycl/source/detail/device_info.hpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 7b96e5bb19f99..41d6d9261d967 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -282,6 +282,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, // For details about Intel UUID extension, see // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md case PI_DEVICE_INFO_UUID: + return PI_ERROR_INVALID_VALUE; case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { // This query is missing beore OpenCL 3.0 // Check version and handle appropriately diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index e6c82847640a2..21b7245a209a4 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -269,7 +269,7 @@ struct get_device_info_impl, info::device::atomic_memory_order_capabilities> { static std::vector get(RT::PiDevice dev, const plugin &Plugin) { pi_memory_order_capabilities result; - Plugin.call_nocheck( + Plugin.call( dev, PiInfoCode::value, sizeof(pi_memory_order_capabilities), &result, nullptr); return readMemoryOrderBitfield(result); @@ -282,7 +282,7 @@ struct get_device_info_impl, info::device::atomic_fence_order_capabilities> { static std::vector get(RT::PiDevice dev, const plugin &Plugin) { pi_memory_order_capabilities result; - Plugin.call_nocheck( + Plugin.call( dev, PiInfoCode::value, sizeof(pi_memory_order_capabilities), &result, nullptr); return readMemoryOrderBitfield(result); @@ -295,7 +295,7 @@ struct get_device_info_impl, info::device::atomic_memory_scope_capabilities> { static std::vector get(RT::PiDevice dev, const plugin &Plugin) { pi_memory_scope_capabilities result; - Plugin.call_nocheck( + Plugin.call( dev, PiInfoCode::value, sizeof(pi_memory_scope_capabilities), &result, nullptr); return readMemoryScopeBitfield(result); @@ -308,7 +308,7 @@ struct get_device_info_impl, info::device::atomic_fence_scope_capabilities> { static std::vector get(RT::PiDevice dev, const plugin &Plugin) { pi_memory_scope_capabilities result; - Plugin.call_nocheck( + Plugin.call( dev, PiInfoCode::value, sizeof(pi_memory_scope_capabilities), &result, nullptr); return readMemoryScopeBitfield(result); From 38b415f0ea40019ac101418196113b2ca631b453 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Mon, 27 Mar 2023 05:29:46 -0700 Subject: [PATCH 33/37] Adds context query for atomic_fence_capabilities. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 2 + sycl/include/sycl/info/context_traits.def | 2 + sycl/source/detail/context_impl.cpp | 67 +++++++++----- sycl/source/detail/context_impl.hpp | 15 +++ sycl/test/abi/sycl_symbols_linux.dump | 2 + .../SYCL2020/AtomicFenceCapabilities.cpp | 91 +++++++++++++++++-- 6 files changed, 150 insertions(+), 29 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 52188e96bdecc..9b36d494c4517 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -350,6 +350,8 @@ typedef enum { // Atomics capabilities extensions PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010, PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011, + PI_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10012, + PI_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10013, // Native 2D USM memory operation support PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT = 0x30000, PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT = 0x30001, diff --git a/sycl/include/sycl/info/context_traits.def b/sycl/include/sycl/info/context_traits.def index a495e6bb1f50f..e71f8ca712fbc 100644 --- a/sycl/include/sycl/info/context_traits.def +++ b/sycl/include/sycl/info/context_traits.def @@ -3,3 +3,5 @@ __SYCL_PARAM_TRAITS_SPEC(context, platform, sycl::platform, PI_CONTEXT_INFO_PLAT __SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector, PI_CONTEXT_INFO_DEVICES) __SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_order_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_scope_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 198b4f1cc2d30..c1421c749db5d 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -175,17 +175,9 @@ context_impl::get_info() if (is_host()) return CapabilityList; - for (const sycl::device &Device : MDevices) { - std::vector NewCapabilityList(CapabilityList.size()); - std::vector DeviceCapabilities = - Device.get_info(); - std::set_intersection( - CapabilityList.begin(), CapabilityList.end(), - DeviceCapabilities.begin(), DeviceCapabilities.end(), - std::inserter(NewCapabilityList, NewCapabilityList.begin())); - CapabilityList = NewCapabilityList; - } - CapabilityList.shrink_to_fit(); + GetCapabilitiesIntersectionSet< + sycl::memory_order, info::device::atomic_memory_order_capabilities>( + MDevices, CapabilityList); return CapabilityList; } @@ -193,17 +185,50 @@ template <> std::vector context_impl::get_info() const { + std::vector CapabilityList{ + sycl::memory_scope::work_item, sycl::memory_scope::sub_group, + sycl::memory_scope::work_group, sycl::memory_scope::device, + sycl::memory_scope::system}; + if (is_host()) + return CapabilityList; + + GetCapabilitiesIntersectionSet< + sycl::memory_scope, info::device::atomic_memory_scope_capabilities>( + MDevices, CapabilityList); + + return CapabilityList; +} +template <> +std::vector +context_impl::get_info() const { + std::vector CapabilityList{ + sycl::memory_order::relaxed, sycl::memory_order::acquire, + sycl::memory_order::release, sycl::memory_order::acq_rel, + sycl::memory_order::seq_cst}; if (is_host()) - return {sycl::memory_scope::work_item, sycl::memory_scope::sub_group, - sycl::memory_scope::work_group, sycl::memory_scope::device, - sycl::memory_scope::system}; - - pi_memory_scope_capabilities Result; - getPlugin().call( - MContext, - PiInfoCode::value, - sizeof(Result), &Result, nullptr); - return readMemoryScopeBitfield(Result); + return CapabilityList; + + GetCapabilitiesIntersectionSet( + MDevices, CapabilityList); + + return CapabilityList; +} +template <> +std::vector +context_impl::get_info() const { + std::vector CapabilityList{ + sycl::memory_scope::work_item, sycl::memory_scope::sub_group, + sycl::memory_scope::work_group, sycl::memory_scope::device, + sycl::memory_scope::system}; + if (is_host()) + return CapabilityList; + + GetCapabilitiesIntersectionSet( + MDevices, CapabilityList); + + return CapabilityList; } RT::PiContext &context_impl::getHandleRef() { return MContext; } diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 4aa86ecb84326..ba6401a7da54c 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -270,6 +270,21 @@ class context_impl { std::mutex MDeviceGlobalInitializersMutex; }; +template +void GetCapabilitiesIntersectionSet(const std::vector &Devices, + std::vector &CapabilityList) { + for (const sycl::device &Device : Devices) { + std::vector NewCapabilityList; + std::vector DeviceCapabilities = Device.get_info(); + std::set_intersection( + CapabilityList.begin(), CapabilityList.end(), + DeviceCapabilities.begin(), DeviceCapabilities.end(), + std::inserter(NewCapabilityList, NewCapabilityList.begin())); + CapabilityList = NewCapabilityList; + } + CapabilityList.shrink_to_fit(); +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index ceebe14a8a12b..1af4d868bdb90 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4452,6 +4452,8 @@ _ZNK4sycl3_V17context12has_propertyINS0_8property9reduction22initialize_to_ident _ZNK4sycl3_V17context3getEv _ZNK4sycl3_V17context7is_hostEv _ZNK4sycl3_V17context8get_infoINS0_4info7context15reference_countEEENS0_6detail20is_context_info_descIT_E11return_typeEv +_ZNK4sycl3_V17context8get_infoINS0_4info7context31atomic_fence_order_capabilitiesEEENS0_6detail20is_context_info_descIT_E11return_typeEv +_ZNK4sycl3_V17context8get_infoINS0_4info7context31atomic_fence_scope_capabilitiesEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context8get_infoINS0_4info7context32atomic_memory_order_capabilitiesEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context8get_infoINS0_4info7context32atomic_memory_scope_capabilitiesEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context8get_infoINS0_4info7context7devicesEEENS0_6detail20is_context_info_descIT_E11return_typeEv diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp index c8781577778b3..33109d8d78efa 100644 --- a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -16,6 +16,18 @@ namespace { thread_local bool deviceGetInfoCalled; +pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, + pi_uint32 num_entries, pi_device *devices, + pi_uint32 *num_devices) { + if (num_devices) + *num_devices = 2; + if (devices && num_entries > 0) { + devices[0] = reinterpret_cast(1); + devices[1] = reinterpret_cast(2); + } + return PI_SUCCESS; +} + pi_result redefinedDeviceGetInfoAfter(pi_device device, pi_device_info param_name, size_t param_value_size, @@ -26,24 +38,38 @@ pi_result redefinedDeviceGetInfoAfter(pi_device device, if (param_value) { auto *Result = reinterpret_cast(param_value); - *Result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; + if (device == reinterpret_cast(1)) { + std::cout << "Order Device 1" << std::endl; + *Result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + } + if (device == reinterpret_cast(2)) { + std::cout << "Order Device 2" << std::endl; + *Result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_SEQ_CST; + } } } else if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { deviceGetInfoCalled = true; if (param_value) { auto *Result = reinterpret_cast(param_value); - *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | - PI_MEMORY_SCOPE_SYSTEM; + if (device == reinterpret_cast(1)) { + std::cout << "Scope Device 1" << std::endl; + *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | + PI_MEMORY_SCOPE_SYSTEM; + } + if (device == reinterpret_cast(2)) { + std::cout << "Scope Device 2" << std::endl; + *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SYSTEM; + } } } return PI_SUCCESS; } -TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceOrderCapabilities) { +TEST(AtomicFenceCapabilitiesCheck, CheckDeviceAtomicFenceOrderCapabilities) { sycl::unittest::PiMock Mock; sycl::platform Plt = Mock.getPlatform(); device Dev = Plt.get_devices()[0]; @@ -75,7 +101,7 @@ TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceOrderCapabilities) { EXPECT_FALSE(res == order_capabilities.end()); } -TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceScopeCapabilities) { +TEST(AtomicFenceCapabilitiesCheck, CheckDeviceAtomicFenceScopeCapabilities) { sycl::unittest::PiMock Mock; sycl::platform Plt = Mock.getPlatform(); device Dev = Plt.get_devices()[0]; @@ -106,4 +132,53 @@ TEST(AtomicFenceCapabilitiesCheck, CheckAtomicFenceScopeCapabilities) { sycl::memory_scope::system); EXPECT_FALSE(res == scope_capabilities.end()); } + +TEST(AtomicFenceCapabilitiesCheck, CheckContextAtomicFenceOrderCapabilities) { + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + Mock.redefineAfter( + redefinedDeviceGetInfoAfter); + Mock.redefineAfter(redefinedDevicesGet); + auto devices = Plt.get_devices(); + context Ctx{devices}; + + deviceGetInfoCalled = false; + auto order_capabilities = + Ctx.get_info(); + EXPECT_TRUE(deviceGetInfoCalled); + size_t expectedSize = 2; + EXPECT_EQ(order_capabilities.size(), expectedSize); + + auto res = std::find(order_capabilities.begin(), order_capabilities.end(), + sycl::memory_order::relaxed); + EXPECT_FALSE(res == order_capabilities.end()); + res = std::find(order_capabilities.begin(), order_capabilities.end(), + sycl::memory_order::seq_cst); + EXPECT_FALSE(res == order_capabilities.end()); +} + +TEST(AtomicFenceCapabilitiesCheck, CheckContextAtomicFenceScopeCapabilities) { + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + Mock.redefineAfter( + redefinedDeviceGetInfoAfter); + Mock.redefineAfter(redefinedDevicesGet); + auto devices = Plt.get_devices(); + context Ctx{devices}; + + deviceGetInfoCalled = false; + + auto scope_capabilities = + Ctx.get_info(); + EXPECT_TRUE(deviceGetInfoCalled); + size_t expectedSize = 2; + EXPECT_EQ(scope_capabilities.size(), expectedSize); + + auto res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::work_item); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::system); + EXPECT_FALSE(res == scope_capabilities.end()); +} } // anonymous namespace From 20f5e18194f91e3f87b795d2530ab9c9bc0c67b1 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Mon, 27 Mar 2023 06:57:54 -0700 Subject: [PATCH 34/37] Updates Windows ABI. Signed-off-by: Maronas, Marcos --- sycl/test/abi/sycl_symbols_windows.dump | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 04424816623ec..17408bc39e34c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -27,7 +27,9 @@ ??$get_info@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ ??$get_info@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic64@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ +??$get_info@Uatomic_fence_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_fence_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ +??$get_info@Uatomic_fence_scope_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_fence_scope_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_memory_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_memory_order_capabilities@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ From bef2e363c3b9dcd2153ed5138494392d29ca8c86 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Mon, 27 Mar 2023 12:21:32 -0700 Subject: [PATCH 35/37] Returns minimum mandated capabilities for atomic capabilities in HIP. Signed-off-by: Maronas, Marcos --- sycl/plugins/hip/pi_hip.cpp | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 147723d6a3753..d41b7013e6c9a 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1863,6 +1863,28 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + // SYCL2020 4.6.4.2 minimum mandated capabilities for + // atomic_fence/memory_scope_capabilities. + // Because scopes are hierarchical, wider scopes support all narrower + // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) + pi_memory_scope_capabilities capabilities = PI_MEMORY_SCOPE_WORK_ITEM | + PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP; + return getInfo(param_value_size, param_value, param_value_size_ret, + capabilities); + } + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + // SYCL2020 4.6.4.2 minimum mandated capabilities for + // atomic_fence_order_capabilities. + pi_memory_order_capabilities capabilities = + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; + return getInfo(param_value_size, param_value, param_value_size_ret, + capabilities); + } case PI_DEVICE_INFO_DEVICE_ID: { int value = 0; @@ -1889,9 +1911,6 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } // TODO: Investigate if this information is available on HIP. - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: case PI_DEVICE_INFO_PCI_ADDRESS: case PI_DEVICE_INFO_GPU_EU_COUNT: case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: From cc6166e85759682d30db500db3ea280643234721 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Mon, 27 Mar 2023 13:53:10 -0700 Subject: [PATCH 36/37] Raise errors when querying context for atomic capabilities. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 8 +++--- sycl/include/sycl/info/context_traits.def | 8 +++--- sycl/plugins/cuda/pi_cuda.cpp | 30 +++++++---------------- sycl/plugins/hip/pi_hip.cpp | 11 ++++++++- sycl/plugins/level_zero/pi_level_zero.cpp | 18 ++++++++------ sycl/plugins/opencl/pi_opencl.cpp | 10 ++++++++ 6 files changed, 47 insertions(+), 38 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index dbed50acfcd09..e89ea947c28e5 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -351,10 +351,10 @@ typedef enum { PI_CONTEXT_INFO_PROPERTIES = 0x1082, PI_CONTEXT_INFO_REFERENCE_COUNT = 0x1080, // Atomics capabilities extensions - PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010, - PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011, - PI_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10012, - PI_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10013, + PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010, + PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011, + PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10012, + PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10013, // Native 2D USM memory operation support PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT = 0x30000, PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT = 0x30001, diff --git a/sycl/include/sycl/info/context_traits.def b/sycl/include/sycl/info/context_traits.def index e71f8ca712fbc..37594a1a6a985 100644 --- a/sycl/include/sycl/info/context_traits.def +++ b/sycl/include/sycl/info/context_traits.def @@ -1,7 +1,7 @@ __SYCL_PARAM_TRAITS_SPEC(context, reference_count, uint32_t, PI_CONTEXT_INFO_REFERENCE_COUNT) __SYCL_PARAM_TRAITS_SPEC(context, platform, sycl::platform, PI_CONTEXT_INFO_PLATFORM) __SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector, PI_CONTEXT_INFO_DEVICES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_order_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_scope_capabilities, std::vector, PI_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_order_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_scope_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 7be73a108fb8c..816bf94cd4f8c 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1003,27 +1003,15 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name, case PI_CONTEXT_INFO_REFERENCE_COUNT: return getInfo(param_value_size, param_value, param_value_size_ret, context->get_reference_count()); - case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - pi_memory_order_capabilities capabilities = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; - return getInfo(param_value_size, param_value, param_value_size_ret, - capabilities); - } - case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - int major = 0; - sycl::detail::pi::assertion( - cuDeviceGetAttribute(&major, - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - context->get_device()->get()) == CUDA_SUCCESS); - pi_memory_order_capabilities capabilities = - (major >= 7) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | - PI_MEMORY_SCOPE_SYSTEM - : PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE; - return getInfo(param_value_size, param_value, param_value_size_ret, - capabilities); + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + // These queries should be dealt with in context_impl.cpp by calling the + // queries of each device separately and building the intersection set. + setErrorMessage("These queries should have never come here.", + PI_ERROR_INVALID_ARG_VALUE); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: return getInfo(param_value_size, param_value, param_value_size_ret, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d41b7013e6c9a..5119e06aad9c7 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1007,7 +1007,16 @@ pi_result hip_piContextGetInfo(pi_context context, pi_context_info param_name, // 2D USM operations currently not supported. return getInfo(param_value_size, param_value, param_value_size_ret, false); - case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + // These queries should be dealt with in context_impl.cpp by calling the + // queries of each device separately and building the intersection set. + setErrorMessage("These queries should have never come here.", + PI_ERROR_INVALID_ARG_VALUE); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + } default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index b33fe89c9a770..7dd215b86eb89 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2309,14 +2309,16 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: // 2D USM fill and memset is not supported. return ReturnValue(pi_bool{false}); - case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - pi_memory_order_capabilities capabilities = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; - return ReturnValue(capabilities); - } - case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + // These queries should be dealt with in context_impl.cpp by calling the + // queries of each device separately and building the intersection set. + setErrorMessage("These queries should have never come here.", + UR_RESULT_ERROR_INVALID_VALUE); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + } default: // TODO: implement other parameters die("piGetContextInfo: unsuppported ParamName."); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 41d6d9261d967..61111af7f40c9 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1119,6 +1119,16 @@ pi_result piContextGetInfo(pi_context context, pi_context_info paramName, std::memcpy(paramValue, &result, sizeof(cl_bool)); return PI_SUCCESS; } + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + // These queries should be dealt with in context_impl.cpp by calling the + // queries of each device separately and building the intersection set. + setErrorMessage("These queries should have never come here.", + PI_ERROR_INVALID_ARG_VALUE); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + } default: cl_int result = clGetContextInfo( cast(context), cast(paramName), From ffc9b921708d4a9f363d140e62db85de3196253b Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Tue, 28 Mar 2023 00:57:47 -0700 Subject: [PATCH 37/37] Fixes compilation error in HIP. Signed-off-by: Maronas, Marcos --- sycl/plugins/hip/pi_hip.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 5119e06aad9c7..110fa8452ade6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1872,8 +1872,8 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { // SYCL2020 4.6.4.2 minimum mandated capabilities for // atomic_fence/memory_scope_capabilities. // Because scopes are hierarchical, wider scopes support all narrower @@ -1885,7 +1885,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { // SYCL2020 4.6.4.2 minimum mandated capabilities for // atomic_fence_order_capabilities. pi_memory_order_capabilities capabilities =