Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,8 @@ result_t compute_kernel_csr_impl<Float>::operator()(const bk::context_gpu& ctx,
if (row_count != cur_row_count) {
auto cur_min = result_data_ptr[stat::min * column_count + col_idx];
auto cur_max = result_data_ptr[stat::max * column_count + col_idx];
#if __SYCL_COMPILER_VERSION >= 20240715
result_data_ptr[stat::min * column_count + col_idx] = Float(sycl::fmin(cur_min, 0));
result_data_ptr[stat::max * column_count + col_idx] = Float(sycl::fmax(cur_max, 0));
#else
result_data_ptr[stat::min * column_count + col_idx] = sycl::min<Float>(cur_min, 0);
result_data_ptr[stat::max * column_count + col_idx] = sycl::max<Float>(cur_max, 0);
#endif
cur_sum2_cent += Float(row_count - cur_row_count) * mean_val * mean_val;
}
result_data_ptr[stat::sum2_cent * column_count + col_idx] = cur_sum2_cent;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -696,11 +696,7 @@ inline void merge_blocks_kernel(sycl::nd_item<1> item,

if constexpr (!DefferedFin) {
Float mrgvariance = mrgsum2cent / (mrgvectors - Float(1));
#if __SYCL_COMPILER_VERSION >= 20240715
Float mrgstdev = (Float)sycl::sqrt(mrgvariance);
#else
Float mrgstdev = (Float)sqrt(mrgvariance);
#endif

if constexpr (check_mask_flag(bs_list::sorm, List)) {
rsorm_ptr[group_id] = mrgsum2 / mrgvectors;
Expand Down Expand Up @@ -827,7 +823,6 @@ compute_kernel_dense_impl<Float, List>::merge_blocks(local_buffer_list<Float, Li
const std::int64_t local_size = item.get_local_range()[0];
const std::int64_t id = item.get_local_id()[0];
const std::int64_t group_id = item.get_group().get_group_id(0);
#if __SYCL_COMPILER_VERSION >= 20230828
std::int64_t* lrc_ptr =
lrc_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
Float* lmin_ptr =
Expand All @@ -842,15 +837,6 @@ compute_kernel_dense_impl<Float, List>::merge_blocks(local_buffer_list<Float, Li
lsum2cent_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
Float* lmean_ptr =
lmean_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
#else
std::int64_t* lrc_ptr = lrc_buf.get_pointer().get();
Float* lmin_ptr = lmin_buf.get_pointer().get();
Float* lmax_ptr = lmax_buf.get_pointer().get();
Float* lsum_ptr = lsum_buf.get_pointer().get();
Float* lsum2_ptr = lsum2_buf.get_pointer().get();
Float* lsum2cent_ptr = lsum2cent_buf.get_pointer().get();
Float* lmean_ptr = lmean_buf.get_pointer().get();
#endif
if (distr_mode) {
merge_blocks_kernel<Float, List, deffered_fin_true>(item,
brc_ptr,
Expand Down
4 changes: 2 additions & 2 deletions cpp/oneapi/dal/algo/dbscan/backend/gpu/kernel_fp_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ sycl::event kernels_fp<Float>::fill_current_points_queue(
sycl::atomic_ref<int,
sycl::memory_order::relaxed,
sycl::memory_scope::device,
sycl::access::address_space::ext_intel_global_device_space>
sycl::access::address_space::global_space>
counter_atomic(queue_size_arr_ptr[0]);
auto cur_idx = counter_atomic.fetch_add(1);
for (std::int32_t col_idx = 0; col_idx < column_count; col_idx += 1) {
Expand Down Expand Up @@ -889,7 +889,7 @@ sycl::event kernels_fp<Float>::update_points_queue(sycl::queue& queue,
sycl::atomic_ref<std::int32_t,
sycl::memory_order::relaxed,
sycl::memory_scope::device,
sycl::access::address_space::ext_intel_global_device_space>
sycl::access::address_space::global_space>
counter_atomic(queue_size_arr_ptr[0]);
counter_atomic.fetch_add(1);
indices_cores_ptr[wg_id] = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -877,12 +877,8 @@ sycl::event train_kernel_hist_impl<Float, Bin, Index, Task>::compute_initial_his
const Index* node_tree_order_ptr = &tree_order_ptr[row_offset];

hist_type_t* local_buf_ptr = nullptr;
#if __SYCL_COMPILER_VERSION >= 20230828
local_buf_ptr =
local_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
#else
local_buf_ptr = local_buf.get_pointer().get();
#endif
if (use_private_mem_buf) {
compute_hist_for_node<Float, Index, true>(item,
ind_start,
Expand Down Expand Up @@ -957,12 +953,8 @@ sycl::event train_kernel_hist_impl<Float, Bin, Index, Task>::compute_initial_sum
const Index row_count = node_ptr[impl_const_t::ind_lrc];

const Index* node_tree_order_ptr = &tree_order_ptr[row_offset];
#if __SYCL_COMPILER_VERSION >= 20230828
Float* local_buf_ptr =
local_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
#else
Float* local_buf_ptr = local_buf.get_pointer().get();
#endif
Float sum = Float(0);
for (Index i = local_id; i < row_count; i += local_size) {
sum += response_ptr[node_tree_order_ptr[i]];
Expand Down Expand Up @@ -1036,12 +1028,8 @@ sycl::event train_kernel_hist_impl<Float, Bin, Index, Task>::compute_initial_sum
const Index* node_tree_order_ptr = &tree_order_ptr[row_offset];

const Float mean = sum_list_ptr[node_id] / global_row_count;
#if __SYCL_COMPILER_VERSION >= 20230828
Float* local_buf_ptr =
local_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
#else
Float* local_buf_ptr = local_buf.get_pointer().get();
#endif
Float sum2cent = Float(0);
for (Index i = local_id; i < row_count; i += local_size) {
sum2cent += (response_ptr[node_tree_order_ptr[i]] - mean) *
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,10 @@ sycl::event train_splitter_impl<Float, Bin, Index, Task>::random_split(
split_info_t bs;

// slm pointers declaration
#if __SYCL_COMPILER_VERSION >= 20230828
hist_type_t* hist_ptr =
local_hist_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
Float* local_buf_float_ptr =
local_float_buf.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
#else
hist_type_t* hist_ptr = local_hist_buf.get_pointer().get();
Float* local_buf_float_ptr = local_float_buf.get_pointer().get();
#endif

bs.init_clear(hist_ptr + 0 * hist_prop_count, hist_prop_count);
split_scalar_t& bs_scal = bs.scalars;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ sycl::event count_clusters(sycl::queue& queue,
sycl::atomic_ref<std::int32_t,
sycl::memory_order::relaxed,
sycl::memory_scope::device,
sycl::access::address_space::ext_intel_global_device_space>
sycl::access::address_space::global_space>
counter_atomic(counter_ptr[cl]);
counter_atomic.fetch_add(1);
}
Expand Down
8 changes: 0 additions & 8 deletions cpp/oneapi/dal/algo/svm/backend/gpu/smo_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ sycl::event solve_smo(sycl::queue& q,

std::int32_t b_i = 0;
std::int32_t b_j = 0;
#if __SYCL_COMPILER_VERSION >= 20230828
Float* local_kernel_values_ptr =
local_kernel_values.template get_multi_ptr<sycl::access::decorated::yes>()
.get_raw();
Expand All @@ -178,13 +177,6 @@ sycl::event solve_smo(sycl::queue& q,
sg_cache_index.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
Float* local_vars_ptr =
local_vars.template get_multi_ptr<sycl::access::decorated::yes>().get_raw();
#else
Float* local_kernel_values_ptr = local_kernel_values.get_pointer().get();
Float* objective_func_ptr = objective_func.get_pointer().get();
Float* sg_cache_values_ptr = sg_cache_values.get_pointer().get();
std::int32_t* sg_cache_index_ptr = sg_cache_index.get_pointer().get();
Float* local_vars_ptr = local_vars.get_pointer().get();
#endif
local_kernel_values_ptr[i] = kernel_values_ptr[i * row_count + ws_index];
item.barrier(sycl::access::fence_space::local_space);

Expand Down
35 changes: 10 additions & 25 deletions cpp/oneapi/dal/backend/atomic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,17 @@ template <typename T,
sycl::memory_order mem_order = sycl::memory_order::relaxed,
sycl::memory_scope mem_scope = sycl::memory_scope::device>
inline T atomic_global_add(T* ptr, T operand) {
sycl::atomic_ref<T,
mem_order,
mem_scope,
sycl::access::address_space::ext_intel_global_device_space>
atomic_var(*ptr);
sycl::atomic_ref<T, mem_order, mem_scope, sycl::access::address_space::global_space> atomic_var(
*ptr);
return atomic_var.fetch_add(operand);
}

template <typename T,
sycl::memory_order mem_order = sycl::memory_order::relaxed,
sycl::memory_scope mem_scope = sycl::memory_scope::device>
inline T atomic_global_sum(T* ptr, T operand) {
sycl::atomic_ref<T,
mem_order,
mem_scope,
sycl::access::address_space::ext_intel_global_device_space>
atomic_var(*ptr);
sycl::atomic_ref<T, mem_order, mem_scope, sycl::access::address_space::global_space> atomic_var(
*ptr);
auto old = atomic_var.fetch_add(operand);
return old + operand;
}
Expand All @@ -53,23 +47,17 @@ template <typename T,
sycl::memory_order mem_order = sycl::memory_order::relaxed,
sycl::memory_scope mem_scope = sycl::memory_scope::device>
inline T atomic_global_min(T* ptr, T operand) {
sycl::atomic_ref<T,
mem_order,
mem_scope,
sycl::access::address_space::ext_intel_global_device_space>
atomic_var(*ptr);
sycl::atomic_ref<T, mem_order, mem_scope, sycl::access::address_space::global_space> atomic_var(
*ptr);
return atomic_var.fetch_min(operand);
}

template <typename T,
sycl::memory_order mem_order = sycl::memory_order::relaxed,
sycl::memory_scope mem_scope = sycl::memory_scope::device>
inline T atomic_global_max(T* ptr, T operand) {
sycl::atomic_ref<T,
mem_order,
mem_scope,
sycl::access::address_space::ext_intel_global_device_space>
atomic_var(*ptr);
sycl::atomic_ref<T, mem_order, mem_scope, sycl::access::address_space::global_space> atomic_var(
*ptr);
return atomic_var.fetch_max(operand);
}

Expand All @@ -78,11 +66,8 @@ template <typename T,
sycl::memory_scope mem_scope = sycl::memory_scope::device>
inline T atomic_global_cmpxchg(T* ptr, T expected, T desired) {
T expected_ = expected;
sycl::atomic_ref<T,
mem_order,
mem_scope,
sycl::access::address_space::ext_intel_global_device_space>
atomic_var(*ptr);
sycl::atomic_ref<T, mem_order, mem_scope, sycl::access::address_space::global_space> atomic_var(
*ptr);
atomic_var.compare_exchange_weak(expected_, desired, mem_order, mem_scope);
return expected_;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ sycl::event compute_hessian(sycl::queue& q,
sycl::atomic_ref<Float,
sycl::memory_order::relaxed,
sycl::memory_scope::device,
sycl::access::address_space::ext_intel_global_device_space>(out)
sycl::access::address_space::global_space>(out)
.fetch_add(val);
}
});
Expand Down
5 changes: 2 additions & 3 deletions cpp/oneapi/dal/backend/primitives/reduction/functors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,11 @@ struct min {
template <typename T>
struct logical_or {
using tag_t = reduce_binary_op_tag;
constexpr static inline T init_value = false;
constexpr static inline T init_value = T(false);
#ifdef ONEDAL_DATA_PARALLEL
constexpr static inline sycl::logical_or<T> native{};
#else
constexpr static inline std::logical_or<T> native{};
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confused how this file compiled with an extra } in here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it was under #else branch and was never built.

#endif
T operator()(const T& a, const T& b) const {
return native(a, b);
Expand Down Expand Up @@ -166,7 +165,7 @@ inline T atomic_binary_op(T* ptr, T val) {
sycl::atomic_ref<T,
sycl::memory_order::relaxed,
sycl::memory_scope::device,
sycl::access::address_space::ext_intel_global_device_space>
sycl::access::address_space::global_space>
atomic_ref(*ptr);
if constexpr (is_sum_op_v<BinaryOp>) {
return atomic_ref.fetch_add(val);
Expand Down
45 changes: 30 additions & 15 deletions cpp/oneapi/dal/backend/primitives/reduction/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace oneapi::dal::backend::primitives {

#ifdef ONEDAL_DATA_PARALLEL

template <typename Float, ndorder order, typename BinaryOp, typename UnaryOp>
template <typename Float, typename AccT, ndorder order, typename BinaryOp, typename UnaryOp>
sycl::event reduce_by_rows_impl(sycl::queue& q,
const ndview<Float, 2, order>& input,
ndview<Float, 1>& output,
Expand Down Expand Up @@ -64,10 +64,17 @@ inline sycl::event reduce_by_rows(sycl::queue& q,
static_assert(dal::detail::is_tag_one_of_v<UnaryOp, reduce_unary_op_tag>,
"UnaryOp must be a special unary operation defined "
"at the primitives level");
return reduce_by_rows_impl(q, input, output, binary, unary, deps, override_init);
using AccT = bin_op_t<BinaryOp>;
return reduce_by_rows_impl<Float, AccT, order, BinaryOp, UnaryOp>(q,
input,
output,
binary,
unary,
deps,
override_init);
}

template <typename Float, ndorder order, typename BinaryOp, typename UnaryOp>
template <typename Float, typename AccT, ndorder order, typename BinaryOp, typename UnaryOp>
sycl::event reduce_by_columns_impl(sycl::queue& q,
const ndview<Float, 2, order>& input,
ndview<Float, 1>& output,
Expand Down Expand Up @@ -105,10 +112,17 @@ inline sycl::event reduce_by_columns(sycl::queue& q,
static_assert(dal::detail::is_tag_one_of_v<UnaryOp, reduce_unary_op_tag>,
"UnaryOp must be a special unary operation defined "
"at the primitives level");
return reduce_by_columns_impl(q, input, output, binary, unary, deps, override_init);
using AccT = bin_op_t<BinaryOp>;
return reduce_by_columns_impl<Float, AccT, order, BinaryOp, UnaryOp>(q,
input,
output,
binary,
unary,
deps,
override_init);
}

template <typename Float, typename BinaryOp, typename UnaryOp>
template <typename Float, typename AccT, typename BinaryOp, typename UnaryOp>
sycl::event reduce_by_rows_impl(sycl::queue& q,
const ndview<Float, 1>& values,
const ndview<std::int64_t, 1>& column_indices,
Expand Down Expand Up @@ -151,16 +165,17 @@ inline sycl::event reduce_by_rows(sycl::queue& q,
static_assert(dal::detail::is_tag_one_of_v<UnaryOp, reduce_unary_op_tag>,
"UnaryOp must be a special unary operation defined "
"at the primitives level");
return reduce_by_rows_impl(q,
values,
column_indices,
row_offsets,
indexing,
output,
binary,
unary,
deps,
override_init);
using AccT = bin_op_t<BinaryOp>;
return reduce_by_rows_impl<Float, AccT, BinaryOp, UnaryOp>(q,
values,
column_indices,
row_offsets,
indexing,
output,
binary,
unary,
deps,
override_init);
}

#endif
Expand Down
Loading
Loading