Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cudax/include/cuda/experimental/__stf/places/places.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1661,7 +1661,7 @@ interpreted_execution_policy<spec...>::interpreted_execution_policy(
{
size_t l0_size = p.get_width(0);
size_t l1_size = p.get_width(1);
size_t l2_size = p.get_width(1);
size_t l2_size = p.get_width(2);
bool l0_sync = thread_hierarchy_spec<spec...>::template is_synchronizable<0>;
bool l1_sync = thread_hierarchy_spec<spec...>::template is_synchronizable<1>;
bool l2_sync = thread_hierarchy_spec<spec...>::template is_synchronizable<2>;
Expand Down
1 change: 1 addition & 0 deletions cudax/test/stf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ set(stf_test_sources
places/non_current_device.cu
places/place_partition.cu
places/recursion.cu
places/execution_policy_kernel_launch_test.cu
reclaiming/graph.cu
reclaiming/graph_2.cu
reclaiming/graph_real_oom.cu
Expand Down
56 changes: 56 additions & 0 deletions cudax/test/stf/places/execution_policy_kernel_launch_test.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDASTF in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

//! \file
//! \brief Check that multi-level launch specifications are fulfilled

#include <cuda/experimental/stf.cuh>

#include <cassert>
#include <iostream>

using namespace cuda::experimental::stf;

//! Launches a kernel over a three-level thread hierarchy and checks, from
//! inside the kernel, that the sizes reported by the thread hierarchy object
//! and the CUDA launch dimensions match the requested specification.
//!
//! \return 0 when every level has the expected size, 1 otherwise.
int main()
{
  stream_ctx ctx;

  // Three-level thread hierarchy specification. A wrong width at any level
  // makes the in-kernel checks below fail.
  //   Level 0: 1 device (a single device so the test can run on CI)
  //   Level 1: 4 blocks per device (width 4)
  //   Level 2: 64 threads per block (width 64)
  auto spec = par(hw_scope::device, 1, con<4>(hw_scope::block, con<64>(hw_scope::thread)));

  // Host-side verdict: stays 0 unless the kernel stores 1 through the logical data.
  int test_result    = 0;
  auto l_test_result = ctx.logical_data(make_slice(&test_result, 1));

  ctx.launch(spec, exec_place::current_device(), l_test_result.rw())->*[] __device__(auto th, auto result) {
    // A single thread performs the checks and writes the verdict.
    if (th.rank() == 0)
    {
      // th.size(k) is compared against the product of the widths of levels 0..k;
      // gridDim.x / blockDim.x additionally pin the actual CUDA launch configuration.
      bool level0_correct = (th.size(0) == 1); // device level
      bool level1_correct = (th.size(1) == 1 * 4) && (gridDim.x == 4); // blocks per device
      bool level2_correct = (th.size(2) == 1 * 4 * 64) && (blockDim.x == 64); // threads per block

      // The verdict is 1 only when all three levels are correct.
      result[0] = level0_correct && level1_correct && level2_correct ? 1 : 0;
    }
  };

  // The host-side check below relies on finalize() having made the kernel's
  // write visible in test_result.
  ctx.finalize();

  if (test_result != 1)
  {
    // Report through std::cerr: <iostream> is included by this file, whereas
    // fprintf/stderr would depend on <cstdio> being pulled in transitively.
    std::cerr << "FAIL: Hierarchy dimensions are incorrect!\n";
    return 1;
  }

  return 0;
}