Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
# to the root source directory of the project as ${MHM2_SOURCE_DIR} and to the
# root binary directory of the project as ${MHM2_BINARY_DIR}
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
cmake_policy(SET CMP0074 NEW)
project(MHM2)

message(STATUS "Building ${CMAKE_PROJECT_NAME} with CMake ${CMAKE_VERSION}")
Expand Down Expand Up @@ -84,23 +85,23 @@ if(NOT CMAKE_BUILD_TYPE)
endif()

include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
option(ENABLE_CUDA "Enable CUDA" ON)
if(ENABLE_CUDA)
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
set(CMAKE_CUDA_RESOLVE_DEVICE_SYMBOLS OFF) # only build device link objects
check_language(HIP)
if(CMAKE_HIP_COMPILER)
option(ENABLE_HIP "Enable HIP" ON)
if(ENABLE_HIP)
set(CMAKE_HIP_SEPARABLE_COMPILATION ON)
set(CMAKE_HIP_RESOLVE_DEVICE_SYMBOLS OFF) # only build device link objects
# for GPU targets
enable_language(CUDA)
enable_language(HIP)
endif()
else()
message(STATUS "No CUDA environment detected")
set(ENABLE_CUDA
message(STATUS "No HIP environment detected")
set(ENABLE_HIP
OFF
CACHE BOOL "Enable CUDA" FORCE)
CACHE BOOL "Enable HIP" FORCE)
endif()
if(ENABLE_CUDA)
message(STATUS "Building for GPU with CUDA")
if(ENABLE_HIP)
message(STATUS "Building for GPU with HIP")
else()
message(STATUS "Building for CPU only")
endif()
Expand Down Expand Up @@ -151,6 +152,13 @@ if(MHM2_VECTORS)
endforeach()
endif()

option(ENABLE_KOKKOS "Whether to use Kokkos" OFF)
if(ENABLE_KOKKOS)
message(STATUS "Building with Kokkos")
add_definitions(-DENABLE_KOKKOS)
find_package(Kokkos REQUIRED)
endif()

option(ENABLE_GASNET_STATS "Turn on gasnet stats recording" OFF)
message("Building ${CMAKE_BUILD_TYPE} version")
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
Expand Down
8 changes: 4 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ else
mkdir -p $rootdir/.build
cd $rootdir/.build
if [ "$1" == "Debug" ] || [ "$1" == "Release" ] || [ "$1" == "RelWithDebInfo" ]; then
rm -rf *
rm -rf $INSTALL_PATH/cmake
cmake $rootdir -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=$1 -DCMAKE_INSTALL_PREFIX=$INSTALL_PATH \
# rm -rf *
# rm -rf $INSTALL_PATH/cmake
cmake $rootdir -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=$1 -DCMAKE_INSTALL_PREFIX=$INSTALL_PATH -DCMAKE_CXX_COMPILER=mpicxx \
-DMHM2_ENABLE_TESTING=0 $MHM2_CMAKE_EXTRAS $2
#-DENABLE_CUDA=0
#-DENABLE_CUDA=0
fi
make -j ${MHM2_BUILD_THREADS} all install
#make VERBOSE=1 -j ${MHM2_BUILD_THREADS} all install
Expand Down
5 changes: 3 additions & 2 deletions cmake/Modules/CheckSubmodules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
if(NOT GIT_SUBMOD_RESULT EQUAL "0")
message(FATAL_ERROR "git submodule failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
endif()

string(REPLACE "\n" ";" SUBMOD_LIST ${GIT_SUBMOD_OUTPUT})
if(NOT GIT_SUBMOD_OUTPUT STREQUAL "")
string(REPLACE "\n" ";" SUBMOD_LIST ${GIT_SUBMOD_OUTPUT})
endif()
set(UPDATE_SUBMODULES "")
foreach(tmp ${EXPECTED_SUBMODULES})
set(IS_SUB_OK FALSE)
Expand Down
4 changes: 2 additions & 2 deletions install/include/upcxx_utils/flat_aggr_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ class FlatAggrStore {
do {
rpc_counts->progress_timer.progress(); // call progress after firing a rpc
fut = limit_outstanding_futures(fut);
} while (!fut.ready());
} while (!fut.is_ready());
}

CountType max_vals[2], sum_vals[2];
Expand All @@ -478,7 +478,7 @@ class FlatAggrStore {

DBG("flush_updates() waiting for counts\n");
auto fut_done = flush_outstanding_futures_async();
while (!fut_done.ready()) {
while (!fut_done.is_ready()) {
rpc_counts->progress_timer.discharge();
}

Expand Down
10 changes: 5 additions & 5 deletions install/include/upcxx_utils/gather.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ upcxx::future<> binomial_gather(const T* send_buf, size_t send_count, T* dest_bu
if (is_sending) {
DBG_VERBOSE("is sending level=", level, "\n");
assert(!is_receiving && "sending is not also receiving");
assert(!workflow.prom_buffer_filled.get_future().ready() && "sending buffer has not been filled before workflow is prepared");
assert(!workflow.prom_buffer_filled.get_future().is_ready() && "sending buffer has not been filled before workflow is prepared");
if (!have_received) {
assert(send_buf);
assert(send_count != 0);
Expand Down Expand Up @@ -280,11 +280,11 @@ upcxx::future<> binomial_gather(const T* send_buf, size_t send_count, T* dest_bu
DBG_VERBOSE("Sent rpc to dest_rrank=", dest_rrank, ", dest_rank=", dest_rank, " rrank=", rrank, " level=", level,
" count=", sending_size, " ", get_size_str(sending_size * sizeof(T)), "\n");
// make buffer available for next level (may never be needed but steps workflow to completion)
assert(!workflow.prom_buffer_filled.get_future().ready() && "sending buffer that was just used has not been filled yet");
assert(!workflow.prom_buffer_filled.get_future().is_ready() && "sending buffer that was just used has not been filled yet");
workflow.prom_buffer_filled.fulfill_anonymous(1);
});
if (!have_received)
assert(fut.ready() &&
assert(fut.is_ready() &&
"first sending level always immediately executes ensuring send_buf is ready for reuse on exit of binomial_gather");
have_sent = true;
// not done until I have sent my message
Expand All @@ -295,7 +295,7 @@ upcxx::future<> binomial_gather(const T* send_buf, size_t send_count, T* dest_bu
if (!is_sending & !is_receiving) {
// some ranks on some levels have nothing to do but make the buffer available
DBG_VERBOSE("idle level=", level, "\n");
assert(!workflow.prom_buffer_filled.get_future().ready());
assert(!workflow.prom_buffer_filled.get_future().is_ready());
workflow.prom_buffer_filled.fulfill_anonymous(1);
if (!have_received && !have_sent) workflow.prom_buffer.fulfill_result(ShBuffer());
}
Expand All @@ -309,7 +309,7 @@ upcxx::future<> binomial_gather(const T* send_buf, size_t send_count, T* dest_bu
LevelWorkflow& next = dist_workflows->level(level + 1);
DBG_VERBOSE("Setting buffer for next level: ", level + 1, " size=", sh_buf->size(),
" cap=", sh_buf->capacity(), "\n");
if (!next.prom_buffer.get_future().ready()) {
if (!next.prom_buffer.get_future().is_ready()) {
// fulfill buffer for next level on this rank for it to send or recv
next.prom_buffer.fulfill_result(std::move(sh_buf));
} else {
Expand Down
16 changes: 8 additions & 8 deletions install/include/upcxx_utils/reduce_prefix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,15 +425,15 @@ upcxx::future<> reduce_prefix_binary_tree_up(ShDistData<T, BinaryOp> sh_dist_dat
// scratch has partial_to_parent from right (j+1 ... rr) if there is a right
// if there is a left child, dst already has applied from left (ll ... j)
// calculate partial_to_parent as (ll ... rr) in scratch.
assert(proms.scratch_is_partial_right.get_future().ready());
assert(proms.scratch_is_partial_right.get_future().is_ready());

if (my_node.right < my_node.n) {
assert(!sh_scratch->empty());
}
T *partial_right = sh_scratch->data();
T *partial_left_right = sh_scratch->data();

assert(proms.dst_is_partial_left_me.get_future().ready());
assert(proms.dst_is_partial_left_me.get_future().is_ready());
const T *partial_left_me = my_node.left < my_node.me ? dst : src;
const T *send_to_parent = partial_left_me;

Expand Down Expand Up @@ -512,11 +512,11 @@ upcxx::future<> reduce_prefix_binary_tree_down(ShDistData<T, BinaryOp> sh_dist_d
}

// check that upstage is completed
assert(proms.ready_for_up.get_future().ready());
assert(proms.dst_is_partial_left_me.get_future().ready());
assert(proms.scratch_is_partial_right.get_future().ready());
assert(proms.scratch_is_partial_to_parent.get_future().ready());
assert(proms.sent_partial_to_parent.get_future().ready());
assert(proms.ready_for_up.get_future().is_ready());
assert(proms.dst_is_partial_left_me.get_future().is_ready());
assert(proms.scratch_is_partial_right.get_future().is_ready());
assert(proms.scratch_is_partial_to_parent.get_future().is_ready());
assert(proms.sent_partial_to_parent.get_future().is_ready());

// step 4 down
// receive from parent
Expand Down Expand Up @@ -555,7 +555,7 @@ upcxx::future<> reduce_prefix_binary_tree_down(ShDistData<T, BinaryOp> sh_dist_d
rpcs_sent = rpcs_sent.then(
[sh_dist_data, dst = dst, count = count, sh_scratch = sh_scratch, child, my_node = my_node, &proms, &team]() {
assert(proms.up_ready());
assert(proms.scratch_is_partial_from_parent.get_future().ready());
assert(proms.scratch_is_partial_from_parent.get_future().is_ready());
const T *send_data;
if (child < my_node.me) {
// relay just a copy from my parent (0 ... ll-1)
Expand Down
2 changes: 1 addition & 1 deletion install/include/upcxx_utils/thread_pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class ThreadPool {
template <typename Func, class... Args>
static upcxx::future<> &enqueue_in_single_pool_serially(upcxx::future<> &serial_fut, Func &&func, Args &&... args) {
assert(upcxx::master_persona().active_with_caller() && "Called from master persona");
DBG_VERBOSE("enqueue_in_single_pool_serially: ", &serial_fut, " ", (serial_fut.ready() ? "ready" : "NOT READY"), "\n");
DBG_VERBOSE("enqueue_in_single_pool_serially: ", &serial_fut, " ", (serial_fut.is_ready() ? "ready" : "NOT READY"), "\n");

using return_t = typename std::invoke_result<Func, Args...>::type;
static_assert(std::is_void<return_t>::value, "void is the required return type for enqueue_in_serial_pool");
Expand Down
4 changes: 2 additions & 2 deletions install/include/upcxx_utils/three_tier_aggr_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1080,11 +1080,11 @@ class ThreeTierAggrStore : public FlatAggrStore<T, Data...> {
do {
tt_rpc_counts->progress_timer.progress(); // call progress after firing a rpc
fut = limit_outstanding_futures(fut);
} while (!fut.ready());
} while (!fut.is_ready());
}
}
auto fut_done = flush_outstanding_futures_async();
while (!fut_done.ready()) {
while (!fut_done.is_ready()) {
tt_rpc_counts->progress_timer.discharge();
}

Expand Down
10 changes: 5 additions & 5 deletions install/include/upcxx_utils/two_tier_aggr_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ class FixedMemoryRPC {
dest_stores.reserve(num_stores);
for (int i = 0; i < num_stores; i++) {
auto fut = global_dispatcher.pop();
if (!fut.ready()) {
if (!fut.is_ready()) {
DIE("Detected a global block that is not ready! i=", i, " available_size=", global_dispatcher.available_size(), "\n");
}
dest_stores.push_back(fut.wait());
Expand Down Expand Up @@ -1121,7 +1121,7 @@ class TwoTierAggrStore {
assert(store_block.first);
assert(store_block.first.where() == rank_me());
auto lblock_fut = inter_fixed_mem->pop_global(true);
assert(lblock_fut.ready());
assert(lblock_fut.is_ready());
auto lblock = lblock_fut.result();
assert(lblock.first);
assert(lblock.first.where() == rank_me());
Expand Down Expand Up @@ -1320,7 +1320,7 @@ class TwoTierAggrStore {
} else {
assert(!gblock.first);
auto fut = replace_intra_store(gblock, intra_fixed_mem);
if (!fut.ready()) DBG(__func__, " will wait\n");
if (!fut.is_ready()) DBG(__func__, " will wait\n");
fut.wait();
}
assert(gblock.first);
Expand Down Expand Up @@ -1403,7 +1403,7 @@ class TwoTierAggrStore {
assert(gblock.second > 0);
auto fut = update_remote_inter_nb(target_rank, gblock);
DBG(__func__, " my_progress\n");
if (!fut.ready()) {
if (!fut.is_ready()) {
DBG(__func__, " still waiting on inter dest store\n");
}
fut.wait();
Expand All @@ -1420,7 +1420,7 @@ class TwoTierAggrStore {
gblock = {}; // invalidate it
auto fut = replace_inter_store(gblock, inter_fixed_memory_store);
send_inter_rpc(split_rank::get_rank_from_node(node), sendBlock); // send to dedicated rank on remote node
if (!fut.ready()) DBG("intra dest store is not immediately ready\n");
if (!fut.is_ready()) DBG("intra dest store is not immediately ready\n");
return fut;
}

Expand Down
Loading