Commit 8137ba6

Author: Thomas Grützmacher
Merge adaptation to the size_type change in the Accessor
The Accessor inside Ginkgo was recently changed to use std::int64_t instead of std::size_t as its size_type. This PR adopts that change in this repository as well.
2 parents 1db0470 + f3b2631 commit 8137ba6

10 files changed: +73 -68 lines
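
Throughout the diff below the pattern is the same: sizes, strides, and loop indices that used to be std::size_t are routed through the signed alias matrix_info::size_type (or gko::acc::size_type where an array feeds the Accessor). A minimal sketch of the idea, assuming a simplified matrix_info — the real struct in this repository exposes the same members used in the diff, but is not reproduced here:

#include <array>
#include <cstdint>

// Sketch only: a reduced matrix_info mirroring the fields used in the diff.
struct matrix_info {
    using size_type = std::int64_t;  // signed, matching the Accessor's new size_type
    std::array<size_type, 2> size;   // {rows, cols}
    size_type stride;
};

int main()
{
    using size_type = matrix_info::size_type;
    const matrix_info m_info{{100, 100}, 100};
    // Signed indices keep differences such as (max_size - start) well behaved
    // even when negative; with std::size_t they would wrap around instead.
    for (size_type i = 0; i < m_info.size[0]; ++i) { }
}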

cuda/dot_benchmark.cu
Lines changed: 25 additions & 24 deletions

@@ -21,13 +21,14 @@ int main(int argc, char **argv)
 {
     using ar_type = double;
     using st_type = float;
+    using size_type = matrix_info::size_type;
 
-    constexpr std::size_t min_size{1'000'000};
-    constexpr std::size_t default_max_size{535 * 1000 * 1000};
+    constexpr size_type min_size{1'000'000};
+    constexpr size_type default_max_size{535 * 1000 * 1000};
     constexpr char DELIM{';'};
 
     bool detailed_error{false};
-    std::size_t max_size{default_max_size};
+    size_type max_size{default_max_size};
 
     const std::string use_error_string("--error");
     const std::string set_size_string("--size");
@@ -84,7 +85,7 @@ int main(int argc, char **argv)
         return static_cast<ar_type>(st_data.get_result());
     };
 
-    constexpr std::size_t benchmark_reference{0};
+    constexpr size_type benchmark_reference{0};
     using benchmark_info_t =
         std::tuple<std::string, std::function<void(matrix_info, matrix_info)>,
                    std::function<ar_type()>>;
@@ -139,7 +140,7 @@ int main(int argc, char **argv)
                        st_data.gpu_y(), st_data.gpu_res());
         },
         st_get_result}};
-    const std::size_t benchmark_num{benchmark_info.size()};
+    const size_type benchmark_num{static_cast<size_type>(benchmark_info.size())};
 
 
     std::cout << "Vector Size";
@@ -163,32 +164,32 @@ int main(int argc, char **argv)
     };
 
     // Number of elements of a vector at the start of the benchmark
-    const std::size_t start = std::min(max_size, min_size);
+    const size_type start = std::min(max_size, min_size);
     // Increase in number of elements between consecutive benchmark runs
-    constexpr std::size_t row_incr = 2'000'000;
+    constexpr size_type row_incr = 2'000'000;
     // Number of benchmark runs (ignoring randomization)
-    const std::size_t steps =
+    const size_type steps =
         (max_size < start) ? 0 : (max_size - start) / row_incr;
     // Number of benchmark restarts with a different randomization for vectors
     // Only used for a detailed error run
-    constexpr std::size_t max_randomize_num{10};
+    constexpr size_type max_randomize_num{10};
 
-    std::vector<std::size_t> benchmark_vec_size((steps + 1));
+    std::vector<size_type> benchmark_vec_size((steps + 1));
     std::vector<double> benchmark_time((steps + 1) * benchmark_num);
     // std::vector<ar_type> benchmark_error((steps + 1) * benchmark_num);
    // stores the result for all different benchmark runs to compute the error
     const auto actual_randomize_num = detailed_error ? max_randomize_num : 1;
     std::vector<ar_type> raw_result(actual_randomize_num * (steps + 1) *
                                     benchmark_num);
     const auto get_raw_idx = [benchmark_num, actual_randomize_num](
-                                 std::size_t rnd, std::size_t step,
-                                 std::size_t bi) {
+                                 size_type rnd, size_type step,
+                                 size_type bi) {
         return step * actual_randomize_num * benchmark_num +
                bi * actual_randomize_num + rnd;
     };
 
     // Run all benchmarks and collect the raw data here
-    for (std::size_t randomize = 0; randomize < actual_randomize_num;
+    for (size_type randomize = 0; randomize < actual_randomize_num;
          ++randomize) {
         if (randomize != 0) {
             write_random({{max_size, 1}}, vector_dist, rengine,
@@ -198,14 +199,14 @@ int main(int argc, char **argv)
             ar_data.copy_cpu_to_gpu();
             st_data.convert_from(ar_data);
         }
-        for (std::size_t vec_size = start, i = 0; vec_size <= max_size;
+        for (size_type vec_size = start, i = 0; vec_size <= max_size;
              vec_size += row_incr, ++i) {
             benchmark_vec_size.at(i) = vec_size;
             const matrix_info x_info{{vec_size, 1}};
             const matrix_info y_info{{vec_size, 1}};
 
-            for (std::size_t bi = 0; bi < benchmark_num; ++bi) {
-                const std::size_t idx = i * benchmark_num + bi;
+            for (size_type bi = 0; bi < benchmark_num; ++bi) {
+                const size_type idx = i * benchmark_num + bi;
                 auto curr_lambda = [&]() {
                     std::get<1>(benchmark_info[bi])(x_info, y_info);
                 };
@@ -218,26 +219,26 @@ int main(int argc, char **argv)
     }
 
     // Print the evaluated results
-    for (std::size_t i = 0; i <= steps; ++i) {
+    for (size_type i = 0; i <= steps; ++i) {
         if (!detailed_error) {
             std::cout << benchmark_vec_size[i];
-            for (std::size_t bi = 0; bi < benchmark_num; ++bi) {
+            for (size_type bi = 0; bi < benchmark_num; ++bi) {
                 std::cout << DELIM << benchmark_time[i * benchmark_num + bi];
             }
             const auto result_ref =
                 raw_result[get_raw_idx(0, i, benchmark_reference)];
-            for (std::size_t bi = 0; bi < benchmark_num; ++bi) {
+            for (size_type bi = 0; bi < benchmark_num; ++bi) {
                 std::cout << DELIM
                           << get_error(raw_result[i * benchmark_num + bi],
                                        result_ref);
             }
             std::cout << '\n';
         } else {
             std::cout << benchmark_vec_size[i];
-            for (std::size_t bi = 0; bi < benchmark_num; ++bi) {
+            for (size_type bi = 0; bi < benchmark_num; ++bi) {
                 // sort and compute the median
                 std::array<ar_type, max_randomize_num> local_error;
-                for (std::size_t rnd = 0; rnd < actual_randomize_num; ++rnd) {
+                for (size_type rnd = 0; rnd < actual_randomize_num; ++rnd) {
                     const auto result_ref =
                         raw_result[get_raw_idx(rnd, i, benchmark_reference)];
                     local_error[rnd] = get_error(
@@ -270,11 +271,11 @@ int main(int argc, char **argv)
         std::cout << DELIM << "Result " << std::get<0>(info);
     }
     std::cout << '\n';
-    for (std::size_t i = 0; i <= steps; ++i) {
-        for (std::size_t randomize = 0; randomize < actual_randomize_num;
+    for (size_type i = 0; i <= steps; ++i) {
+        for (size_type randomize = 0; randomize < actual_randomize_num;
              ++randomize) {
             std::cout << randomize << DELIM << benchmark_vec_size[i];
-            for (std::size_t bi = 0; bi < benchmark_num; ++bi) {
+            for (size_type bi = 0; bi < benchmark_num; ++bi) {
                 std::cout << DELIM << raw_result[get_raw_idx(randomize, i, bi)];
             }
             std::cout << '\n';
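
A side effect of the signed size_type shows up in the benchmark_num line above: std::vector::size() returns std::size_t, and brace-initializing a signed type from it is a narrowing conversion that compilers must reject, hence the new static_cast. A self-contained illustration (names are placeholders, not code from this repository):

#include <cstdint>
#include <vector>

int main()
{
    using size_type = std::int64_t;
    const std::vector<int> v(3);
    // const size_type n{v.size()};  // error: narrowing std::size_t -> std::int64_t
    const size_type n{static_cast<size_type>(v.size())};  // OK: explicit conversion
    return static_cast<int>(n);
}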

cuda/dot_kernels.cuh
Lines changed: 2 additions & 2 deletions

@@ -232,8 +232,8 @@ void acc_dot(myBlasHandle *handle, const matrix_info x_info, const StType *x,
 
     // Accessor Setup
     constexpr std::size_t dimensionality{2};
-    std::array<std::size_t, dimensionality - 1> x_stride{x_info.stride};
-    std::array<std::size_t, dimensionality - 1> y_stride{y_info.stride};
+    std::array<gko::acc::size_type, dimensionality - 1> x_stride{x_info.stride};
+    std::array<gko::acc::size_type, dimensionality - 1> y_stride{y_info.stride};
 
     using accessor =
         gko::acc::reduced_row_major<dimensionality, ArType, StType>;

cuda/dot_memory.cuh
Lines changed: 1 addition & 1 deletion

@@ -34,7 +34,7 @@ public:
      * @param engine random engine used to generate the values
      */
     template <typename VectDist, typename RndEngine>
-    DotMemory(std::size_t size, VectDist &&vect_dist, RndEngine &&engine)
+    DotMemory(matrix_info::size_type size, VectDist &&vect_dist, RndEngine &&engine)
         : x_info_{{size, 1}},
           y_info_{{size, 1}},
           cpu_x_(gen_mtx<ValueType>(x_info_, vect_dist, engine)),

cuda/gemv_benchmark.cu
Lines changed: 8 additions & 7 deletions

@@ -20,15 +20,16 @@ int main(int argc, char **argv)
 {
     using ar_type = double;
     using st_type = float;
+    using size_type = matrix_info::size_type;
 
     constexpr ar_type ar_alpha{1.0};
     constexpr ar_type ar_beta{1.0};
     constexpr st_type st_alpha{static_cast<st_type>(ar_alpha)};
     constexpr st_type st_beta{static_cast<st_type>(ar_beta)};
 
-    constexpr std::size_t default_max_size{24500};
-    constexpr std::size_t min_size{100};
-    std::size_t max_size{default_max_size};
+    constexpr size_type default_max_size{24500};
+    constexpr size_type min_size{100};
+    size_type max_size{default_max_size};
 
     bool measure_error{false};
 
@@ -122,7 +123,7 @@ int main(int argc, char **argv)
         return error / res_ref_norm;
     };
 
-    constexpr std::size_t benchmark_reference{0};
+    constexpr size_type benchmark_reference{0};
     using benchmark_info_t =
         std::tuple<std::string,
                    std::function<void(matrix_info, matrix_info, matrix_info)>,
@@ -190,7 +191,7 @@ int main(int argc, char **argv)
         },
         st_compute_error},
     };
-    const std::size_t benchmark_num{benchmark_info.size()};
+    const size_type benchmark_num{static_cast<size_type>(benchmark_info.size())};
 
     std::cout << "Num rows";
     for (const auto &info : benchmark_info) {
@@ -209,7 +210,7 @@ int main(int argc, char **argv)
     std::vector<ar_type> local_res(benchmark_num);
     constexpr auto start = min_size;
     constexpr auto row_incr = start;
-    for (std::size_t num_rows = start; num_rows <= max_size;
+    for (size_type num_rows = start; num_rows <= max_size;
         num_rows += row_incr) {
         const matrix_info m_info{{num_rows, num_rows}, max_size};
         const matrix_info x_info{{num_rows, 1}};
@@ -229,7 +230,7 @@ int main(int argc, char **argv)
                 ar_data.gpu_res_memory().copy_from(ar_cpu_res_init);
             }
         }
-        for (std::size_t i = 0; i < benchmark_num; ++i) {
+        for (size_type i = 0; i < benchmark_num; ++i) {
             auto local_func = [&]() {
                 std::get<1>(benchmark_info[i])(m_info, x_info, res_info);
             };

cuda/gemv_kernels.cuh
Lines changed: 3 additions & 3 deletions

@@ -176,9 +176,9 @@ void acc_gemv(const matrix_info m_info, ArType alpha, const StType *mtx,
 
     // Accessor Setup
     constexpr std::size_t dimensionality{2};
-    std::array<std::size_t, dimensionality - 1> m_stride{m_info.stride};
-    std::array<std::size_t, dimensionality - 1> x_stride{x_info.stride};
-    std::array<std::size_t, dimensionality - 1> res_stride{res_info.stride};
+    std::array<gko::acc::size_type, dimensionality - 1> m_stride{m_info.stride};
+    std::array<gko::acc::size_type, dimensionality - 1> x_stride{x_info.stride};
+    std::array<gko::acc::size_type, dimensionality - 1> res_stride{res_info.stride};
 
     using accessor =
         gko::acc::reduced_row_major<dimensionality, ArType, StType>;

cuda/gemv_memory.cuh
Lines changed: 1 addition & 1 deletion

@@ -37,7 +37,7 @@ public:
      * @param engine random engine used to generate the values
      */
     template <typename MtxDist, typename VectDist, typename RndEngine>
-    GemvMemory(std::size_t max_size, MtxDist &&mtx_dist,
+    GemvMemory(matrix_info::size_type max_size, MtxDist &&mtx_dist,
                VectDist &&vect_dist, RndEngine &&engine)
         : m_info_{{max_size, max_size}},
           x_info_{{max_size, 1}},

cuda/trsv_benchmark.cu
Lines changed: 6 additions & 5 deletions

@@ -21,14 +21,15 @@ int main(int argc, char **argv)
 {
     using ar_type = double;
     using st_type = float;
+    using size_type = matrix_info::size_type;
 
     constexpr tmtx_t t_matrix_type = tmtx_t::upper;
     constexpr dmtx_t d_matrix_type = dmtx_t::unit;
 
-    constexpr std::size_t default_max_size{24 * 1000};
-    constexpr std::size_t min_size{100};
+    constexpr size_type default_max_size{24 * 1000};
+    constexpr size_type min_size{100};
 
-    std::size_t max_size{default_max_size};
+    auto max_size{default_max_size};
     bool measure_error{false};
 
     const std::string use_error_string("--error");
@@ -196,8 +197,8 @@ int main(int argc, char **argv)
 
     std::vector<ar_type> local_res(benchmark_num);
 
-    const std::size_t start = std::min(max_size, min_size);
-    const std::size_t row_incr = start;
+    const auto start = std::min(max_size, min_size);
+    const auto row_incr = start;
 
     for (auto num_rows = start; num_rows <= max_size; num_rows += row_incr) {
         const matrix_info m_info{{num_rows, num_rows}, max_size};

cuda/trsv_kernels.cuh
Lines changed: 8 additions & 8 deletions

@@ -86,7 +86,7 @@ __global__ __launch_bounds__(swarps_per_block *swarp_size) void lower_trsv(
 
     // stores the trianglular system in column major
     __shared__ ValueType triang[swarp_size * triang_stride];
-    __shared__ std::uint32_t shared_row_block_idx;
+    __shared__ std::int32_t shared_row_block_idx;
     __shared__ ValueType x_correction[swarp_size];
 
     const auto group = cg::this_thread_block();
@@ -280,7 +280,7 @@ __global__ __launch_bounds__(swarps_per_block *swarp_size) void upper_trsv(
 
     // stores the trianglular system in column major
     __shared__ ValueType triang[swarp_size * triang_stride];
-    __shared__ std::uint32_t shared_row_block_idx;
+    __shared__ std::int32_t shared_row_block_idx;
     __shared__ ValueType x_correction[swarp_size];
 
     const auto group = cg::this_thread_block();
@@ -461,7 +461,7 @@ void trsv(const matrix_info m_info, tmtx_t ttype, dmtx_t dtype,
     constexpr std::int32_t swarps_per_block{4};
     const dim3 block_solve(subwarp_size, swarps_per_block, 1);
     const dim3 grid_solve(
-        ceildiv(m_info.size[0], static_cast<std::size_t>(subwarp_size)), 1, 1);
+        ceildiv(m_info.size[0], static_cast<std::int64_t>(subwarp_size)), 1, 1);
 
     kernel::trsv_init<<<1, 1>>>(trsv_helper);
     if (dtype == dmtx_t::unit) {
@@ -544,7 +544,7 @@ __global__ __launch_bounds__(swarps_per_block *swarp_size) void acc_lower_trsv(
 
     // stores the trianglular system in column major
     __shared__ ar_type triang[swarp_size * triang_stride];
-    __shared__ std::uint32_t shared_row_block_idx;
+    __shared__ std::int32_t shared_row_block_idx;
     __shared__ ar_type x_correction[swarp_size];
 
     const auto group = cg::this_thread_block();
@@ -742,7 +742,7 @@ __global__ __launch_bounds__(swarps_per_block *swarp_size) void acc_upper_trsv(
 
     // stores the trianglular system in column major
     __shared__ ar_type triang[swarp_size * triang_stride];
-    __shared__ std::uint32_t shared_row_block_idx;
+    __shared__ std::int32_t shared_row_block_idx;
     __shared__ ar_type x_correction[swarp_size];
 
     const auto group = cg::this_thread_block();
@@ -922,8 +922,8 @@ void acc_trsv(const matrix_info m_info, tmtx_t ttype, dmtx_t dtype,
 {
     // Accessor Setup
     constexpr std::size_t dimensionality{2};
-    std::array<std::size_t, dimensionality - 1> m_stride{m_info.stride};
-    std::array<std::size_t, dimensionality - 1> x_stride{x_info.stride};
+    std::array<gko::acc::size_type, dimensionality - 1> m_stride{m_info.stride};
+    std::array<gko::acc::size_type, dimensionality - 1> x_stride{x_info.stride};
 
     using accessor =
         gko::acc::reduced_row_major<dimensionality, ArType, StType>;
@@ -936,7 +936,7 @@ void acc_trsv(const matrix_info m_info, tmtx_t ttype, dmtx_t dtype,
     constexpr std::int32_t swarps_per_block{4};
     const dim3 block_solve(subwarp_size, swarps_per_block, 1);
     const dim3 grid_solve(
-        ceildiv(m_info.size[0], static_cast<std::size_t>(subwarp_size)), 1, 1);
+        ceildiv(m_info.size[0], static_cast<std::int64_t>(subwarp_size)), 1, 1);
 
     kernel::trsv_init<<<1, 1>>>(trsv_helper);
     if (dtype == dmtx_t::unit) {
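
The ceildiv change above follows directly from m_info.size[0] becoming signed: if the helper deduces one common type for both operands (an assumption about this repository's ceildiv, which is not shown in the diff), the second argument must now be cast to std::int64_t instead of std::size_t. A hedged sketch of that failure mode:

#include <cstdint>

// Assumed shape of the helper: a single deduced type for both operands,
// so mixing std::int64_t and std::int32_t arguments would fail to deduce T.
template <typename T>
constexpr T ceildiv(T num, T den)
{
    return (num + den - 1) / den;
}

int main()
{
    const std::int64_t num_rows{24'000};
    constexpr std::int32_t subwarp_size{32};
    // ceildiv(num_rows, subwarp_size);  // deduction conflict: int64_t vs int32_t
    const auto num_blocks = ceildiv(num_rows, static_cast<std::int64_t>(subwarp_size));
    return num_blocks == 750 ? 0 : 1;
}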

cuda/trsv_memory.cuh
Lines changed: 2 additions & 2 deletions

@@ -108,7 +108,7 @@ public:
      * initializing)
      */
     template <typename MtxGen, typename VectGen>
-    TrsvMemory(std::size_t max_size, MtxGen &&cpu_mtx_gen,
+    TrsvMemory(matrix_info::size_type max_size, MtxGen &&cpu_mtx_gen,
               VectGen &&cpu_vect_gen)
         : m_info_{{max_size, max_size}},
           x_info_{{max_size, 1}},
@@ -139,7 +139,7 @@ public:
         const auto pivot_size = std::max(m_info_.size[0], m_info_.size[1]);
         Memory<int> cpu_pivot(Memory<int>::Device::cpu, pivot_size);
         Memory<int> gpu_pivot(Memory<int>::Device::gpu, pivot_size);
-        for (std::size_t i = 0; i < pivot_size; ++i) {
+        for (matrix_info::size_type i = 0; i < pivot_size; ++i) {
            cpu_pivot.data()[i] = i;
         }
         gpu_pivot = cpu_pivot;
