@@ -171,13 +171,11 @@ extern "C"
171
171
void counters_initialise_ ()
172
172
{
173
173
using namespace counters ;
174
- if ( getenv ( " CUDACPP_RUNTIME_DISABLECALLTIMERS" ) ) disablecalltimers = true ;
175
- if ( getenv ( " CUDACPP_RUNTIME_DISABLETESTTIMERS" ) ) disabletesttimers = true ;
176
174
#ifdef MGONGPU_HASRDTSC
177
175
if ( getenv ( " CUDACPP_RUNTIME_USECHRONOTIMERS" ) ) usechronotimers = true ;
178
176
#endif
179
177
if ( getenv ( " CUDACPP_RUNTIME_REMOVECOUNTEROVERHEAD" ) ) removetimeroverhead = true ;
180
- for ( int icounter = 0 ; icounter < NCOUNTERSMAX + 3 ; icounter++ )
178
+ for ( int icounter = 0 ; icounter < NCOUNTERSMAX + 4 ; icounter++ ) // include icalibcounter = NCOUNTERSMAX+3
181
179
{
182
180
array_tags[icounter] = " " ; // ensure that this is initialized to ""
183
181
array_istesttimer[icounter] = false ; // ensure that this is initialized to false
@@ -193,7 +191,7 @@ extern "C"
193
191
counters_register_counter_ ( &icalibcounter, " OVERHEAD CALIBRATION" );
194
192
mgOnGpu::ChronoTimer<std::chrono::high_resolution_clock> calibtimer;
195
193
calibtimer.start ();
196
- constexpr size_t ncall = 1000000 ;
194
+ constexpr size_t ncall = 10000000 ; // 10M calls are expected to take slightly less than ~1s (this will be in counter overhead)
197
195
for ( size_t icall = 0 ; icall < ncall; icall++ )
198
196
{
199
197
counters_start_counter_ ( &icalibcounter, &nevtdummy );
@@ -202,6 +200,8 @@ extern "C"
202
200
calibtimer.stop ();
203
201
overheadpercallseconds = calibtimer.getTotalDurationSeconds () / ncall;
204
202
}
203
+ if ( getenv ( " CUDACPP_RUNTIME_DISABLECALLTIMERS" ) ) disablecalltimers = true ;
204
+ if ( getenv ( " CUDACPP_RUNTIME_DISABLETESTTIMERS" ) ) disabletesttimers = true ;
205
205
return ;
206
206
}
207
207
@@ -216,10 +216,12 @@ extern "C"
216
216
float program_totaltime = ( usechronotimers ? program_chronotimer.getTotalDurationSeconds () : program_rdtsctimer.getTotalDurationSeconds () );
217
217
float program_overhead = 0 ;
218
218
// Extract time duration from all timers
219
- float array_totaltimes[NCOUNTERSMAX + 3 ] = { 0 };
220
- float array_overheads[NCOUNTERSMAX + 3 ] = { 0 };
221
- for ( int icounter = 1 ; icounter < NCOUNTERSMAX + 1 ; icounter++ )
219
+ float array_totaltimes[NCOUNTERSMAX + 4 ] = { 0 };
220
+ float array_overheads[NCOUNTERSMAX + 4 ] = { 0 };
221
+ for ( int icounter = 1 ; icounter < NCOUNTERSMAX + 4 ; icounter++ ) // include icalibcounter = NCOUNTERSMAX+3
222
222
{
223
+ if ( icounter == NCOUNTERSMAX + 1 ) continue ;
224
+ if ( icounter == NCOUNTERSMAX + 2 ) continue ;
223
225
if ( usechronotimers )
224
226
array_totaltimes[icounter] = array_chronotimers[icounter].getTotalDurationSeconds ();
225
227
else
@@ -235,7 +237,7 @@ extern "C"
235
237
// Remove overheads of included timers if any
236
238
if ( removetimeroverhead )
237
239
{
238
- for ( int icounter = 1 ; icounter < NCOUNTERSMAX + 1 ; icounter++ )
240
+ for ( int icounter = 1 ; icounter < NCOUNTERSMAX + 1 ; icounter++ ) // no need to include icalibcounter = NCOUNTERSMAX+3
239
241
{
240
242
for ( int icounterIn : array_included[icounter] )
241
243
array_totaltimes[icounter] -= array_overheads[icounterIn];
@@ -259,7 +261,7 @@ extern "C"
259
261
array_tags[0 ] = " Fortran Other" ;
260
262
array_counters[0 ] = 1 ;
261
263
array_totaltimes[0 ] = program_totaltime;
262
- for ( int icounter = 1 ; icounter < NCOUNTERSMAX + 1 ; icounter++ )
264
+ for ( int icounter = 1 ; icounter < NCOUNTERSMAX + 4 ; icounter++ ) // include icalibcounter = NCOUNTERSMAX+3
263
265
{
264
266
if ( !array_istesttimer[icounter] ) // skip TEST counters
265
267
array_totaltimes[0 ] -= array_totaltimes[icounter];
@@ -280,7 +282,7 @@ extern "C"
280
282
array_counters[NCOUNTERSMAX + 1 ] = 1 ;
281
283
array_totaltimes[NCOUNTERSMAX + 1 ] = program_totaltime - array_totaltimes[NCOUNTERSMAX + 2 ];
282
284
// Dump individual counters
283
- for ( int icounter = 0 ; icounter < NCOUNTERSMAX + 3 ; icounter++ )
285
+ for ( int icounter = 0 ; icounter < NCOUNTERSMAX + 3 ; icounter++ ) // exclude icalibcounter = NCOUNTERSMAX+3 (would print a negative value here!)
284
286
{
285
287
if ( array_tags[icounter] != " " )
286
288
{
0 commit comments