
Commit c273c1b

Standalone functions for computing score
1 parent ed4e6d5 commit c273c1b

File tree

1 file changed: +163, -97 lines

mlperf_logging/result_summarizer/result_summarizer.py

Lines changed: 163 additions & 97 deletions
@@ -324,6 +324,127 @@ def _get_scaling_factor(folder):
     return scaling_factor
 
 
+def _compute_strong_score_standalone(benchmark, system, has_power, benchmark_folder, usage, ruleset, desc={"submitter": None}):
+    pattern = '{folder}/result_*.txt'.format(folder=benchmark_folder)
+    result_files = glob.glob(pattern, recursive=True)
+    scores = []
+    power_scores = []
+    dropped_scores = 0
+    for result_file in result_files:
+        try:
+            loglines = _read_result_file(result_file, usage, ruleset)
+            start, stop = _query_run_start_stop(loglines)
+            time_to_train_ms = stop - start
+            scores.append(time_to_train_ms / 60 / 1000)
+        except ValueError as e:
+            print('{} in {}'.format(e, result_file))
+            dropped_scores += 1
+            continue
+        if has_power:
+            power_scores.append(_compute_total_power(benchmark_folder, result_file, time_to_train_ms, ruleset))
+    max_dropped_scores = 4 if benchmark == 'unet3d' else 1
+    if dropped_scores > max_dropped_scores:
+        print('CRITICAL ERROR: Too many non-converging runs '
+              'for {} {}/{}'.format(desc['submitter'], system, benchmark))
+        print('** CRITICAL ERROR ** Results in the table for {} {}/{} are '
+              'NOT correct'.format(desc['submitter'], system, benchmark))
+    elif dropped_scores >= 1:
+        print('NOTICE: Dropping non-converged run(s) for {} {}/{} using '
+              'olympic scoring.'.format(
+                  desc['submitter'],
+                  system,
+                  benchmark,
+              ))
+
+    if has_power:
+        unsorted_scores = scores.copy()
+
+    score = None
+    scaling_factor = _get_scaling_factor(benchmark_folder)
+    if dropped_scores <= max_dropped_scores:
+        olympic_avg = _compute_olympic_average(
+            scores, dropped_scores, max_dropped_scores)
+        if olympic_avg is not None:
+            score = olympic_avg
+            score *= scaling_factor
+
+    power_score = None
+    if has_power and dropped_scores <= max_dropped_scores:
+        index = [i[0] for i in sorted(enumerate(unsorted_scores), key=lambda x: x[1])]
+        olympic_avg = _index_olympic_average(
+            power_scores, index, dropped_scores, max_dropped_scores)
+        if olympic_avg is not None:
+            power_score = olympic_avg
+            power_score *= scaling_factor
+    return score, power_score
+
+
+def _compute_weak_score_standalone(benchmark, system, has_power, benchmark_folder, usage, ruleset, desc={"submitter": None}):
+    power_scores = []
+    # Read scores from result files.
+    pattern = '{folder}/result_*.txt'.format(folder=benchmark_folder)
+    result_files = glob.glob(pattern, recursive=True)
+    global_start, global_stop = float('inf'), float('-inf')
+    number_of_models = 0
+    instance_scale = None
+    for result_file in result_files:
+        try:
+            loglines = _read_result_file(result_file, usage, ruleset)
+            start, stop = _query_run_start_stop(loglines)
+            global_start = min(global_start, start)
+            global_stop = max(global_stop, stop)
+            number_of_models += 1
+            if instance_scale == None:
+                instance_scale = _query_instance_scale(loglines)
+            else:
+                assert instance_scale == _query_instance_scale(loglines)
+        except ValueError as e:
+            print('{} in {}'.format(e, result_file))
+            continue
+        if has_power:
+            time_to_train_ms = stop - start
+            power_scores.append(_compute_total_power(benchmark_folder, result_file, time_to_train_ms, ruleset))
+
+    scores = {}
+    power = {}
+    if number_of_models >= get_result_file_counts(usage)[benchmark]:
+        scores['{}:{}'.format(
+            benchmark,
+            'time_to_train_all',
+        )] = (global_stop - global_start) / 60 / 1000
+        scores['{}:{}'.format(
+            benchmark,
+            'number_of_models',
+        )] = number_of_models
+        scores['{}:{}'.format(
+            benchmark,
+            'instance_scale',
+        )] = instance_scale
+    else:
+        print('CRITICAL ERROR: Not enough converging weak scaling runs '
+              'for {} {}/{}'.format(desc['submitter'], system, benchmark))
+
+    if has_power:
+        olympic_avg = _compute_olympic_average(
+            power_scores, 1, 1)
+        if olympic_avg is not None:
+            power['{}:{}'.format(
+                benchmark,
+                'time_to_train_all',
+            )] = olympic_avg
+            power['{}:{}'.format(
+                benchmark,
+                'number_of_models',
+            )] = olympic_avg
+            power['{}:{}'.format(
+                benchmark,
+                'instance_scale',
+            )] = olympic_avg
+
+    return scores, power
+
+
 def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset):
     # Collect scores for benchmarks.
     benchmark_scores = {}
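
Note: the helpers _compute_olympic_average and _index_olympic_average used above are defined elsewhere in result_summarizer.py and are not part of this diff. As a rough illustration only, "olympic scoring" means discarding the extreme runs and averaging the rest; the sketch below is an assumption of the general idea (with the dropped/max_dropped bookkeeping omitted), not the repository's implementation.

    # Illustrative sketch only, not the real _compute_olympic_average.
    def olympic_average_sketch(run_times_minutes):
        if len(run_times_minutes) < 3:
            return None                          # nothing left after trimming
        ordered = sorted(run_times_minutes)
        trimmed = ordered[1:-1]                  # drop the single best and worst run
        return sum(trimmed) / len(trimmed)       # average the remaining runs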
@@ -340,55 +461,11 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset):
         benchmark = _benchmark_alias(folder_parts[-1])
         system = folder_parts[-3] if usage == 'hpc' else folder_parts[-2]
         # Read scores from result files.
-        pattern = '{folder}/result_*.txt'.format(folder=benchmark_folder)
-        result_files = glob.glob(pattern, recursive=True)
-        scores = []
-        power_scores = []
-        dropped_scores = 0
-        for result_file in result_files:
-            try:
-                loglines = _read_result_file(result_file, usage, ruleset)
-                start, stop = _query_run_start_stop(loglines)
-                time_to_train_ms = stop - start
-                scores.append(time_to_train_ms / 60 / 1000)
-            except ValueError as e:
-                print('{} in {}'.format(e, result_file))
-                dropped_scores += 1
-                continue
-            if has_power:
-                power_scores.append(_compute_total_power(benchmark_folder, result_file, time_to_train_ms, ruleset))
-        max_dropped_scores = 4 if benchmark == 'unet3d' else 1
-        if dropped_scores > max_dropped_scores:
-            print('CRITICAL ERROR: Too many non-converging runs '
-                  'for {} {}/{}'.format(desc['submitter'], system, benchmark))
-            print('** CRITICAL ERROR ** Results in the table for {} {}/{} are '
-                  'NOT correct'.format(desc['submitter'], system, benchmark))
-        elif dropped_scores >= 1:
-            print('NOTICE: Dropping non-converged run(s) for {} {}/{} using '
-                  'olympic scoring.'.format(
-                      desc['submitter'],
-                      system,
-                      benchmark,
-                  ))
-
-        if has_power:
-            unsorted_scores = scores.copy()
-
-        scaling_factor = _get_scaling_factor(benchmark_folder)
-        if dropped_scores <= max_dropped_scores:
-            olympic_avg = _compute_olympic_average(
-                scores, dropped_scores, max_dropped_scores)
-            if olympic_avg is not None:
-                benchmark_scores[benchmark] = olympic_avg
-                benchmark_scores[benchmark] *= scaling_factor
-
-        if has_power and dropped_scores <= max_dropped_scores:
-            index = [i[0] for i in sorted(enumerate(unsorted_scores), key=lambda x: x[1])]
-            olympic_avg = _index_olympic_average(
-                power_scores, index, dropped_scores, max_dropped_scores)
-            if olympic_avg is not None:
-                benchmark_power_scores[benchmark] = olympic_avg
-                benchmark_power_scores[benchmark] *= scaling_factor
+        score, power_score = _compute_strong_score_standalone(benchmark, system, has_power, benchmark_folder, usage, ruleset, desc)
+        if score is not None:
+            benchmark_scores[benchmark] = score
+        if power_score is not None:
+            benchmark_power_scores[benchmark] = power_score
     _fill_empty_benchmark_scores(benchmark_scores, usage, ruleset)
     if len(benchmark_power_scores) > 0:
         _fill_empty_benchmark_scores(benchmark_power_scores, usage, ruleset)
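
Since the point of this commit is to make the per-benchmark score computation callable on its own, a direct call along the lines of the sketch below should work for a single benchmark folder. Every argument value here (benchmark name, folder path, usage and ruleset strings, submitter) is a placeholder assumption, not something taken from this commit.

    # Hypothetical standalone call; all values below are placeholders.
    from mlperf_logging.result_summarizer import result_summarizer

    score, power_score = result_summarizer._compute_strong_score_standalone(
        benchmark='resnet',                                  # hypothetical benchmark name
        system='example_system',                             # only used in log messages
        has_power=False,                                     # skip power-score computation
        benchmark_folder='results/example_system/resnet',    # folder holding result_*.txt
        usage='training',                                    # assumed usage string
        ruleset='3.0.0',                                     # assumed ruleset version
        desc={'submitter': 'ExampleOrg'},
    )
    print(score, power_score)                                # olympic-averaged minutes, or None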
@@ -426,64 +503,53 @@ def _compute_weak_scaling_scores(desc, system_folder, usage, ruleset):
         system = folder_parts[-3]
         # Check if this benchmark has power results
         has_power = _has_power(benchmark_folder)
-        power_scores = []
-        # Read scores from result files.
-        pattern = '{folder}/result_*.txt'.format(folder=benchmark_folder)
-        result_files = glob.glob(pattern, recursive=True)
-        global_start, global_stop = float('inf'), float('-inf')
-        number_of_models = 0
-        instance_scale = None
-        for result_file in result_files:
-            try:
-                loglines = _read_result_file(result_file, usage, ruleset)
-                start, stop = _query_run_start_stop(loglines)
-                global_start = min(global_start, start)
-                global_stop = max(global_stop, stop)
-                number_of_models += 1
-                if instance_scale == None:
-                    instance_scale = _query_instance_scale(loglines)
-                else:
-                    assert instance_scale == _query_instance_scale(loglines)
-            except ValueError as e:
-                print('{} in {}'.format(e, result_file))
-                continue
-            if has_power:
-                time_to_train_ms = stop - start
-                power_scores.append(_compute_total_power(benchmark_folder, result_file, time_to_train_ms, ruleset))
-
-        if number_of_models >= get_result_file_counts(usage)[benchmark]:
+        scores, power_scores = _compute_weak_score_standalone(benchmark, system, has_power, benchmark_folder, usage, ruleset, desc)
+
+        if scores:
             benchmark_scores['{}:{}'.format(
                 benchmark,
                 'time_to_train_all',
-            )] = (global_stop - global_start) / 60 / 1000
+            )] = scores['{}:{}'.format(
+                benchmark,
+                'time_to_train_all',
+            )]
             benchmark_scores['{}:{}'.format(
                 benchmark,
                 'number_of_models',
-            )] = number_of_models
+            )] = scores['{}:{}'.format(
+                benchmark,
+                'number_of_models',
+            )]
             benchmark_scores['{}:{}'.format(
                 benchmark,
                 'instance_scale',
-            )] = instance_scale
-        else:
-            print('CRITICAL ERROR: Not enough converging weak scaling runs '
-                  'for {} {}/{}'.format(desc['submitter'], system, benchmark))
+            )] = scores['{}:{}'.format(
+                benchmark,
+                'instance_scale',
+            )]
 
-        if has_power:
-            olympic_avg = _compute_olympic_average(
-                power_scores, 1, 1)
-            if olympic_avg is not None:
-                benchmark_power_scores['{}:{}'.format(
-                    benchmark,
-                    'time_to_train_all',
-                )] = olympic_avg
-                benchmark_power_scores['{}:{}'.format(
-                    benchmark,
-                    'number_of_models',
-                )] = olympic_avg
-                benchmark_power_scores['{}:{}'.format(
-                    benchmark,
-                    'instance_scale',
-                )] = olympic_avg
+        if power_scores:
+            benchmark_power_scores['{}:{}'.format(
+                benchmark,
+                'time_to_train_all',
+            )] = power_scores['{}:{}'.format(
+                benchmark,
+                'time_to_train_all',
+            )]
+            benchmark_power_scores['{}:{}'.format(
+                benchmark,
+                'number_of_models',
+            )] = power_scores['{}:{}'.format(
+                benchmark,
+                'number_of_models',
+            )]
+            benchmark_power_scores['{}:{}'.format(
+                benchmark,
+                'instance_scale',
+            )] = power_scores['{}:{}'.format(
+                benchmark,
+                'instance_scale',
+            )]
 
         _fill_empty_benchmark_scores(benchmark_scores,
                                      usage,
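
The caller above copies the weak-scaling entries key by key, which implies the return shape sketched here for _compute_weak_score_standalone. The benchmark name and numbers below are made up purely for illustration.

    # Return shape implied by the caller above; all values are illustrative.
    scores = {
        'deepcam:time_to_train_all': 12.5,   # (global_stop - global_start) in minutes
        'deepcam:number_of_models': 16,      # converged weak-scaling instances found
        'deepcam:instance_scale': 128,       # instance scale read from the logs
    }
    power_scores = {}                        # empty when has_power is False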
