Skip to content

Commit ebdf9ea

Browse files
authored
refactor(scripts): unify metric naming format in log parser (#6)
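### Description

Standardizes NeMo metric names in the log parser by replacing spaces with underscores (e.g. "parallel save sharding" becomes "parallel_save_sharding"). Two capitalized names, "Global Checkpoint Save" and "Async finalization time", are also lowercased. This moves the extracted metrics toward a consistent naming convention; entries not touched by this commit, such as "plan time" and "finish D2H", keep their existing names.

Change-Id: I6da106b1ed56e789b3bde6c86d5e871dfdea02d5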
1 parent 123596a commit ebdf9ea

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

scripts/parse_log_and_summarize_nemo.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def parse_log_file(log_file):
3535
),
3636
(
3737
re.compile(r"\[NeMo [DI] .*? (fully_parallel):\d+ rank:(\d+)\] parallel save sharding, time: ([\d.eE+-]+)"),
38-
"parallel save sharding",
38+
"parallel_save_sharding",
3939
),
4040
(re.compile(r"\[NeMo [DI] .*? (state_dict_saver):\d+ rank:(\d+)\] .*?, plan time: ([\d.eE+-]+)"), "plan time"),
4141
(
@@ -51,14 +51,14 @@ def parse_log_file(log_file):
5151
),
5252
(
5353
re.compile(r"\[NeMo [DI] .*? (filesystem_async):\d+ rank:(\d+)\] D2H and push, time: ([\d.eE+-]+)"),
54-
"D2H and push",
54+
"D2H_and_push",
5555
),
5656
(
5757
re.compile(
5858
r"\[NeMo I .*? (nemo_logging):\d+ rank:(\d+)\] Global Checkpoint Save :.*? "
5959
r"Save duration: ([\d.eE+-]+)s"
6060
),
61-
"Global Checkpoint Save",
61+
"global_checkpoint_save",
6262
),
6363
(
6464
re.compile(
@@ -70,7 +70,7 @@ def parse_log_file(log_file):
7070
(re.compile(r"\[NeMo D .*? (async_utils):\d+ rank:(\d+)\].*? takes ([\d.eE+-]+) to finish D2H"), "finish D2H"),
7171
(
7272
re.compile(r"\[NeMo D .*? (async_utils):\d+ rank:(\d+)\].*? takes ([\d.eE+-]+) to schedule async ckpt"),
73-
"schedule async ckpt",
73+
"schedule_async_ckpt",
7474
),
7575
(
7676
re.compile(
@@ -88,7 +88,7 @@ def parse_log_file(log_file):
8888
),
8989
(
9090
re.compile(r"\[NeMo [DI] .*? (filesystem_async):\d+ rank:(\d+)\] .*? write\(sync,parallel\): ([\d.eE+-]+)"),
91-
"write sync parallel",
91+
"write_sync_parallel",
9292
),
9393
(
9494
re.compile(
@@ -102,7 +102,7 @@ def parse_log_file(log_file):
102102
r"\[NeMo D .*? (async_utils):\d+ rank:(\d+)\] "
103103
r"TemporalAsyncCaller: Async process join finished after ([\d.eE+-]+)s from forking"
104104
),
105-
"async process join",
105+
"async_process_join",
106106
),
107107
(re.compile(r"\[NeMo D .*? (state_dict_saver):\d+ rank:(\d+)\].*?, gather: ([\d.eE+-]+)"), "gather"),
108108
(
@@ -118,7 +118,7 @@ def parse_log_file(log_file):
118118
(re.compile(r"\[NeMo D .*? (utils):\d+ rank:(\d+)\] finalize took ([\d.eE+-]+)s"), "finalize"),
119119
(
120120
re.compile(r"\[NeMo I .*? (nemo_logging):\d+ rank:(\d+)\] Async finalization time took ([\d.eE+-]+) s"),
121-
"Async finalization time",
121+
"async_finalization_time",
122122
),
123123
(
124124
re.compile(

0 commit comments

Comments
 (0)