Skip to content

Commit 991fda7

Browse files
Fix Total CPU % on /workers tab to normalize by total nthreads (#9195)
1 parent 4fb4814 commit 991fda7

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

distributed/dashboard/components/scheduler.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4076,6 +4076,7 @@ def __init__(self, scheduler, **kwargs):
40764076
"host_disk_io.read_bps",
40774077
"host_disk_io.write_bps",
40784078
"cpu_fraction",
4079+
"_is_total",
40794080
]
40804081
workers = self.scheduler.workers.values()
40814082
self.extra_names = sorted(
@@ -4128,7 +4129,14 @@ def __init__(self, scheduler, **kwargs):
41284129
}
41294130

41304131
formatters = {
4131-
"cpu": NumberFormatter(format="0 %"),
4132+
# Use a pure number (0 to nthreads) on the total line and a %
4133+
# (e.g. 0 to 400% for 4 threads per worker) on the individual workers.
4134+
# It would be very confusing to read e.g. 9000% on the total, whereas
4135+
# seeing that ~90 CPU equivalents are being fully used is more meaningful.
4136+
"cpu": HTMLTemplateFormatter(
4137+
template="<% if (_is_total) { %><%= (value).toFixed(1) %>"
4138+
"<% } else { %><%= Math.round(value * 100) %> %<% } %>"
4139+
),
41324140
"memory_percent": NumberFormatter(format="0.0 %"),
41334141
"memory": NumberFormatter(format="0.0 b"),
41344142
"memory_limit": NumberFormatter(format="0.0 b"),
@@ -4281,11 +4289,15 @@ def update(self):
42814289
data["cpu"][-1] = ws.metrics["cpu"] / 100.0
42824290
data["cpu_fraction"][-1] = ws.metrics["cpu"] / 100.0 / ws.nthreads
42834291
data["nthreads"][-1] = ws.nthreads
4292+
data["_is_total"][-1] = False
42844293

42854294
for name in self.names + self.extra_names:
42864295
if name == "name":
42874296
data[name].insert(0, f"Total ({len(data[name])})")
42884297
continue
4298+
if name == "_is_total":
4299+
data[name].insert(0, True)
4300+
continue
42894301
try:
42904302
if len(self.scheduler.workers) == 0:
42914303
total_data = None
@@ -4308,7 +4320,6 @@ def update(self):
43084320
total_data = (
43094321
sum(ws.metrics["cpu"] for ws in self.scheduler.workers.values())
43104322
/ 100
4311-
/ len(self.scheduler.workers.values())
43124323
)
43134324
elif name == "cpu_fraction":
43144325
total_data = (

distributed/dashboard/tests/test_scheduler_bokeh.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,17 @@ async def test_WorkerTable(c, s, a, b):
564564
assert all(nthreads)
565565
assert nthreads[0] == nthreads[1] + nthreads[2]
566566

567+
# Total CPU should be expressed in CPU equivalents (sum of all workers' CPU % / 100)
568+
cpu = wt.source.data["cpu"]
569+
expected_cpu_total = sum(ws.metrics["cpu"] for ws in s.workers.values()) / 100
570+
assert cpu[0] == expected_cpu_total
571+
572+
# _is_total flag should be set correctly
573+
is_total = wt.source.data["_is_total"]
574+
assert is_total[0] is True
575+
assert is_total[1] is False
576+
assert is_total[2] is False
577+
567578

568579
@gen_cluster(client=True)
569580
async def test_WorkerTable_custom_metrics(c, s, a, b):

0 commit comments

Comments
 (0)