Skip to content

Commit 7e6f676

Browse files
authored
feat(metrics): improve multiproof worker metrics (#19337)
1 parent e2b5c73 commit 7e6f676

File tree

3 files changed

+140
-23
lines changed

3 files changed

+140
-23
lines changed

crates/engine/tree/src/tree/payload_processor/multiproof.rs

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use alloy_primitives::{
99
use crossbeam_channel::{unbounded, Receiver as CrossbeamReceiver, Sender as CrossbeamSender};
1010
use dashmap::DashMap;
1111
use derive_more::derive::Deref;
12-
use metrics::Histogram;
12+
use metrics::{Gauge, Histogram};
1313
use reth_metrics::Metrics;
1414
use reth_revm::state::EvmState;
1515
use reth_trie::{
@@ -319,8 +319,6 @@ impl MultiproofInput {
319319
/// `ProofSequencer`.
320320
#[derive(Debug)]
321321
pub struct MultiproofManager {
322-
/// Currently running calculations.
323-
inflight: usize,
324322
/// Handle to the proof worker pools (storage and account).
325323
proof_worker_handle: ProofWorkerHandle,
326324
/// Cached storage proof roots for missed leaves; this maps
@@ -349,8 +347,11 @@ impl MultiproofManager {
349347
proof_worker_handle: ProofWorkerHandle,
350348
proof_result_tx: CrossbeamSender<ProofResultMessage>,
351349
) -> Self {
350+
// Initialize the max worker gauges with the worker pool sizes
351+
metrics.max_storage_workers.set(proof_worker_handle.total_storage_workers() as f64);
352+
metrics.max_account_workers.set(proof_worker_handle.total_account_workers() as f64);
353+
352354
Self {
353-
inflight: 0,
354355
metrics,
355356
proof_worker_handle,
356357
missed_leaves_storage_roots: Default::default(),
@@ -359,7 +360,7 @@ impl MultiproofManager {
359360
}
360361

361362
/// Dispatches a new multiproof calculation to worker pools.
362-
fn dispatch(&mut self, input: PendingMultiproofTask) {
363+
fn dispatch(&self, input: PendingMultiproofTask) {
363364
// If there are no proof targets, we can just send an empty multiproof back immediately
364365
if input.proof_targets_is_empty() {
365366
debug!(
@@ -381,7 +382,7 @@ impl MultiproofManager {
381382
}
382383

383384
/// Dispatches a single storage proof calculation to worker pool.
384-
fn dispatch_storage_proof(&mut self, storage_multiproof_input: StorageMultiproofInput) {
385+
fn dispatch_storage_proof(&self, storage_multiproof_input: StorageMultiproofInput) {
385386
let StorageMultiproofInput {
386387
hashed_state_update,
387388
hashed_address,
@@ -432,8 +433,12 @@ impl MultiproofManager {
432433
return;
433434
}
434435

435-
self.inflight += 1;
436-
self.metrics.inflight_multiproofs_histogram.record(self.inflight as f64);
436+
self.metrics
437+
.active_storage_workers_histogram
438+
.record(self.proof_worker_handle.active_storage_workers() as f64);
439+
self.metrics
440+
.active_account_workers_histogram
441+
.record(self.proof_worker_handle.active_account_workers() as f64);
437442
self.metrics
438443
.pending_storage_multiproofs_histogram
439444
.record(self.proof_worker_handle.pending_storage_tasks() as f64);
@@ -443,9 +448,13 @@ impl MultiproofManager {
443448
}
444449

445450
/// Signals that a multiproof calculation has finished.
446-
fn on_calculation_complete(&mut self) {
447-
self.inflight = self.inflight.saturating_sub(1);
448-
self.metrics.inflight_multiproofs_histogram.record(self.inflight as f64);
451+
fn on_calculation_complete(&self) {
452+
self.metrics
453+
.active_storage_workers_histogram
454+
.record(self.proof_worker_handle.active_storage_workers() as f64);
455+
self.metrics
456+
.active_account_workers_histogram
457+
.record(self.proof_worker_handle.active_account_workers() as f64);
449458
self.metrics
450459
.pending_storage_multiproofs_histogram
451460
.record(self.proof_worker_handle.pending_storage_tasks() as f64);
@@ -455,7 +464,7 @@ impl MultiproofManager {
455464
}
456465

457466
/// Dispatches a single multiproof calculation to worker pool.
458-
fn dispatch_multiproof(&mut self, multiproof_input: MultiproofInput) {
467+
fn dispatch_multiproof(&self, multiproof_input: MultiproofInput) {
459468
let MultiproofInput {
460469
source,
461470
hashed_state_update,
@@ -506,8 +515,12 @@ impl MultiproofManager {
506515
return;
507516
}
508517

509-
self.inflight += 1;
510-
self.metrics.inflight_multiproofs_histogram.record(self.inflight as f64);
518+
self.metrics
519+
.active_storage_workers_histogram
520+
.record(self.proof_worker_handle.active_storage_workers() as f64);
521+
self.metrics
522+
.active_account_workers_histogram
523+
.record(self.proof_worker_handle.active_account_workers() as f64);
511524
self.metrics
512525
.pending_storage_multiproofs_histogram
513526
.record(self.proof_worker_handle.pending_storage_tasks() as f64);
@@ -520,8 +533,14 @@ impl MultiproofManager {
520533
#[derive(Metrics, Clone)]
521534
#[metrics(scope = "tree.root")]
522535
pub(crate) struct MultiProofTaskMetrics {
523-
/// Histogram of inflight multiproofs.
524-
pub inflight_multiproofs_histogram: Histogram,
536+
/// Histogram of active storage workers processing proofs.
537+
pub active_storage_workers_histogram: Histogram,
538+
/// Histogram of active account workers processing proofs.
539+
pub active_account_workers_histogram: Histogram,
540+
/// Gauge for the maximum number of storage workers in the pool.
541+
pub max_storage_workers: Gauge,
542+
/// Gauge for the maximum number of account workers in the pool.
543+
pub max_account_workers: Gauge,
525544
/// Histogram of pending storage multiproofs in the queue.
526545
pub pending_storage_multiproofs_histogram: Histogram,
527546
/// Histogram of pending account multiproofs in the queue.
@@ -583,7 +602,6 @@ pub(crate) struct MultiProofTaskMetrics {
583602
/// ▼ │
584603
/// ┌──────────────────────────────────────────────────────────────┐ │
585604
/// │ MultiproofManager │ │
586-
/// │ - Tracks inflight calculations │ │
587605
/// │ - Deduplicates against fetched_proof_targets │ │
588606
/// │ - Routes to appropriate worker pool │ │
589607
/// └──┬───────────────────────────────────────────────────────────┘ │
@@ -624,7 +642,6 @@ pub(crate) struct MultiProofTaskMetrics {
624642
///
625643
/// - **[`MultiproofManager`]**: Calculation orchestrator
626644
/// - Decides between fast path ([`EmptyProof`]) and worker dispatch
627-
/// - Tracks inflight calculations
628645
/// - Routes storage-only vs full multiproofs to appropriate workers
629646
/// - Records metrics for monitoring
630647
///

crates/trie/parallel/src/proof_task.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,6 +1017,10 @@ pub struct ProofWorkerHandle {
10171017
/// Counter tracking available account workers. Workers decrement when starting work,
10181018
/// increment when finishing. Used to determine whether to chunk multiproofs.
10191019
account_available_workers: Arc<AtomicUsize>,
1020+
/// Total number of storage workers spawned.
1021+
storage_worker_count: usize,
1022+
/// Total number of account workers spawned.
1023+
account_worker_count: usize,
10201024
}
10211025

10221026
impl ProofWorkerHandle {
@@ -1118,6 +1122,8 @@ impl ProofWorkerHandle {
11181122
account_work_tx,
11191123
storage_available_workers,
11201124
account_available_workers,
1125+
storage_worker_count,
1126+
account_worker_count,
11211127
}
11221128
}
11231129

@@ -1141,6 +1147,32 @@ impl ProofWorkerHandle {
11411147
self.account_work_tx.len()
11421148
}
11431149

1150+
/// Returns the total number of storage workers in the pool.
1151+
pub const fn total_storage_workers(&self) -> usize {
1152+
self.storage_worker_count
1153+
}
1154+
1155+
/// Returns the total number of account workers in the pool.
1156+
pub const fn total_account_workers(&self) -> usize {
1157+
self.account_worker_count
1158+
}
1159+
1160+
/// Returns the number of storage workers currently processing tasks.
1161+
///
1162+
/// This is calculated as total workers minus available workers.
1163+
pub fn active_storage_workers(&self) -> usize {
1164+
self.storage_worker_count
1165+
.saturating_sub(self.storage_available_workers.load(Ordering::Relaxed))
1166+
}
1167+
1168+
/// Returns the number of account workers currently processing tasks.
1169+
///
1170+
/// This is calculated as total workers minus available workers.
1171+
pub fn active_account_workers(&self) -> usize {
1172+
self.account_worker_count
1173+
.saturating_sub(self.account_available_workers.load(Ordering::Relaxed))
1174+
}
1175+
11441176
/// Dispatch a storage proof computation to storage worker pool
11451177
///
11461178
/// The result will be sent via the `proof_result_sender` channel.

etc/grafana/dashboards/overview.json

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4320,14 +4320,46 @@
43204320
},
43214321
"unit": "none"
43224322
},
4323-
"overrides": []
4323+
"overrides": [
4324+
{
4325+
"matcher": {
4326+
"id": "byName",
4327+
"options": "Max storage workers"
4328+
},
4329+
"properties": [
4330+
{
4331+
"id": "custom.lineStyle",
4332+
"value": {
4333+
"dash": [10, 10],
4334+
"fill": "dash"
4335+
}
4336+
}
4337+
]
4338+
},
4339+
{
4340+
"matcher": {
4341+
"id": "byName",
4342+
"options": "Max account workers"
4343+
},
4344+
"properties": [
4345+
{
4346+
"id": "custom.lineStyle",
4347+
"value": {
4348+
"dash": [10, 10],
4349+
"fill": "dash"
4350+
}
4351+
}
4352+
]
4353+
}
4354+
]
43244355
},
43254356
"gridPos": {
43264357
"h": 8,
43274358
"w": 12,
43284359
"x": 12,
43294360
"y": 104
43304361
},
4362+
"description": "The max metrics (Max storage workers and Max account workers) are displayed as dashed lines to highlight the configured upper limits.",
43314363
"id": 256,
43324364
"options": {
43334365
"legend": {
@@ -4350,14 +4382,50 @@
43504382
"uid": "${DS_PROMETHEUS}"
43514383
},
43524384
"editorMode": "code",
4353-
"expr": "reth_tree_root_inflight_multiproofs_histogram{$instance_label=\"$instance\",quantile=~\"(0|0.5|0.9|0.95|1)\"}",
4385+
"expr": "reth_tree_root_active_storage_workers_histogram{$instance_label=\"$instance\",quantile=~\"(0|0.5|0.9|0.95|1)\"}",
43544386
"instant": false,
4355-
"legendFormat": "{{quantile}} percentile",
4387+
"legendFormat": "Storage workers {{quantile}} percentile",
43564388
"range": true,
4357-
"refId": "Branch Nodes"
4389+
"refId": "A"
4390+
},
4391+
{
4392+
"datasource": {
4393+
"type": "prometheus",
4394+
"uid": "${DS_PROMETHEUS}"
4395+
},
4396+
"editorMode": "code",
4397+
"expr": "reth_tree_root_active_account_workers_histogram{$instance_label=\"$instance\",quantile=~\"(0|0.5|0.9|0.95|1)\"}",
4398+
"instant": false,
4399+
"legendFormat": "Account workers {{quantile}} percentile",
4400+
"range": true,
4401+
"refId": "B"
4402+
},
4403+
{
4404+
"datasource": {
4405+
"type": "prometheus",
4406+
"uid": "${DS_PROMETHEUS}"
4407+
},
4408+
"editorMode": "code",
4409+
"expr": "reth_tree_root_max_storage_workers{$instance_label=\"$instance\"}",
4410+
"instant": false,
4411+
"legendFormat": "Max storage workers",
4412+
"range": true,
4413+
"refId": "C"
4414+
},
4415+
{
4416+
"datasource": {
4417+
"type": "prometheus",
4418+
"uid": "${DS_PROMETHEUS}"
4419+
},
4420+
"editorMode": "code",
4421+
"expr": "reth_tree_root_max_account_workers{$instance_label=\"$instance\"}",
4422+
"instant": false,
4423+
"legendFormat": "Max account workers",
4424+
"range": true,
4425+
"refId": "D"
43584426
}
43594427
],
4360-
"title": "In-flight MultiProof requests",
4428+
"title": "Active MultiProof Workers",
43614429
"type": "timeseries"
43624430
},
43634431
{

0 commit comments

Comments
 (0)