From c1a50bb55eb5326392cc1334bc75a0a5c756b00a Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 13:05:06 +1000 Subject: [PATCH 01/35] Add opentelemetry OTLP exporter for spans. --- Cargo.lock | 302 ++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 4 + lighthouse/Cargo.toml | 4 + lighthouse/src/main.rs | 29 +++- 4 files changed, 313 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3786392feb6..ba192a151b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1253,7 +1253,7 @@ dependencies = [ "eth2", "ethereum_ssz", "lighthouse_version", - "reqwest", + "reqwest 0.11.27", "sensitive_url", "serde", "serde_json", @@ -1707,7 +1707,7 @@ dependencies = [ "futures-core", "prost", "prost-types", - "tonic", + "tonic 0.12.3", "tracing-core", ] @@ -1731,7 +1731,7 @@ dependencies = [ "thread_local", "tokio", "tokio-stream", - "tonic", + "tonic 0.12.3", "tracing", "tracing-core", "tracing-subscriber", @@ -2270,7 +2270,7 @@ dependencies = [ "ethabi 16.0.0", "ethereum_ssz", "hex", - "reqwest", + "reqwest 0.11.27", "serde_json", "sha2 0.9.9", "tree_hash", @@ -2835,7 +2835,7 @@ dependencies = [ "pretty_reqwest_error", "proto_array", "rand 0.8.5", - "reqwest", + "reqwest 0.11.27", "reqwest-eventsource", "sensitive_url", "serde", @@ -2913,7 +2913,7 @@ dependencies = [ "ethereum_ssz", "kzg", "pretty_reqwest_error", - "reqwest", + "reqwest 0.11.27", "sensitive_url", "serde_yaml", "sha2 0.9.9", @@ -3146,7 +3146,7 @@ checksum = "a9713f525348e5dde025d09b0a4217429f8074e8ff22c886263cc191e87d8216" dependencies = [ "ethers-core", "getrandom 0.2.15", - "reqwest", + "reqwest 0.11.27", "semver 1.0.26", "serde", "serde-aux", @@ -3171,7 +3171,7 @@ dependencies = [ "futures-locks", "futures-util", "instant", - "reqwest", + "reqwest 0.11.27", "serde", "serde_json", "thiserror 1.0.69", @@ -3201,7 +3201,7 @@ dependencies = [ "once_cell", "parking_lot 0.11.2", "pin-project", - "reqwest", + "reqwest 0.11.27", "serde", "serde_json", "thiserror 1.0.69", @@ -3287,7 +3287,7 @@ dependencies = [ "futures", "hex", "logging", - "reqwest", + "reqwest 0.11.27", "sensitive_url", "serde_json", "task_executor", @@ -3326,7 +3326,7 @@ dependencies = [ "parking_lot 0.12.3", "pretty_reqwest_error", "rand 0.8.5", - "reqwest", + "reqwest 0.11.27", "sensitive_url", "serde", "serde_json", @@ -4326,7 +4326,7 @@ dependencies = [ "logging", "malloc_utils", "metrics", - "reqwest", + "reqwest 0.11.27", "serde", "slot_clock", "store", @@ -4793,7 +4793,7 @@ dependencies = [ "metrics", "parking_lot 0.12.3", "rand 0.8.5", - "reqwest", + "reqwest 0.11.27", "serde", "serde_json", "signing_method", @@ -5637,6 +5637,9 @@ dependencies = [ "logging", "malloc_utils", "metrics", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", "sensitive_url", "serde", "serde_json", @@ -5647,6 +5650,7 @@ dependencies = [ "task_executor", "tempfile", "tracing", + "tracing-opentelemetry", "tracing-subscriber", "types", "unused_port", @@ -6175,7 +6179,7 @@ dependencies = [ "lighthouse_version", "metrics", "regex", - "reqwest", + "reqwest 0.11.27", "sensitive_url", "serde", "serde_json", @@ -6698,6 +6702,80 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaf416e4cb72756655126f7dd7bb0af49c674f4c1b9903e80c009e0c37e552e6" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.12", + "tracing", +] + +[[package]] +name = "opentelemetry-http" +version = "0.30.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f6639e842a97dbea8886e3439710ae463120091e2e064518ba8e716e6ac36d" +dependencies = [ + "async-trait", + "bytes", + "http 1.3.0", + "opentelemetry", + "reqwest 0.12.15", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbee664a43e07615731afc539ca60c6d9f1a9425e25ca09c57bc36c87c55852b" +dependencies = [ + "http 1.3.0", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest 0.12.15", + "thiserror 2.0.12", + "tokio", + "tonic 0.13.1", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e046fd7660710fe5a05e8748e70d9058dc15c94ba914e7c4faa7c728f0e8ddc" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic 0.13.1", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f644aa9e5e31d11896e024305d7e3c98a88884d9f8919dbf37a9991bc47a4b" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.0", + "serde_json", + "thiserror 2.0.12", +] + [[package]] name = "operation_pool" version = "0.2.0" @@ -7129,7 +7207,7 @@ dependencies = [ name = "pretty_reqwest_error" version = "0.1.0" dependencies = [ - "reqwest", + "reqwest 0.11.27", "sensitive_url", ] @@ -7760,6 +7838,43 @@ dependencies = [ "winreg", ] +[[package]] +name = "reqwest" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.3.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.6.0", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 1.0.2", + "tokio", + "tower 0.5.2", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "windows-registry", +] + [[package]] name = "reqwest-eventsource" version = "0.5.0" @@ -7772,7 +7887,7 @@ dependencies = [ "mime", "nom", "pin-project-lite", - "reqwest", + "reqwest 0.11.27", "thiserror 1.0.69", ] @@ -8605,7 +8720,7 @@ dependencies = [ "ethereum_serde_utils", "lockfile", "parking_lot 0.12.3", - "reqwest", + "reqwest 0.11.27", "serde", "task_executor", "types", @@ -9003,6 +9118,9 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] [[package]] name = "synstructure" @@ -9517,6 +9635,32 @@ dependencies = [ "tracing", ] +[[package]] +name = "tonic" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "http 1.3.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.6.0", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "tokio", + "tokio-stream", + "tower 0.5.2", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = 
"tower" version = "0.4.13" @@ -9545,10 +9689,15 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", + "indexmap 2.8.0", "pin-project-lite", + "slab", "sync_wrapper 1.0.2", + "tokio", + "tokio-util", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -9629,6 +9778,24 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddcf5959f39507d0d04d6413119c04f33b623f4f951ebcbdddddfad2d0623a9c" +dependencies = [ + "js-sys", + "once_cell", + "opentelemetry", + "opentelemetry_sdk", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + [[package]] name = "tracing-serde" version = "0.2.0" @@ -9962,7 +10129,7 @@ dependencies = [ "metrics", "monitoring_api", "parking_lot 0.12.3", - "reqwest", + "reqwest 0.11.27", "sensitive_url", "serde", "slashing_protection", @@ -10389,7 +10556,7 @@ dependencies = [ "lighthouse_validator_store", "logging", "parking_lot 0.12.3", - "reqwest", + "reqwest 0.11.27", "serde", "serde_json", "serde_yaml", @@ -10525,7 +10692,7 @@ dependencies = [ "windows-implement", "windows-interface", "windows-result 0.2.0", - "windows-strings", + "windows-strings 0.1.0", "windows-targets 0.52.6", ] @@ -10557,6 +10724,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" +[[package]] +name = "windows-registry" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" +dependencies = [ + "windows-result 0.3.1", + "windows-strings 0.3.1", + "windows-targets 0.53.2", +] + [[package]] name = "windows-result" version = "0.1.2" @@ -10575,6 +10753,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-result" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06374efe858fab7e4f881500e6e86ec8bc28f9462c47e5a9941a0142ad86b189" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-strings" version = "0.1.0" @@ -10585,6 +10772,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-strings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.45.0" @@ -10660,13 +10856,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -10685,6 +10897,12 @@ 
version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -10703,6 +10921,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -10721,12 +10945,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -10745,6 +10981,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -10763,6 +11005,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -10781,6 +11029,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -10799,6 +11053,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = 
"winnow" version = "0.5.40" diff --git a/Cargo.toml b/Cargo.toml index ea64f5ae98a..82d63d42d88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -275,6 +275,10 @@ xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb zeroize = { version = "1", features = ["zeroize_derive", "serde"] } zip = "0.6" zstd = "0.13" +tracing-opentelemetry = "0.31.0" +opentelemetry_sdk = "0.30.0" +opentelemetry-otlp = { version = "0.30.0", features = ["grpc-tonic"] } +opentelemetry = "0.30.0" [profile.maxperf] inherits = "release" diff --git a/lighthouse/Cargo.toml b/lighthouse/Cargo.toml index 3767b4880e9..08741d72a57 100644 --- a/lighthouse/Cargo.toml +++ b/lighthouse/Cargo.toml @@ -72,6 +72,10 @@ types = { workspace = true } unused_port = { workspace = true } validator_client = { workspace = true } validator_manager = { path = "../validator_manager" } +tracing-opentelemetry = { workspace = true } +opentelemetry_sdk = { workspace = true } +opentelemetry-otlp = { workspace = true } +opentelemetry = { workspace = true } [target.'cfg(any(target_os = "windows", features = "heaptrack"))'.dependencies] malloc_utils = { workspace = true } diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index 28b8c7b8fc4..dd6123083b7 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -19,6 +19,8 @@ use futures::TryFutureExt; use lighthouse_version::VERSION; use logging::{build_workspace_filter, crit, MetricsLayer}; use malloc_utils::configure_memory_allocator; +use opentelemetry::trace::TracerProvider; +use opentelemetry_otlp::WithExportConfig; use std::backtrace::Backtrace; use std::io::IsTerminal; use std::path::PathBuf; @@ -673,6 +675,28 @@ fn run( logging_layers.push(MetricsLayer.boxed()); + let mut environment = builder + .multi_threaded_tokio_runtime()? + .eth2_network_config(eth2_network_config)? + .build()?; + + let telemetry_layer = environment.runtime().block_on(async { + let exporter = opentelemetry_otlp::SpanExporter::builder() + .with_tonic() + .with_endpoint("http://localhost:4317") + .build() + .map_err(|e| format!("Failed to create OTLP exporter: {:?}", e))?; + + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .build(); + + let tracer = provider.tracer("lighthouse"); + Ok::<_, String>(tracing_opentelemetry::layer().with_tracer(tracer)) + })?; + + logging_layers.push(telemetry_layer.boxed()); + #[cfg(feature = "console-subscriber")] { let console_layer = console_subscriber::spawn(); @@ -687,11 +711,6 @@ fn run( eprintln!("Failed to initialize logger: {e}"); } - let mut environment = builder - .multi_threaded_tokio_runtime()? - .eth2_network_config(eth2_network_config)? - .build()?; - // Log panics properly. { std::panic::set_hook(Box::new(move |info| { From 6dba43139631c39efb78e69a66f31504bff77337 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 13:08:32 +1000 Subject: [PATCH 02/35] Add `workspace_filter` to telemetry layer to exclude libp2p traces. 
--- lighthouse/src/main.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index dd6123083b7..08c838cf66d 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -652,7 +652,7 @@ fn run( logging_layers.push( file_logging_layer .with_filter(logger_config.logfile_debug_level) - .with_filter(workspace_filter) + .with_filter(workspace_filter.clone()) .boxed(), ); } @@ -692,7 +692,11 @@ fn run( .build(); let tracer = provider.tracer("lighthouse"); - Ok::<_, String>(tracing_opentelemetry::layer().with_tracer(tracer)) + Ok::<_, String>( + tracing_opentelemetry::layer() + .with_tracer(tracer) + .with_filter(workspace_filter), + ) })?; logging_layers.push(telemetry_layer.boxed()); From 4147599b43293ab7d96979a2c3d3104da494bd5c Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 14:19:07 +1000 Subject: [PATCH 03/35] Squashed commit of the following: commit 52bc83105bfd8d21aa5bc57f3cceb103f5d60353 Author: Michael Sproul Date: Mon Jul 21 13:56:44 2025 +1000 Remove tracing instrumentation # Conflicts: # beacon_node/network/src/sync/manager.rs # beacon_node/network/src/sync/peer_sampling.rs --- .../src/data_availability_checker.rs | 11 +- .../beacon_chain/src/validator_monitor.rs | 230 +------------ beacon_node/lighthouse_network/src/rpc/mod.rs | 38 +-- .../lighthouse_network/src/service/mod.rs | 309 +----------------- .../lighthouse_network/tests/common.rs | 8 +- .../lighthouse_network/tests/rpc_tests.rs | 56 ++-- .../src/network_beacon_processor/mod.rs | 7 +- beacon_node/network/src/router.rs | 3 +- beacon_node/network/src/service.rs | 4 +- beacon_node/network/src/subnet_service/mod.rs | 51 +-- .../network/src/sync/backfill_sync/mod.rs | 111 +------ .../network/src/sync/block_lookups/mod.rs | 118 +------ beacon_node/network/src/sync/manager.rs | 11 +- beacon_node/network/src/sync/peer_sampling.rs | 0 .../network/src/sync/range_sync/chain.rs | 17 +- .../network/src/sync/range_sync/range.rs | 52 +-- common/task_executor/src/lib.rs | 15 +- slasher/service/src/service.rs | 5 +- 18 files changed, 44 insertions(+), 1002 deletions(-) create mode 100644 beacon_node/network/src/sync/peer_sampling.rs diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 4f05e8a3b6f..e2947bb2d6f 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -16,7 +16,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; use task_executor::TaskExecutor; -use tracing::{debug, error, info_span, Instrument}; +use tracing::{debug, error}; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, @@ -622,14 +622,7 @@ pub fn start_availability_cache_maintenance_service( if chain.spec.deneb_fork_epoch.is_some() { let overflow_cache = chain.data_availability_checker.availability_cache.clone(); executor.spawn( - async move { - availability_cache_maintenance_service(chain, overflow_cache) - .instrument(info_span!( - "DataAvailabilityChecker", - service = "data_availability_checker" - )) - .await - }, + async move { availability_cache_maintenance_service(chain, overflow_cache).await }, "availability_cache_service", ); } else { diff --git a/beacon_node/beacon_chain/src/validator_monitor.rs b/beacon_node/beacon_chain/src/validator_monitor.rs 
index 8d3d748e8cd..80a6f64582f 100644 --- a/beacon_node/beacon_chain/src/validator_monitor.rs +++ b/beacon_node/beacon_chain/src/validator_monitor.rs @@ -21,7 +21,7 @@ use std::str::Utf8Error; use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use store::AbstractExecPayload; -use tracing::{debug, error, info, instrument, warn}; +use tracing::{debug, error, info, warn}; use types::consts::altair::{ TIMELY_HEAD_FLAG_INDEX, TIMELY_SOURCE_FLAG_INDEX, TIMELY_TARGET_FLAG_INDEX, }; @@ -405,10 +405,6 @@ pub struct ValidatorMonitor { } impl ValidatorMonitor { - #[instrument(parent = None, - name = "validator_monitor", - skip_all - )] pub fn new( config: ValidatorMonitorConfig, beacon_proposer_cache: Arc>, @@ -438,21 +434,11 @@ impl ValidatorMonitor { /// Returns `true` when the validator count is sufficiently low enough to /// emit metrics and logs on a per-validator basis (rather than just an /// aggregated basis). - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn individual_tracking(&self) -> bool { self.validators.len() <= self.individual_tracking_threshold } /// Add some validators to `self` for additional monitoring. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) { let index_opt = self .indices @@ -470,11 +456,6 @@ impl ValidatorMonitor { } /// Add an unaggregated attestation - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn set_unaggregated_attestation(&mut self, attestation: Attestation) { let unaggregated_attestations = &mut self.unaggregated_attestations; @@ -488,22 +469,12 @@ impl ValidatorMonitor { self.unaggregated_attestations.insert(slot, attestation); } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn get_unaggregated_attestation(&self, slot: Slot) -> Option<&Attestation> { self.unaggregated_attestations.get(&slot) } /// Reads information from the given `state`. The `state` *must* be valid (i.e, able to be /// imported). 
- #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn process_valid_state( &mut self, current_epoch: Epoch, @@ -616,11 +587,6 @@ impl ValidatorMonitor { } /// Add missed non-finalized blocks for the monitored validators - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn add_validators_missed_blocks(&mut self, state: &BeaconState) { // Define range variables let current_slot = state.slot(); @@ -717,11 +683,6 @@ impl ValidatorMonitor { } } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn get_proposers_by_epoch_from_cache( &mut self, epoch: Epoch, @@ -735,11 +696,6 @@ impl ValidatorMonitor { /// Process the unaggregated attestations generated by the service `attestation_simulator_service` /// and check if the attestation qualifies for a reward matching the flags source/target/head - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn process_unaggregated_attestations(&mut self, state: &BeaconState, spec: &ChainSpec) { let current_slot = state.slot(); @@ -812,11 +768,6 @@ impl ValidatorMonitor { /// /// We allow disabling tracking metrics on an individual validator basis /// since it can result in untenable cardinality with high validator counts. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn aggregatable_metric(&self, individual_id: &str, func: F) { func(TOTAL_LABEL); @@ -825,11 +776,6 @@ impl ValidatorMonitor { } } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn process_validator_statuses( &self, epoch: Epoch, @@ -1107,11 +1053,6 @@ impl ValidatorMonitor { Ok(()) } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn get_validator(&self, validator_index: u64) -> Option<&MonitoredValidator> { self.indices .get(&validator_index) @@ -1119,30 +1060,15 @@ impl ValidatorMonitor { } /// Returns the number of validators monitored by `self`. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn num_validators(&self) -> usize { self.validators.len() } /// Return the `id`'s of all monitored validators. 
- #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn get_all_monitored_validators(&self) -> Vec { self.validators.values().map(|val| val.id.clone()).collect() } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn get_monitored_validator(&self, index: u64) -> Option<&MonitoredValidator> { if let Some(pubkey) = self.indices.get(&index) { self.validators.get(pubkey) @@ -1151,11 +1077,6 @@ impl ValidatorMonitor { } } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn get_monitored_validator_missed_block_count(&self, validator_index: u64) -> u64 { self.missed_blocks .iter() @@ -1163,22 +1084,12 @@ impl ValidatorMonitor { .count() as u64 } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn get_beacon_proposer_cache(&self) -> Arc> { self.beacon_proposer_cache.clone() } /// If `self.auto_register == true`, add the `validator_index` to `self.monitored_validators`. /// Otherwise, do nothing. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn auto_register_local_validator(&mut self, validator_index: u64) { if !self.auto_register { return; @@ -1201,11 +1112,6 @@ impl ValidatorMonitor { } /// Process a block received on gossip. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_gossip_block( &self, seen_timestamp: Duration, @@ -1217,11 +1123,6 @@ impl ValidatorMonitor { } /// Process a block received on the HTTP API from a local validator. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_api_block( &self, seen_timestamp: Duration, @@ -1232,11 +1133,6 @@ impl ValidatorMonitor { self.register_beacon_block("api", seen_timestamp, block, block_root, slot_clock) } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_beacon_block( &self, src: &str, @@ -1276,11 +1172,6 @@ impl ValidatorMonitor { } /// Register an attestation seen on the gossip network. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_gossip_unaggregated_attestation( &self, seen_timestamp: Duration, @@ -1296,11 +1187,6 @@ impl ValidatorMonitor { } /// Register an attestation seen on the HTTP API. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_api_unaggregated_attestation( &self, seen_timestamp: Duration, @@ -1315,11 +1201,6 @@ impl ValidatorMonitor { ) } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_unaggregated_attestation( &self, src: &str, @@ -1406,11 +1287,6 @@ impl ValidatorMonitor { ) } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_aggregated_attestation( &self, src: &str, @@ -1529,10 +1405,6 @@ impl ValidatorMonitor { /// We use the parent slot instead of block slot to ignore skip slots when calculating inclusion distance. /// /// Note: Blocks that get orphaned will skew the inclusion distance calculation. 
- #[instrument(parent = None, - name = "validator_monitor", - skip_all - )] pub fn register_attestation_in_block( &self, indexed_attestation: IndexedAttestationRef<'_, E>, @@ -1608,11 +1480,6 @@ impl ValidatorMonitor { } /// Register a sync committee message received over gossip. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_gossip_sync_committee_message( &self, seen_timestamp: Duration, @@ -1628,11 +1495,6 @@ impl ValidatorMonitor { } /// Register a sync committee message received over the http api. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_api_sync_committee_message( &self, seen_timestamp: Duration, @@ -1648,11 +1510,6 @@ impl ValidatorMonitor { } /// Register a sync committee message. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_sync_committee_message( &self, src: &str, @@ -1702,11 +1559,6 @@ impl ValidatorMonitor { } /// Register a sync committee contribution received over gossip. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_gossip_sync_committee_contribution( &self, seen_timestamp: Duration, @@ -1724,11 +1576,6 @@ impl ValidatorMonitor { } /// Register a sync committee contribution received over the http api. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_api_sync_committee_contribution( &self, seen_timestamp: Duration, @@ -1746,11 +1593,6 @@ impl ValidatorMonitor { } /// Register a sync committee contribution. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_sync_committee_contribution( &self, src: &str, @@ -1833,11 +1675,6 @@ impl ValidatorMonitor { } /// Register that the `sync_aggregate` was included in a *valid* `BeaconBlock`. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_sync_aggregate_in_block( &self, slot: Slot, @@ -1875,40 +1712,20 @@ impl ValidatorMonitor { } /// Register an exit from the gossip network. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_gossip_voluntary_exit(&self, exit: &VoluntaryExit) { self.register_voluntary_exit("gossip", exit) } /// Register an exit from the HTTP API. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_api_voluntary_exit(&self, exit: &VoluntaryExit) { self.register_voluntary_exit("api", exit) } /// Register an exit included in a *valid* beacon block. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_block_voluntary_exit(&self, exit: &VoluntaryExit) { self.register_voluntary_exit("block", exit) } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_voluntary_exit(&self, src: &str, exit: &VoluntaryExit) { if let Some(validator) = self.get_validator(exit.validator_index) { let id = &validator.id; @@ -1932,40 +1749,20 @@ impl ValidatorMonitor { } /// Register a proposer slashing from the gossip network. 
- #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_gossip_proposer_slashing(&self, slashing: &ProposerSlashing) { self.register_proposer_slashing("gossip", slashing) } /// Register a proposer slashing from the HTTP API. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_api_proposer_slashing(&self, slashing: &ProposerSlashing) { self.register_proposer_slashing("api", slashing) } /// Register a proposer slashing included in a *valid* `BeaconBlock`. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_block_proposer_slashing(&self, slashing: &ProposerSlashing) { self.register_proposer_slashing("block", slashing) } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_proposer_slashing(&self, src: &str, slashing: &ProposerSlashing) { let proposer = slashing.signed_header_1.message.proposer_index; let slot = slashing.signed_header_1.message.slot; @@ -1999,40 +1796,20 @@ impl ValidatorMonitor { } /// Register an attester slashing from the gossip network. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_gossip_attester_slashing(&self, slashing: AttesterSlashingRef<'_, E>) { self.register_attester_slashing("gossip", slashing) } /// Register an attester slashing from the HTTP API. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_api_attester_slashing(&self, slashing: AttesterSlashingRef<'_, E>) { self.register_attester_slashing("api", slashing) } /// Register an attester slashing included in a *valid* `BeaconBlock`. - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn register_block_attester_slashing(&self, slashing: AttesterSlashingRef<'_, E>) { self.register_attester_slashing("block", slashing) } - #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] fn register_attester_slashing(&self, src: &str, slashing: AttesterSlashingRef<'_, E>) { let data = slashing.attestation_1().data(); let attestation_1_indices: HashSet = slashing @@ -2074,11 +1851,6 @@ impl ValidatorMonitor { /// Scrape `self` for metrics. /// /// Should be called whenever Prometheus is scraping Lighthouse. 
- #[instrument(parent = None, - fields(service = "validator_monitor"), - name = "validator_monitor", - skip_all - )] pub fn scrape_metrics(&self, slot_clock: &S, spec: &ChainSpec) { metrics::set_gauge( &metrics::VALIDATOR_MONITOR_VALIDATORS_TOTAL, diff --git a/beacon_node/lighthouse_network/src/rpc/mod.rs b/beacon_node/lighthouse_network/src/rpc/mod.rs index 9807387a172..a086392c47c 100644 --- a/beacon_node/lighthouse_network/src/rpc/mod.rs +++ b/beacon_node/lighthouse_network/src/rpc/mod.rs @@ -17,7 +17,7 @@ use std::marker::PhantomData; use std::sync::Arc; use std::task::{Context, Poll}; use std::time::Duration; -use tracing::{debug, instrument, trace}; +use tracing::{debug, trace}; use types::{EthSpec, ForkContext}; pub(crate) use handler::{HandlerErr, HandlerEvent}; @@ -169,12 +169,6 @@ pub struct RPC { } impl RPC { - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p_rpc"), - name = "libp2p_rpc", - skip_all - )] pub fn new( fork_context: Arc, enable_light_client_server: bool, @@ -207,12 +201,6 @@ impl RPC { /// Sends an RPC response. /// Returns an `Err` if the request does exist in the active inbound requests list. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p_rpc"), - name = "libp2p_rpc", - skip_all - )] pub fn send_response( &mut self, request_id: InboundRequestId, @@ -282,12 +270,6 @@ impl RPC { /// Submits an RPC request. /// /// The peer must be connected for this to succeed. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p_rpc"), - name = "libp2p_rpc", - skip_all - )] pub fn send_request(&mut self, peer_id: PeerId, request_id: Id, req: RequestType) { match self .outbound_request_limiter @@ -306,12 +288,6 @@ impl RPC { /// Lighthouse wishes to disconnect from this peer by sending a Goodbye message. This /// gracefully terminates the RPC behaviour with a goodbye message. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p_rpc"), - name = "libp2p_rpc", - skip_all - )] pub fn shutdown(&mut self, peer_id: PeerId, id: Id, reason: GoodbyeReason) { self.events.push(ToSwarm::NotifyHandler { peer_id, @@ -320,23 +296,11 @@ impl RPC { }); } - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p_rpc"), - name = "libp2p_rpc", - skip_all - )] pub fn update_seq_number(&mut self, seq_number: u64) { self.seq_number = seq_number } /// Send a Ping request to the destination `PeerId` via `ConnectionId`. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p_rpc"), - name = "libp2p_rpc", - skip_all - )] pub fn ping(&mut self, peer_id: PeerId, id: Id) { let ping = Ping { data: self.seq_number, diff --git a/beacon_node/lighthouse_network/src/service/mod.rs b/beacon_node/lighthouse_network/src/service/mod.rs index 4a6f34c76d3..3bdb5eb065c 100644 --- a/beacon_node/lighthouse_network/src/service/mod.rs +++ b/beacon_node/lighthouse_network/src/service/mod.rs @@ -38,7 +38,7 @@ use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; use std::time::Duration; -use tracing::{debug, error, info, instrument, trace, warn}; +use tracing::{debug, error, info, trace, warn}; use types::{ consts::altair::SYNC_COMMITTEE_SUBNET_COUNT, EnrForkId, EthSpec, ForkContext, Slot, SubnetId, }; @@ -167,12 +167,6 @@ pub struct Network { /// Implements the combined behaviour for the libp2p service. 
impl Network { - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub async fn new( executor: task_executor::TaskExecutor, mut ctx: ServiceContext<'_>, @@ -535,12 +529,6 @@ impl Network { /// - Starts listening in the given ports. /// - Dials boot-nodes and libp2p peers. /// - Subscribes to starting gossipsub topics. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] async fn start(&mut self, config: &crate::NetworkConfig) -> Result<(), String> { let enr = self.network_globals.local_enr(); info!( @@ -664,114 +652,48 @@ impl Network { /* Public Accessible Functions to interact with the behaviour */ /// The routing pub-sub mechanism for eth2. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn gossipsub_mut(&mut self) -> &mut Gossipsub { &mut self.swarm.behaviour_mut().gossipsub } /// The Eth2 RPC specified in the wire-0 protocol. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn eth2_rpc_mut(&mut self) -> &mut RPC { &mut self.swarm.behaviour_mut().eth2_rpc } /// Discv5 Discovery protocol. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn discovery_mut(&mut self) -> &mut Discovery { &mut self.swarm.behaviour_mut().discovery } /// Provides IP addresses and peer information. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn identify_mut(&mut self) -> &mut identify::Behaviour { &mut self.swarm.behaviour_mut().identify } /// The peer manager that keeps track of peer's reputation and status. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn peer_manager_mut(&mut self) -> &mut PeerManager { &mut self.swarm.behaviour_mut().peer_manager } /// The routing pub-sub mechanism for eth2. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn gossipsub(&self) -> &Gossipsub { &self.swarm.behaviour().gossipsub } /// The Eth2 RPC specified in the wire-0 protocol. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn eth2_rpc(&self) -> &RPC { &self.swarm.behaviour().eth2_rpc } /// Discv5 Discovery protocol. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn discovery(&self) -> &Discovery { &self.swarm.behaviour().discovery } /// Provides IP addresses and peer information. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn identify(&self) -> &identify::Behaviour { &self.swarm.behaviour().identify } /// The peer manager that keeps track of peer's reputation and status. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn peer_manager(&self) -> &PeerManager { &self.swarm.behaviour().peer_manager } /// Returns the local ENR of the node. 
- #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn local_enr(&self) -> Enr { self.network_globals.local_enr() } @@ -780,12 +702,6 @@ impl Network { /// Subscribes to a gossipsub topic kind, letting the network service determine the /// encoding and fork version. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn subscribe_kind(&mut self, kind: GossipKind) -> bool { let gossip_topic = GossipTopic::new( kind, @@ -798,12 +714,6 @@ impl Network { /// Unsubscribes from a gossipsub topic kind, letting the network service determine the /// encoding and fork version. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn unsubscribe_kind(&mut self, kind: GossipKind) -> bool { let gossip_topic = GossipTopic::new( kind, @@ -814,12 +724,6 @@ impl Network { } /// Subscribe to all required topics for the `new_fork` with the given `new_fork_digest`. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn subscribe_new_fork_topics(&mut self, new_fork: ForkName, new_fork_digest: [u8; 4]) { // Re-subscribe to non-core topics with the new fork digest let subscriptions = self.network_globals.gossipsub_subscriptions.read().clone(); @@ -844,12 +748,6 @@ impl Network { } /// Unsubscribe from all topics that doesn't have the given fork_digest - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn unsubscribe_from_fork_topics_except(&mut self, except: [u8; 4]) { let subscriptions = self.network_globals.gossipsub_subscriptions.read().clone(); for topic in subscriptions @@ -862,12 +760,6 @@ impl Network { } /// Remove topic weight from all topics that don't have the given fork digest. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn remove_topic_weight_except(&mut self, except: [u8; 4]) { let new_param = TopicScoreParams { topic_weight: 0.0, @@ -892,12 +784,6 @@ impl Network { } /// Subscribe to all data columns determined by the cgc. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn subscribe_new_data_column_subnets(&mut self, custody_column_count: u64) { self.network_globals .update_data_column_subnets(custody_column_count); @@ -909,12 +795,6 @@ impl Network { } /// Returns the scoring parameters for a topic if set. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn get_topic_params(&self, topic: GossipTopic) -> Option<&TopicScoreParams> { self.swarm .behaviour() @@ -925,12 +805,6 @@ impl Network { /// Subscribes to a gossipsub topic. /// /// Returns `true` if the subscription was successful and `false` otherwise. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn subscribe(&mut self, topic: GossipTopic) -> bool { // update the network globals self.network_globals @@ -953,12 +827,6 @@ impl Network { } /// Unsubscribe from a gossipsub topic. 
- #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn unsubscribe(&mut self, topic: GossipTopic) -> bool { // update the network globals self.network_globals @@ -974,12 +842,6 @@ impl Network { } /// Publishes a list of messages on the pubsub (gossipsub) behaviour, choosing the encoding. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn publish(&mut self, messages: Vec>) { for message in messages { for topic in message.topics(GossipEncoding::default(), self.enr_fork_id.fork_digest) { @@ -1034,12 +896,6 @@ impl Network { /// Informs the gossipsub about the result of a message validation. /// If the message is valid it will get propagated by gossipsub. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn report_message_validation_result( &mut self, propagation_source: &PeerId, @@ -1074,12 +930,6 @@ impl Network { /// Updates the current gossipsub scoring parameters based on the validator count and current /// slot. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn update_gossipsub_parameters( &mut self, active_validators: usize, @@ -1123,12 +973,7 @@ impl Network { /* Eth2 RPC behaviour functions */ /// Send a request to a peer over RPC. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] + #[allow(clippy::result_large_err)] pub fn send_request( &mut self, peer_id: PeerId, @@ -1146,12 +991,6 @@ impl Network { } /// Send a successful response to a peer over RPC. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn send_response>>( &mut self, peer_id: PeerId, @@ -1171,22 +1010,10 @@ impl Network { } /* Peer management functions */ - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn testing_dial(&mut self, addr: Multiaddr) -> Result<(), libp2p::swarm::DialError> { self.swarm.dial(addr) } - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn report_peer( &mut self, peer_id: &PeerId, @@ -1202,12 +1029,6 @@ impl Network { /// /// This will send a goodbye, disconnect and then ban the peer. /// This is fatal for a peer, and should be used in unrecoverable circumstances. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn goodbye_peer(&mut self, peer_id: &PeerId, reason: GoodbyeReason, source: ReportSource) { self.peer_manager_mut() .goodbye_peer(peer_id, reason, source); @@ -1215,34 +1036,16 @@ impl Network { /// Hard (ungraceful) disconnect for testing purposes only /// Use goodbye_peer for disconnections, do not use this function. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn __hard_disconnect_testing_only(&mut self, peer_id: PeerId) { let _ = self.swarm.disconnect_peer_id(peer_id); } /// Returns an iterator over all enr entries in the DHT. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn enr_entries(&self) -> Vec { self.discovery().table_entries_enr() } /// Add an ENR to the routing table of the discovery mechanism. 
- #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn add_enr(&mut self, enr: Enr) { self.discovery_mut().add_enr(enr); } @@ -1250,12 +1053,6 @@ impl Network { /// Updates a subnet value to the ENR attnets/syncnets bitfield. /// /// The `value` is `true` if a subnet is being added and false otherwise. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn update_enr_subnet(&mut self, subnet_id: Subnet, value: bool) { if let Err(e) = self.discovery_mut().update_enr_bitfield(subnet_id, value) { crit!(error = e, "Could not update ENR bitfield"); @@ -1265,12 +1062,6 @@ impl Network { } /// Updates the cgc value in the ENR. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn update_enr_cgc(&mut self, new_custody_group_count: u64) { if let Err(e) = self.discovery_mut().update_enr_cgc(new_custody_group_count) { crit!(error = e, "Could not update cgc in ENR"); @@ -1281,12 +1072,6 @@ impl Network { /// Attempts to discover new peers for a given subnet. The `min_ttl` gives the time at which we /// would like to retain the peers for. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn discover_subnet_peers(&mut self, subnets_to_discover: Vec) { // If discovery is not started or disabled, ignore the request if !self.discovery().started { @@ -1341,12 +1126,6 @@ impl Network { } /// Updates the local ENR's "eth2" field with the latest EnrForkId. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub fn update_fork_version(&mut self, enr_fork_id: EnrForkId) { self.discovery_mut().update_eth2_enr(enr_fork_id.clone()); @@ -1363,12 +1142,6 @@ impl Network { /* Private internal functions */ /// Updates the current meta data of the node to match the local ENR. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn update_metadata_bitfields(&mut self) { let local_attnets = self .discovery_mut() @@ -1399,12 +1172,6 @@ impl Network { utils::save_metadata_to_disk(&self.network_dir, meta_data); } - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn update_metadata_cgc(&mut self, custody_group_count: u64) { let mut meta_data_w = self.network_globals.local_metadata.write(); @@ -1422,23 +1189,11 @@ impl Network { } /// Sends a Ping request to the peer. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn ping(&mut self, peer_id: PeerId) { self.eth2_rpc_mut().ping(peer_id, AppRequestId::Internal); } /// Sends a METADATA request to a peer. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn send_meta_data_request(&mut self, peer_id: PeerId) { let event = if self.fork_context.spec.is_peer_das_scheduled() { // Nodes with higher custody will probably start advertising it @@ -1453,21 +1208,9 @@ impl Network { } /// Sends a METADATA response to a peer. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] // RPC Propagation methods /// Queues the response to be sent upwards as long at it was requested outside the Behaviour. 
#[must_use = "return the response"] - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn build_response( &mut self, app_request_id: AppRequestId, @@ -1486,12 +1229,6 @@ impl Network { /// Dial cached Enrs in discovery service that are in the given `subnet_id` and aren't /// in Connected, Dialing or Banned state. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn dial_cached_enrs_in_subnet(&mut self, subnet: Subnet, spec: Arc) { let predicate = subnet_predicate::(vec![subnet], spec); let peers_to_dial: Vec = self @@ -1534,12 +1271,6 @@ impl Network { /* Sub-behaviour event handling functions */ /// Handle a gossipsub event. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn inject_gs_event(&mut self, event: gossipsub::Event) -> Option> { match event { gossipsub::Event::Message { @@ -1678,12 +1409,6 @@ impl Network { } /// Handle an RPC event. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn inject_rpc_event(&mut self, event: RPCMessage) -> Option> { let peer_id = event.peer_id; @@ -1974,12 +1699,6 @@ impl Network { } /// Handle an identify event. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn inject_identify_event(&mut self, event: identify::Event) -> Option> { match event { identify::Event::Received { @@ -2002,12 +1721,6 @@ impl Network { } /// Handle a peer manager event. - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn inject_pm_event(&mut self, event: PeerManagerEvent) -> Option> { match event { PeerManagerEvent::PeerConnectedIncoming(peer_id) => { @@ -2061,12 +1774,6 @@ impl Network { } } - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn inject_upnp_event(&mut self, event: libp2p::upnp::Event) { match event { libp2p::upnp::Event::NewExternalAddr(addr) => { @@ -2110,12 +1817,6 @@ impl Network { } /* Networking polling */ - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] pub async fn next_event(&mut self) -> NetworkEvent { loop { tokio::select! 
{ @@ -2149,12 +1850,6 @@ impl Network { } } - #[instrument(parent = None, - level = "trace", - fields(service = "libp2p"), - name = "libp2p", - skip_all - )] fn parse_swarm_event( &mut self, event: SwarmEvent>, diff --git a/beacon_node/lighthouse_network/tests/common.rs b/beacon_node/lighthouse_network/tests/common.rs index 61f48a9a6fd..96d252b53da 100644 --- a/beacon_node/lighthouse_network/tests/common.rs +++ b/beacon_node/lighthouse_network/tests/common.rs @@ -7,7 +7,7 @@ use lighthouse_network::{NetworkConfig, NetworkEvent}; use std::sync::Arc; use std::sync::Weak; use tokio::runtime::Runtime; -use tracing::{debug, error, info_span, Instrument}; +use tracing::{debug, error}; use tracing_subscriber::EnvFilter; use types::{ ChainSpec, EnrForkId, Epoch, EthSpec, FixedBytesExtended, ForkContext, ForkName, Hash256, @@ -204,8 +204,7 @@ pub async fn build_node_pair( } } } - } - .instrument(info_span!("Sender", who = "sender")); + }; let receiver_fut = async { loop { if let NetworkEvent::NewListenAddr(addr) = receiver.next_event().await { @@ -227,8 +226,7 @@ pub async fn build_node_pair( } } } - } - .instrument(info_span!("Receiver", who = "receiver")); + }; let joined = futures::future::join(sender_fut, receiver_fut); diff --git a/beacon_node/lighthouse_network/tests/rpc_tests.rs b/beacon_node/lighthouse_network/tests/rpc_tests.rs index 11fe93288f7..69c8fd22575 100644 --- a/beacon_node/lighthouse_network/tests/rpc_tests.rs +++ b/beacon_node/lighthouse_network/tests/rpc_tests.rs @@ -13,7 +13,7 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::runtime::Runtime; use tokio::time::sleep; -use tracing::{debug, error, info_span, warn, Instrument}; +use tracing::{debug, error, warn}; use types::{ BeaconBlock, BeaconBlockAltair, BeaconBlockBase, BeaconBlockBellatrix, BlobSidecar, ChainSpec, EmptyBlock, Epoch, EthSpec, FixedBytesExtended, ForkName, Hash256, MinimalEthSpec, @@ -118,8 +118,7 @@ fn test_tcp_status_rpc() { _ => {} } } - } - .instrument(info_span!("Sender")); + }; // build the receiver future let receiver_future = async { @@ -143,8 +142,7 @@ fn test_tcp_status_rpc() { _ => {} // Ignore other events } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! { _ = sender_future => {} @@ -248,8 +246,7 @@ fn test_tcp_blocks_by_range_chunked_rpc() { _ => {} // Ignore other behaviour events } } - } - .instrument(info_span!("Sender")); + }; // build the receiver future let receiver_future = async { @@ -290,8 +287,7 @@ fn test_tcp_blocks_by_range_chunked_rpc() { _ => {} // Ignore other events } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! { _ = sender_future => {} @@ -383,8 +379,7 @@ fn test_blobs_by_range_chunked_rpc() { _ => {} // Ignore other behaviour events } } - } - .instrument(info_span!("Sender")); + }; // build the receiver future let receiver_future = async { @@ -418,8 +413,7 @@ fn test_blobs_by_range_chunked_rpc() { _ => {} // Ignore other events } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! { _ = sender_future => {} @@ -491,8 +485,7 @@ fn test_tcp_blocks_by_range_over_limit() { _ => {} // Ignore other behaviour events } } - } - .instrument(info_span!("Sender")); + }; // build the receiver future let receiver_future = async { @@ -525,8 +518,7 @@ fn test_tcp_blocks_by_range_over_limit() { _ => {} // Ignore other events } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! 
{ _ = sender_future => {} @@ -615,8 +607,7 @@ fn test_tcp_blocks_by_range_chunked_rpc_terminates_correctly() { _ => {} // Ignore other behaviour events } } - } - .instrument(info_span!("Sender")); + }; // determine messages to send (PeerId, RequestId). If some, indicates we still need to send // messages @@ -663,8 +654,7 @@ fn test_tcp_blocks_by_range_chunked_rpc_terminates_correctly() { } } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! { _ = sender_future => {} @@ -750,8 +740,7 @@ fn test_tcp_blocks_by_range_single_empty_rpc() { _ => {} // Ignore other behaviour events } } - } - .instrument(info_span!("Sender")); + }; // build the receiver future let receiver_future = async { @@ -784,8 +773,7 @@ fn test_tcp_blocks_by_range_single_empty_rpc() { _ => {} // Ignore other events } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! { _ = sender_future => {} _ = receiver_future => {} @@ -896,8 +884,7 @@ fn test_tcp_blocks_by_root_chunked_rpc() { _ => {} // Ignore other behaviour events } } - } - .instrument(info_span!("Sender")); + }; // build the receiver future let receiver_future = async { @@ -936,8 +923,7 @@ fn test_tcp_blocks_by_root_chunked_rpc() { _ => {} // Ignore other events } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! { _ = sender_future => {} _ = receiver_future => {} @@ -1037,8 +1023,7 @@ fn test_tcp_blocks_by_root_chunked_rpc_terminates_correctly() { _ => {} // Ignore other behaviour events } } - } - .instrument(info_span!("Sender")); + }; // determine messages to send (PeerId, RequestId). If some, indicates we still need to send // messages @@ -1085,8 +1070,7 @@ fn test_tcp_blocks_by_root_chunked_rpc_terminates_correctly() { } } } - } - .instrument(info_span!("Receiver")); + }; tokio::select! 
{ _ = sender_future => {} @@ -1139,8 +1123,7 @@ fn goodbye_test(log_level: &str, enable_logging: bool, protocol: Protocol) { _ => {} // Ignore other RPC messages } } - } - .instrument(info_span!("Sender")); + }; // build the receiver future let receiver_future = async { @@ -1150,8 +1133,7 @@ fn goodbye_test(log_level: &str, enable_logging: bool, protocol: Protocol) { return; } } - } - .instrument(info_span!("Receiver")); + }; let total_future = futures::future::join(sender_future, receiver_future); diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index ea46c3d0d10..25eadc6ff2e 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -28,7 +28,7 @@ use std::sync::Arc; use std::time::Duration; use task_executor::TaskExecutor; use tokio::sync::mpsc::{self, error::TrySendError}; -use tracing::{debug, error, trace, warn, Instrument}; +use tracing::{debug, error, trace, warn}; use types::*; pub use sync_methods::ChainSegmentProcessId; @@ -781,11 +781,6 @@ impl NetworkBeaconProcessor { custody_columns, publish_fn, ) - .instrument(tracing::info_span!( - "", - service = "fetch_engine_blobs", - block_root = format!("{:?}", block_root) - )) .await { Ok(Some(availability)) => match availability { diff --git a/beacon_node/network/src/router.rs b/beacon_node/network/src/router.rs index 5d5daae4aee..5020696f509 100644 --- a/beacon_node/network/src/router.rs +++ b/beacon_node/network/src/router.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use tokio::sync::mpsc; use tokio_stream::wrappers::UnboundedReceiverStream; -use tracing::{debug, error, info_span, trace, warn, Instrument}; +use tracing::{debug, error, trace, warn}; use types::{BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, SignedBeaconBlock}; /// Handles messages from the network and routes them to the appropriate service to be handled. 
@@ -132,7 +132,6 @@ impl Router { debug!("Network message router started"); UnboundedReceiverStream::new(handler_recv) .for_each(move |msg| future::ready(handler.handle_message(msg))) - .instrument(info_span!("", service = "router")) .await; }, "router", diff --git a/beacon_node/network/src/service.rs b/beacon_node/network/src/service.rs index f0414b6612e..a82f94ec335 100644 --- a/beacon_node/network/src/service.rs +++ b/beacon_node/network/src/service.rs @@ -35,7 +35,7 @@ use strum::IntoStaticStr; use task_executor::ShutdownReason; use tokio::sync::mpsc; use tokio::time::Sleep; -use tracing::{debug, error, info, info_span, trace, warn, Instrument}; +use tracing::{debug, error, info, trace, warn}; use types::{ EthSpec, ForkContext, Slot, SubnetId, SyncCommitteeSubscription, SyncSubnetId, Unsigned, ValidatorSubscription, @@ -464,7 +464,7 @@ impl NetworkService { } } } - }.instrument(info_span!("", service = "network")); + }; executor.spawn(service_fut, "network"); } diff --git a/beacon_node/network/src/subnet_service/mod.rs b/beacon_node/network/src/subnet_service/mod.rs index a8ea6ed5186..2d4fa778695 100644 --- a/beacon_node/network/src/subnet_service/mod.rs +++ b/beacon_node/network/src/subnet_service/mod.rs @@ -15,7 +15,7 @@ use delay_map::HashSetDelay; use futures::prelude::*; use lighthouse_network::{discv5::enr::NodeId, NetworkConfig, Subnet, SubnetDiscovery}; use slot_clock::SlotClock; -use tracing::{debug, error, info, instrument, warn}; +use tracing::{debug, error, info, warn}; use types::{ AttestationData, EthSpec, Slot, SubnetId, SyncCommitteeSubscription, SyncSubnetId, ValidatorSubscription, @@ -113,11 +113,6 @@ impl SubnetService { /* Public functions */ /// Establish the service based on the passed configuration. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] pub fn new(beacon_chain: Arc>, node_id: NodeId, config: &NetworkConfig) -> Self { let slot_duration = beacon_chain.slot_clock.slot_duration(); @@ -227,11 +222,6 @@ impl SubnetService { /// /// This returns a result simply for the ergonomics of using ?. The result can be /// safely dropped. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] pub fn validator_subscriptions(&mut self, subscriptions: impl Iterator) { // If the node is in a proposer-only state, we ignore all subnet subscriptions. if self.proposer_only { @@ -366,11 +356,6 @@ impl SubnetService { /// Checks if we have subscribed aggregate validators for the subnet. If not, checks the gossip /// verification, re-propagates and returns false. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] pub fn should_process_attestation( &self, subnet: Subnet, @@ -395,11 +380,6 @@ impl SubnetService { /// Adds an event to the event queue and notifies that this service is ready to be polled /// again. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] fn queue_event(&mut self, ev: SubnetServiceMessage) { self.events.push_back(ev); if let Some(waker) = &self.waker { @@ -411,10 +391,6 @@ impl SubnetService { /// /// If there is sufficient time, queues a peer discovery request for all the required subnets. // NOTE: Sending early subscriptions results in early searching for peers on subnets. 
- #[instrument(parent = None, - name = "subnet_service", - skip_all - )] fn discover_peers_request( &mut self, subnets_to_discover: impl Iterator, @@ -462,11 +438,6 @@ impl SubnetService { } // Subscribes to the subnet if it should be done immediately, or schedules it if required. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] fn subscribe_to_subnet( &mut self, ExactSubnet { subnet, slot }: ExactSubnet, @@ -519,11 +490,6 @@ impl SubnetService { } /// Adds a subscription event to the sync subnet. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] fn subscribe_to_sync_subnet( &mut self, subnet: Subnet, @@ -573,11 +539,6 @@ impl SubnetService { /// Checks that the time in which the subscription would end is not in the past. If we are /// already subscribed, extends the timeout if necessary. If this is a new subscription, we send /// out the appropriate events. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] fn subscribe_to_subnet_immediately( &mut self, subnet: Subnet, @@ -633,11 +594,6 @@ impl SubnetService { } // Unsubscribes from a subnet that was removed. - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] fn handle_removed_subnet(&mut self, subnet: Subnet) { if !self.subscriptions.contains_key(&subnet) { // Subscription no longer exists as short lived subnet @@ -655,11 +611,6 @@ impl SubnetService { impl Stream for SubnetService { type Item = SubnetServiceMessage; - #[instrument(parent = None, - fields(service = "subnet_service"), - name = "subnet_service", - skip_all - )] fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { // Update the waker if needed. if let Some(waker) = &self.waker { diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index ba66e41aca3..0b67262cde6 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -27,7 +27,7 @@ use std::collections::{ HashSet, }; use std::sync::Arc; -use tracing::{debug, error, info, instrument, warn}; +use tracing::{debug, error, info, warn}; use types::{Epoch, EthSpec}; /// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of @@ -147,10 +147,6 @@ pub struct BackFillSync { } impl BackFillSync { - #[instrument(parent = None, - name = "backfill_sync", - skip_all - )] pub fn new( beacon_chain: Arc>, network_globals: Arc>, @@ -191,11 +187,6 @@ impl BackFillSync { } /// Pauses the backfill sync if it's currently syncing. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] pub fn pause(&mut self) { if let BackFillState::Syncing = self.state() { debug!(processed_epochs = %self.validated_batches, to_be_processed = %self.current_start,"Backfill sync paused"); @@ -207,11 +198,6 @@ impl BackFillSync { /// /// If resuming is successful, reports back the current syncing metrics. #[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"] - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] pub fn start( &mut self, network: &mut SyncNetworkContext, @@ -287,11 +273,6 @@ impl BackFillSync { /// A fully synced peer has joined us. 
/// If we are in a failed state, update a local variable to indicate we are able to restart /// the failed sync on the next attempt. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] pub fn fully_synced_peer_joined(&mut self) { if matches!(self.state(), BackFillState::Failed) { self.restart_failed_sync = true; @@ -300,11 +281,6 @@ impl BackFillSync { /// A peer has disconnected. /// If the peer has active batches, those are considered failed and re-requested. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] #[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"] pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), BackFillError> { if matches!(self.state(), BackFillState::Failed) { @@ -319,11 +295,6 @@ impl BackFillSync { /// An RPC error has occurred. /// /// If the batch exists it is re-requested. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] #[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"] pub fn inject_error( &mut self, @@ -361,11 +332,6 @@ impl BackFillSync { /// If this returns an error, the backfill sync has failed and will be restarted once new peers /// join the system. /// The sync manager should update the global sync state on failure. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] #[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"] pub fn on_block_response( &mut self, @@ -417,11 +383,6 @@ impl BackFillSync { /// The syncing process has failed. /// /// This resets past variables, to allow for a fresh start when resuming. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn fail_sync(&mut self, error: BackFillError) -> Result<(), BackFillError> { // Some errors shouldn't fail the chain. if matches!(error, BackFillError::Paused) { @@ -453,11 +414,6 @@ impl BackFillSync { /// Processes the batch with the given id. /// The batch must exist and be ready for processing - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn process_batch( &mut self, network: &mut SyncNetworkContext, @@ -516,11 +472,6 @@ impl BackFillSync { /// The block processor has completed processing a batch. This function handles the result /// of the batch processor. /// If an error is returned the BackFill sync has failed. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] #[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"] pub fn on_batch_process_result( &mut self, @@ -673,11 +624,6 @@ impl BackFillSync { } /// Processes the next ready batch. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn process_completed_batches( &mut self, network: &mut SyncNetworkContext, @@ -741,11 +687,6 @@ impl BackFillSync { /// /// If a previous batch has been validated and it had been re-processed, penalize the original /// peer. 
- #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn advance_chain(&mut self, network: &mut SyncNetworkContext, validating_epoch: Epoch) { // make sure this epoch produces an advancement if validating_epoch >= self.current_start { @@ -837,11 +778,6 @@ impl BackFillSync { /// These events occur when a peer has successfully responded with blocks, but the blocks we /// have received are incorrect or invalid. This indicates the peer has not performed as /// intended and can result in downvoting a peer. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn handle_invalid_batch( &mut self, network: &mut SyncNetworkContext, @@ -893,11 +829,6 @@ impl BackFillSync { } /// Requests the batch assigned to the given id from a given peer. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn send_batch( &mut self, network: &mut SyncNetworkContext, @@ -969,11 +900,6 @@ impl BackFillSync { /// When resuming a chain, this function searches for batches that need to be re-downloaded and /// transitions their state to redownload the batch. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn resume_batches(&mut self, network: &mut SyncNetworkContext) -> Result<(), BackFillError> { let batch_ids_to_retry = self .batches @@ -998,11 +924,6 @@ impl BackFillSync { /// Attempts to request the next required batches from the peer pool if the chain is syncing. It will exhaust the peer /// pool and left over batches until the batch buffer is reached or all peers are exhausted. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn request_batches( &mut self, network: &mut SyncNetworkContext, @@ -1027,11 +948,6 @@ impl BackFillSync { /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { // don't request batches beyond genesis; if self.last_batch_downloaded { @@ -1093,11 +1009,6 @@ impl BackFillSync { /// /// This errors if the beacon chain indicates that backfill sync has already completed or is /// not required. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn reset_start_epoch(&mut self) -> Result<(), ResetEpochError> { let anchor_info = self.beacon_chain.store.get_anchor_info(); if anchor_info.block_backfill_complete(self.beacon_chain.genesis_backfill_slot) { @@ -1111,11 +1022,6 @@ impl BackFillSync { } /// Checks with the beacon chain if backfill sync has completed. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn check_completed(&mut self) -> bool { if self.would_complete(self.current_start) { // Check that the beacon chain agrees @@ -1131,11 +1037,6 @@ impl BackFillSync { } /// Checks if backfill would complete by syncing to `start_epoch`. - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn would_complete(&self, start_epoch: Epoch) -> bool { start_epoch <= self @@ -1145,20 +1046,10 @@ impl BackFillSync { } /// Updates the global network state indicating the current state of a backfill sync. 
- #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn set_state(&self, state: BackFillState) { *self.network_globals.backfill_state.write() = state; } - #[instrument(parent = None, - fields(service = "backfill_sync"), - name = "backfill_sync", - skip_all - )] fn state(&self) -> BackFillState { self.network_globals.backfill_state.read().clone() } diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index c8bd8c170fc..cc732a865e7 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -45,7 +45,7 @@ use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Duration; use store::Hash256; -use tracing::{debug, error, instrument, warn}; +use tracing::{debug, error, warn}; use types::{BlobSidecar, DataColumnSidecar, EthSpec, SignedBeaconBlock}; pub mod common; @@ -127,7 +127,6 @@ use lighthouse_network::service::api_types::Id; pub(crate) type BlockLookupSummary = (Id, Hash256, Option, Vec); impl BlockLookups { - #[instrument(parent = None, fields(service = "lookup_sync"), name = "lookup_sync")] pub fn new() -> Self { Self { failed_chains: LRUTimeCache::new(Duration::from_secs( @@ -138,31 +137,16 @@ impl BlockLookups { } #[cfg(test)] - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { self.failed_chains.insert(block_root); } #[cfg(test)] - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub(crate) fn get_failed_chains(&mut self) -> Vec { self.failed_chains.keys().cloned().collect() } #[cfg(test)] - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub(crate) fn active_single_lookups(&self) -> Vec { self.single_block_lookups .iter() @@ -171,11 +155,6 @@ impl BlockLookups { } /// Returns a vec of all parent lookup chains by tip, in descending slot order (tip first) - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub(crate) fn active_parent_lookups(&self) -> Vec { compute_parent_chains( &self @@ -192,11 +171,6 @@ impl BlockLookups { /// If a parent lookup exists or is triggered, a current lookup will be created. /// /// Returns true if the lookup is created or already exists - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] #[must_use = "only reference the new lookup if returns true"] pub fn search_child_and_parent( &mut self, @@ -230,11 +204,6 @@ impl BlockLookups { /// Seach a block whose parent root is unknown. /// /// Returns true if the lookup is created or already exists - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] #[must_use = "only reference the new lookup if returns true"] pub fn search_unknown_block( &mut self, @@ -251,11 +220,6 @@ impl BlockLookups { /// - `block_root_to_search` is a failed chain /// /// Returns true if the lookup is created or already exists - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] #[must_use = "only reference the new lookup if returns true"] pub fn search_parent_of_child( &mut self, @@ -358,11 +322,6 @@ impl BlockLookups { /// Searches for a single block hash. 
If the blocks parent is unknown, a chain of blocks is /// constructed. /// Returns true if the lookup is created or already exists - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] #[must_use = "only reference the new lookup if returns true"] fn new_current_lookup( &mut self, @@ -466,11 +425,6 @@ impl BlockLookups { /* Lookup responses */ /// Process a block or blob response received from a single lookup request. - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn on_download_response>( &mut self, id: SingleLookupReqId, @@ -556,11 +510,6 @@ impl BlockLookups { /* Error responses */ - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn peer_disconnected(&mut self, peer_id: &PeerId) { for (_, lookup) in self.single_block_lookups.iter_mut() { lookup.remove_peer(peer_id); @@ -569,11 +518,6 @@ impl BlockLookups { /* Processing responses */ - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn on_processing_result( &mut self, process_type: BlockProcessType, @@ -594,11 +538,6 @@ impl BlockLookups { self.on_lookup_result(process_type.id(), lookup_result, "processing_result", cx); } - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn on_processing_result_inner>( &mut self, lookup_id: SingleLookupId, @@ -788,11 +727,6 @@ impl BlockLookups { } } - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn on_external_processing_result( &mut self, block_root: Hash256, @@ -818,11 +752,6 @@ impl BlockLookups { } /// Makes progress on the immediate children of `block_root` - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn continue_child_lookups(&mut self, block_root: Hash256, cx: &mut SyncNetworkContext) { let mut lookup_results = vec![]; // < need to buffer lookup results to not re-borrow &mut self @@ -848,11 +777,6 @@ impl BlockLookups { /// Drops `dropped_id` lookup and all its children recursively. Lookups awaiting a parent need /// the parent to make progress to resolve, therefore we must drop them if the parent is /// dropped. - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId) { if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) { debug!( @@ -877,11 +801,6 @@ impl BlockLookups { /// Common handler a lookup request error, drop it and update metrics /// Returns true if the lookup is created or already exists - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] fn on_lookup_result( &mut self, id: SingleLookupId, @@ -919,22 +838,12 @@ impl BlockLookups { /* Helper functions */ /// Drops all the single block requests and returns how many requests were dropped. 
- #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn drop_single_block_requests(&mut self) -> usize { let requests_to_drop = self.single_block_lookups.len(); self.single_block_lookups.clear(); requests_to_drop } - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn update_metrics(&self) { metrics::set_gauge( &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, @@ -943,11 +852,6 @@ impl BlockLookups { } /// Perform some prune operations on lookups on some interval - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] pub fn prune_lookups(&mut self) { self.drop_lookups_without_peers(); self.drop_stuck_lookups(); @@ -971,11 +875,6 @@ impl BlockLookups { /// /// Instead there's no negative for keeping lookups with no peers around for some time. If we /// regularly prune them, it should not be a memory concern (TODO: maybe yes!). - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] fn drop_lookups_without_peers(&mut self) { for (lookup_id, block_root) in self .single_block_lookups @@ -1013,11 +912,6 @@ impl BlockLookups { /// /// - One single clear warn level log per stuck incident /// - If the original bug is sporadic, it reduces the time a node is stuck from forever to 15 min - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] fn drop_stuck_lookups(&mut self) { // While loop to find and drop all disjoint trees of potentially stuck lookups. while let Some(stuck_lookup) = self.single_block_lookups.values().find(|lookup| { @@ -1055,11 +949,6 @@ impl BlockLookups { } /// Recursively find the oldest ancestor lookup of another lookup - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] fn find_oldest_ancestor_lookup<'a>( &'a self, lookup: &'a SingleBlockLookup, @@ -1084,11 +973,6 @@ impl BlockLookups { /// Adds peers to a lookup and its ancestors recursively. 
/// Note: Takes a `lookup_id` as argument to allow recursion on mutable lookups, without having /// to duplicate the code to add peers to a lookup - #[instrument(parent = None, - fields(service = "lookup_sync"), - name = "lookup_sync", - skip_all - )] fn add_peers_to_lookup_and_ancestors( &mut self, lookup_id: SingleLookupId, diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 944f55dba1e..cb25c3c77bf 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -68,7 +68,7 @@ use std::ops::Sub; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; -use tracing::{debug, error, info, info_span, trace, Instrument}; +use tracing::{debug, error, info, trace}; use types::{ BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot, }; @@ -263,14 +263,7 @@ pub fn spawn( // spawn the sync manager thread debug!("Sync Manager started"); - executor.spawn( - async move { - Box::pin(sync_manager.main()) - .instrument(info_span!("", service = "sync")) - .await - }, - "sync", - ); + executor.spawn(async move { Box::pin(sync_manager.main()).await }, "sync"); } impl SyncManager { diff --git a/beacon_node/network/src/sync/peer_sampling.rs b/beacon_node/network/src/sync/peer_sampling.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 0e9178f0f81..000d274a1b0 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -12,7 +12,7 @@ use lighthouse_network::{PeerAction, PeerId}; use logging::crit; use std::collections::{btree_map::Entry, BTreeMap, HashSet}; use strum::IntoStaticStr; -use tracing::{debug, instrument, warn}; +use tracing::{debug, warn}; use types::{ColumnIndex, Epoch, EthSpec, Hash256, Slot}; /// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of @@ -205,7 +205,6 @@ impl SyncingChain { /// A block has been received for a batch on this chain. /// If the block correctly completes the batch it will be processed if possible. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn on_block_response( &mut self, network: &mut SyncNetworkContext, @@ -252,7 +251,6 @@ impl SyncingChain { /// Processes the batch with the given id. /// The batch must exist and be ready for processing - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] fn process_batch( &mut self, network: &mut SyncNetworkContext, @@ -300,7 +298,6 @@ impl SyncingChain { } /// Processes the next ready batch, prioritizing optimistic batches over the processing target. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] fn process_completed_batches( &mut self, network: &mut SyncNetworkContext, @@ -410,7 +407,6 @@ impl SyncingChain { /// The block processor has completed processing a batch. This function handles the result /// of the batch processor. 
- #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn on_batch_process_result( &mut self, network: &mut SyncNetworkContext, @@ -565,7 +561,6 @@ impl SyncingChain { } } - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] fn reject_optimistic_batch( &mut self, network: &mut SyncNetworkContext, @@ -600,7 +595,6 @@ impl SyncingChain { /// If a previous batch has been validated and it had been re-processed, penalize the original /// peer. #[allow(clippy::modulo_one)] - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] fn advance_chain(&mut self, network: &mut SyncNetworkContext, validating_epoch: Epoch) { // make sure this epoch produces an advancement if validating_epoch <= self.start_epoch { @@ -704,7 +698,6 @@ impl SyncingChain { /// These events occur when a peer has successfully responded with blocks, but the blocks we /// have received are incorrect or invalid. This indicates the peer has not performed as /// intended and can result in downvoting a peer. - #[instrument(parent = None, fields(service = self.id, network), skip_all)] fn handle_invalid_batch( &mut self, network: &mut SyncNetworkContext, @@ -764,7 +757,6 @@ impl SyncingChain { /// This chain has been requested to start syncing. /// /// This could be new chain, or an old chain that is being resumed. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn start_syncing( &mut self, network: &mut SyncNetworkContext, @@ -803,7 +795,6 @@ impl SyncingChain { /// Add a peer to the chain. /// /// If the chain is active, this starts requesting batches from this peer. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn add_peer( &mut self, network: &mut SyncNetworkContext, @@ -816,7 +807,6 @@ impl SyncingChain { /// An RPC error has occurred. /// /// If the batch exists it is re-requested. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn inject_error( &mut self, network: &mut SyncNetworkContext, @@ -900,7 +890,6 @@ impl SyncingChain { } /// Requests the batch assigned to the given id from a given peer. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn send_batch( &mut self, network: &mut SyncNetworkContext, @@ -979,7 +968,6 @@ impl SyncingChain { } /// Retries partial column requests within the batch by creating new requests for the failed columns. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn retry_partial_batch( &mut self, network: &mut SyncNetworkContext, @@ -1032,7 +1020,6 @@ impl SyncingChain { /// Kickstarts the chain by sending for processing batches that are ready and requesting more /// batches if needed. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] pub fn resume( &mut self, network: &mut SyncNetworkContext, @@ -1045,7 +1032,6 @@ impl SyncingChain { /// Attempts to request the next required batches from the peer pool if the chain is syncing. It will exhaust the peer /// pool and left over batches until the batch buffer is reached or all peers are exhausted. 
- #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] fn request_batches(&mut self, network: &mut SyncNetworkContext) -> ProcessingResult { if !matches!(self.state, ChainSyncingState::Syncing) { return Ok(KeepChain); @@ -1114,7 +1100,6 @@ impl SyncingChain { /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. - #[instrument(parent = None, fields(chain = self.id , service = "range_sync"), skip_all)] fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { // don't request batches beyond the target head slot if self diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index f34816d1de2..9259db2db6f 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -55,7 +55,7 @@ use logging::crit; use lru_cache::LRUTimeCache; use std::collections::HashMap; use std::sync::Arc; -use tracing::{debug, instrument, trace, warn}; +use tracing::{debug, trace, warn}; use types::{Epoch, EthSpec, Hash256}; /// For how long we store failed finalized chains to prevent retries. @@ -81,11 +81,6 @@ impl RangeSync where T: BeaconChainTypes, { - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn new(beacon_chain: Arc>) -> Self { RangeSync { beacon_chain: beacon_chain.clone(), @@ -102,11 +97,6 @@ where self.failed_chains.keys().copied().collect() } - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn state(&self) -> SyncChainStatus { self.chains.state() } @@ -116,11 +106,6 @@ where /// may need to be synced as a result. A new peer, may increase the peer pool of a finalized /// chain, this may result in a different finalized chain from syncing as finalized chains are /// prioritised by peer-pool size. - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn add_peer( &mut self, network: &mut SyncNetworkContext, @@ -215,11 +200,6 @@ where /// /// This function finds the chain that made this request. Once found, processes the result. /// This request could complete a chain or simply add to its progress. - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn blocks_by_range_response( &mut self, network: &mut SyncNetworkContext, @@ -250,11 +230,6 @@ where } } - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn handle_block_process_result( &mut self, network: &mut SyncNetworkContext, @@ -287,11 +262,6 @@ where /// A peer has disconnected. This removes the peer from any ongoing chains and mappings. A /// disconnected peer could remove a chain - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn peer_disconnect(&mut self, network: &mut SyncNetworkContext, peer_id: &PeerId) { // if the peer is in the awaiting head mapping, remove it self.awaiting_head_peers.remove(peer_id); @@ -304,11 +274,6 @@ where /// which pool the peer is in. The chain may also have a batch or batches awaiting /// for this peer. If so we mark the batch as failed. The batch may then hit it's maximum /// retries. In this case, we need to remove the chain. 
- #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] fn remove_peer(&mut self, network: &mut SyncNetworkContext, peer_id: &PeerId) { for (removed_chain, sync_type, remove_reason) in self.chains.call_all(|chain| chain.remove_peer(peer_id)) @@ -327,11 +292,6 @@ where /// /// Check to see if the request corresponds to a pending batch. If so, re-request it if possible, if there have /// been too many failed attempts for the batch, remove the chain. - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn inject_error( &mut self, network: &mut SyncNetworkContext, @@ -362,11 +322,6 @@ where } } - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] fn on_chain_removed( &mut self, chain: SyncingChain, @@ -415,11 +370,6 @@ where } /// Kickstarts sync. - #[instrument(parent = None, - fields(component = "range_sync"), - name = "range_sync", - skip_all - )] pub fn resume(&mut self, network: &mut SyncNetworkContext) { for (removed_chain, sync_type, remove_reason) in self.chains.call_all(|chain| chain.resume(network)) diff --git a/common/task_executor/src/lib.rs b/common/task_executor/src/lib.rs index e75aead6568..3b9e9c43210 100644 --- a/common/task_executor/src/lib.rs +++ b/common/task_executor/src/lib.rs @@ -5,7 +5,7 @@ use futures::channel::mpsc::Sender; use futures::prelude::*; use std::sync::Weak; use tokio::runtime::{Handle, Runtime}; -use tracing::{debug, instrument}; +use tracing::debug; pub use tokio::task::JoinHandle; @@ -81,6 +81,8 @@ pub struct TaskExecutor { signal_tx: Sender, /// The name of the service for inclusion in the logger output. + // FIXME(sproul): delete? + #[allow(dead_code)] service_name: String, } @@ -92,7 +94,6 @@ impl TaskExecutor { /// This function should only be used during testing. In production, prefer to obtain an /// instance of `Self` via a `environment::RuntimeContext` (see the `lighthouse/environment` /// crate). - #[instrument(parent = None,fields(service = service_name), name = "task_executor", skip_all)] pub fn new>( handle: T, exit: async_channel::Receiver<()>, @@ -108,7 +109,6 @@ impl TaskExecutor { } /// Clones the task executor adding a service name. - #[instrument(parent = None, fields(service = service_name), name = "task_executor", skip_all)] pub fn clone_with_name(&self, service_name: String) -> Self { TaskExecutor { handle_provider: self.handle_provider.clone(), @@ -124,7 +124,6 @@ impl TaskExecutor { /// The purpose of this function is to create a compile error if some function which previously /// returned `()` starts returning something else. Such a case may otherwise result in /// accidental error suppression. - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn spawn_ignoring_error( &self, task: impl Future> + Send + 'static, @@ -136,7 +135,6 @@ impl TaskExecutor { /// Spawn a task to monitor the completion of another task. /// /// If the other task exits by panicking, then the monitor task will shut down the executor. - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] fn spawn_monitor( &self, task_handle: impl Future> + Send + 'static, @@ -175,7 +173,6 @@ impl TaskExecutor { /// of a panic, the executor will be shut down via `self.signal_tx`. /// /// This function generates prometheus metrics on number of tasks and task duration. 
- #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn spawn(&self, task: impl Future + Send + 'static, name: &'static str) { if let Some(task_handle) = self.spawn_handle(task, name) { self.spawn_monitor(task_handle, name) @@ -191,7 +188,6 @@ impl TaskExecutor { /// This is useful in cases where the future to be spawned needs to do additional cleanup work when /// the task is completed/canceled (e.g. writing local variables to disk) or the task is created from /// some framework which does its own cleanup (e.g. a hyper server). - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn spawn_without_exit( &self, task: impl Future + Send + 'static, @@ -235,7 +231,6 @@ impl TaskExecutor { /// The task is cancelled when the corresponding async-channel is dropped. /// /// This function generates prometheus metrics on number of tasks and task duration. - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn spawn_handle( &self, task: impl Future + Send + 'static, @@ -283,7 +278,6 @@ impl TaskExecutor { /// The Future returned behaves like the standard JoinHandle which can return an error if the /// task failed. /// This function generates prometheus metrics on number of tasks and task duration. - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn spawn_blocking_handle( &self, task: F, @@ -332,7 +326,6 @@ impl TaskExecutor { /// a `tokio` context present in the thread-local storage due to some `rayon` funkiness. Talk to /// @paulhauner if you plan to use this function in production. He has put metrics in here to /// track any use of it, so don't think you can pull a sneaky one on him. - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn block_on_dangerous( &self, future: F, @@ -368,7 +361,6 @@ impl TaskExecutor { } /// Returns a `Handle` to the current runtime. - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn handle(&self) -> Option { self.handle_provider.handle() } @@ -383,7 +375,6 @@ impl TaskExecutor { } /// Get a channel to request shutting down. - #[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)] pub fn shutdown_sender(&self) -> Sender { self.signal_tx.clone() } diff --git a/slasher/service/src/service.rs b/slasher/service/src/service.rs index 2409a24c789..994e9d1211a 100644 --- a/slasher/service/src/service.rs +++ b/slasher/service/src/service.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use task_executor::TaskExecutor; use tokio::sync::mpsc::UnboundedSender; use tokio::time::{interval_at, Duration, Instant}; -use tracing::{debug, error, info, info_span, trace, warn, Instrument}; +use tracing::{debug, error, info, info_span, trace, warn}; use types::{AttesterSlashing, Epoch, EthSpec, ProposerSlashing}; pub struct SlasherService { @@ -64,8 +64,7 @@ impl SlasherService { update_period, slot_offset, notif_sender, - ) - .instrument(tracing::info_span!("slasher", service = "slasher")), + ), "slasher_server_notifier", ); From 04c23d907a3cc6be81f576cf208c1e6d9af93682 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 14:25:40 +1000 Subject: [PATCH 04/35] Remove all spans. 
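This drops the per-method span boilerplate from `SyncNetworkContext` and the remaining
`info_span!` usage in the slasher service. For context, the pattern being removed looks
roughly like the following (a minimal illustrative sketch, not lifted verbatim from this diff):

    use tracing::{debug, span, Level};

    fn do_network_work() {
        // Each method previously opened a manually entered span tagged with a
        // `service` field; the guard kept the span active for the method body.
        let span = span!(Level::INFO, "SyncNetworkContext", service = "network_context");
        let _enter = span.enter();
        debug!("work recorded inside the span");
    }
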
--- .../network/src/sync/network_context.rs | 113 +----------------- slasher/service/src/service.rs | 4 +- 2 files changed, 2 insertions(+), 115 deletions(-) diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index a62b8f7382d..76c8dff4626 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -45,7 +45,7 @@ use std::time::Duration; #[cfg(test)] use task_executor::TaskExecutor; use tokio::sync::mpsc; -use tracing::{debug, error, span, warn, Level}; +use tracing::{debug, error, warn, Level}; use types::blob_sidecar::FixedBlobSidecarList; use types::{ BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext, @@ -267,12 +267,6 @@ impl SyncNetworkContext { chain: Arc>, fork_context: Arc, ) -> Self { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); SyncNetworkContext { network_send, execution_engine_state: EngineState::Online, // always assume `Online` at the start @@ -374,13 +368,6 @@ impl SyncNetworkContext { } pub fn status_peers(&self, chain: &C, peers: impl Iterator) { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let status_message = chain.status_message(); for peer_id in peers { debug!( @@ -774,13 +761,6 @@ impl SyncNetworkContext { return Ok(LookupRequestResult::Pending("no peers")); }; - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - match self.chain.get_block_process_status(&block_root) { // Unknown block, continue request to download BlockProcessStatus::Unknown => {} @@ -880,13 +860,6 @@ impl SyncNetworkContext { return Ok(LookupRequestResult::Pending("no peers")); }; - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let imported_blob_indexes = self .chain .data_availability_checker @@ -951,13 +924,6 @@ impl SyncNetworkContext { request: DataColumnsByRootSingleBlockRequest, expect_max_responses: bool, ) -> Result, &'static str> { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let id = DataColumnsByRootRequestId { id: self.next_id(), requester, @@ -1002,13 +968,6 @@ impl SyncNetworkContext { block_root: Hash256, lookup_peers: Arc>>, ) -> Result { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let custody_indexes_imported = self .chain .data_availability_checker @@ -1205,26 +1164,12 @@ impl SyncNetworkContext { } pub fn update_execution_engine_state(&mut self, engine_state: EngineState) { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - debug!(past_state = ?self.execution_engine_state, new_state = ?engine_state, "Sync's view on execution engine state updated"); self.execution_engine_state = engine_state; } /// Terminates the connection with the peer and bans them. 
pub fn goodbye_peer(&mut self, peer_id: PeerId, reason: GoodbyeReason) { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - self.network_send .send(NetworkMessage::GoodbyePeer { peer_id, @@ -1238,13 +1183,6 @@ impl SyncNetworkContext { /// Reports to the scoring algorithm the behaviour of a peer. pub fn report_peer(&self, peer_id: PeerId, action: PeerAction, msg: &'static str) { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - debug!(%peer_id, %action, %msg, "Sync reporting peer"); self.network_send .send(NetworkMessage::ReportPeer { @@ -1260,13 +1198,6 @@ impl SyncNetworkContext { /// Subscribes to core topics. pub fn subscribe_core_topics(&self) { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - self.network_send .send(NetworkMessage::SubscribeCoreTopics) .unwrap_or_else(|e| { @@ -1276,13 +1207,6 @@ impl SyncNetworkContext { /// Sends an arbitrary network message. fn send_network_msg(&self, msg: NetworkMessage) -> Result<(), &'static str> { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - self.network_send.send(msg).map_err(|_| { debug!("Could not send message to the network service"); "Network channel send Failed" @@ -1507,13 +1431,6 @@ impl SyncNetworkContext { peer_id: PeerId, resp: RpcResponseResult>>>, ) -> Option> { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - // Note: need to remove the request to borrow self again below. Otherwise we can't // do nested requests let Some(mut request) = self.custody_by_root_requests.remove(&id.requester) else { @@ -1533,13 +1450,6 @@ impl SyncNetworkContext { request: ActiveCustodyRequest, result: CustodyRequestResult, ) -> Option> { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let result = result .map_err(RpcResponseError::CustodyRequestError) .transpose(); @@ -1567,13 +1477,6 @@ impl SyncNetworkContext { block: Arc>, seen_timestamp: Duration, ) -> Result<(), SendErrorProcessor> { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let beacon_processor = self .beacon_processor_if_enabled() .ok_or(SendErrorProcessor::ProcessorNotAvailable)?; @@ -1606,13 +1509,6 @@ impl SyncNetworkContext { blobs: FixedBlobSidecarList, seen_timestamp: Duration, ) -> Result<(), SendErrorProcessor> { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let beacon_processor = self .beacon_processor_if_enabled() .ok_or(SendErrorProcessor::ProcessorNotAvailable)?; @@ -1644,13 +1540,6 @@ impl SyncNetworkContext { seen_timestamp: Duration, process_type: BlockProcessType, ) -> Result<(), SendErrorProcessor> { - let span = span!( - Level::INFO, - "SyncNetworkContext", - service = "network_context" - ); - let _enter = span.enter(); - let beacon_processor = self .beacon_processor_if_enabled() .ok_or(SendErrorProcessor::ProcessorNotAvailable)?; diff --git a/slasher/service/src/service.rs b/slasher/service/src/service.rs index 994e9d1211a..013ef03fc0e 100644 --- a/slasher/service/src/service.rs +++ b/slasher/service/src/service.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use 
task_executor::TaskExecutor; use tokio::sync::mpsc::UnboundedSender; use tokio::time::{interval_at, Duration, Instant}; -use tracing::{debug, error, info, info_span, trace, warn}; +use tracing::{debug, error, info, trace, warn}; use types::{AttesterSlashing, Epoch, EthSpec, ProposerSlashing}; pub struct SlasherService { @@ -70,8 +70,6 @@ impl SlasherService { executor.spawn_blocking( || { - let span = info_span!("slasher", service = "slasher"); - let _ = span.enter(); Self::run_processor(beacon_chain, slasher, notif_receiver, network_sender); }, "slasher_server_processor", From 03c647a74a87ef1b9946a0924365affecea504bc Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 14:43:29 +1000 Subject: [PATCH 05/35] load_parent test. --- beacon_node/beacon_chain/src/block_verification.rs | 6 +++++- beacon_node/network/src/sync/network_context.rs | 2 +- beacon_node/store/src/hot_cold_store.rs | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 9acab8d360c..7c785e309c9 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, error}; +use tracing::{debug, error, info_span, instrument, span}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -1854,6 +1854,7 @@ fn verify_parent_block_is_known( /// Returns `Err(BlockError::ParentUnknown)` if the parent is not found, or if an error occurs /// whilst attempting the operation. 
#[allow(clippy::type_complexity)] +#[instrument(skip_all, fields(parent_root = %block.parent_root()))] fn load_parent>( block: B, chain: &BeaconChain, @@ -1878,6 +1879,9 @@ fn load_parent>( }); } + let db_read_span = info_span!("block_processing_db_read"); + let _guard = db_read_span.enter(); + let db_read_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_READ); let result = { diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 76c8dff4626..8e5e63fc52d 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -45,7 +45,7 @@ use std::time::Duration; #[cfg(test)] use task_executor::TaskExecutor; use tokio::sync::mpsc; -use tracing::{debug, error, warn, Level}; +use tracing::{debug, error, warn}; use types::blob_sidecar::FixedBlobSidecarList; use types::{ BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext, diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 9c9374e7fe5..da75f714a9b 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -37,7 +37,7 @@ use std::num::NonZeroUsize; use std::path::Path; use std::sync::Arc; use std::time::Duration; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, instrument, warn}; use types::data_column_sidecar::{ColumnIndex, DataColumnSidecar, DataColumnSidecarList}; use types::*; use zstd::{Decoder, Encoder}; @@ -1040,6 +1040,7 @@ impl, Cold: ItemStore> HotColdDB /// - `result_state_root == state.canonical_root()` /// - `state.slot() <= max_slot` /// - `state.get_latest_block_root(result_state_root) == block_root` + #[instrument(skip(self, max_slot))] pub fn get_advanced_hot_state( &self, block_root: Hash256, @@ -1111,6 +1112,7 @@ impl, Cold: ItemStore> HotColdDB /// If this function returns `Some(state)` then that `state` will always have /// `latest_block_header` matching `block_root` but may not be advanced all the way through to /// `max_slot`. + #[instrument(skip(self))] pub fn get_advanced_hot_state_from_cache( &self, block_root: Hash256, From c0804dcb8e98f83ae1f3e49333f4d3311d4fd917 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 15:26:00 +1000 Subject: [PATCH 06/35] Add gossip block instrumentation. 
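Gossip block verification now opens spans whose fields are filled in only once the values are
known (the block root after construction, the signature result after verification). A minimal
sketch of that record-later pattern, assuming the `tracing` 0.1 API (names and values here are
illustrative, not copied from this diff):

    use tracing::{debug_span, field};

    fn verify_sketch() {
        // Declare the field up front as empty, then record it once it is known.
        let span = debug_span!("GossipVerifiedBlock::new", block_root = field::Empty);
        let _guard = span.enter();

        let block_root = "0xabc"; // placeholder for the computed root
        span.record("block_root", block_root);
    }
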
--- .../beacon_chain/src/block_verification.rs | 58 ++++++++++++------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 7c785e309c9..3caa87dcc13 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, error, info_span, instrument, span}; +use tracing::{debug, debug_span, error, info_span, instrument}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -827,6 +827,9 @@ impl GossipVerifiedBlock { block: Arc>, chain: &BeaconChain, ) -> Result { + let span = debug_span!("GossipVerifiedBlock::new", block_root = ""); + let _guard = span.enter(); + // If the block is valid for gossip we don't supply it to the slasher here because // we assume it will be transformed into a fully verified block. We *do* need to supply // it to the slasher if an error occurs, because that's the end of this block's journey, @@ -835,12 +838,16 @@ impl GossipVerifiedBlock { // The `SignedBeaconBlock` and `SignedBeaconBlockHeader` have the same canonical root, // but it's way quicker to calculate root of the header since the hash of the tree rooted // at `BeaconBlockBody` is already computed in the header. - Self::new_without_slasher_checks(block, &header, chain).map_err(|e| { - process_block_slash_info::<_, BlockError>( - chain, - BlockSlashInfo::from_early_error_block(header, e), - ) - }) + Self::new_without_slasher_checks(block, &header, chain) + .map_err(|e| { + process_block_slash_info::<_, BlockError>( + chain, + BlockSlashInfo::from_early_error_block(header, e), + ) + }) + .inspect(|block| { + span.record("block_root", block.block_root.to_string()); + }) } /// As for new, but doesn't pass the block to the slasher. @@ -1185,6 +1192,7 @@ impl SignatureVerifiedBlock { /// Finishes signature verification on the provided `GossipVerifedBlock`. Does not re-verify /// the proposer signature. 
+ #[instrument(skip_all, fields(block_root = %from.block_root))] pub fn from_gossip_verified_block( from: GossipVerifiedBlock, chain: &BeaconChain, @@ -1212,20 +1220,28 @@ impl SignatureVerifiedBlock { signature_verifier .include_all_signatures_except_proposal(block.as_ref(), &mut consensus_context)?; - if signature_verifier.verify().is_ok() { - Ok(Self { - block: MaybeAvailableBlock::AvailabilityPending { + let sig_verify_span = info_span!("signature_verify", result = "started"); + let _guard = sig_verify_span.enter(); + let result = signature_verifier.verify(); + match result { + Ok(_) => { + sig_verify_span.record("result", &"ok"); + Ok(Self { + block: MaybeAvailableBlock::AvailabilityPending { + block_root: from.block_root, + block, + }, block_root: from.block_root, - block, - }, - block_root: from.block_root, - parent: Some(parent), - consensus_context, - }) - } else { - Err(BlockError::InvalidSignature( - InvalidSignature::BlockBodySignatures, - )) + parent: Some(parent), + consensus_context, + }) + } + Err(_) => { + sig_verify_span.record("result", &"fail"); + Err(BlockError::InvalidSignature( + InvalidSignature::BlockBodySignatures, + )) + } } } @@ -2035,6 +2051,7 @@ impl BlockBlobError for GossipDataColumnError { /// and `Cow::Borrowed(state)` will be returned. Otherwise, the state will be cloned, cheaply /// advanced and then returned as a `Cow::Owned`. The end result is that the given `state` is never /// mutated to be invalid (in fact, it is never changed beyond a simple committee cache build). +#[instrument(skip(state, spec))] pub fn cheap_state_advance_to_obtain_committees<'a, E: EthSpec, Err: BlockBlobError>( state: &'a mut BeaconState, state_root_opt: Option, @@ -2069,6 +2086,7 @@ pub fn cheap_state_advance_to_obtain_committees<'a, E: EthSpec, Err: BlockBlobEr } /// Obtains a read-locked `ValidatorPubkeyCache` from the `chain`. +#[instrument(skip(chain), level = "debug")] pub fn get_validator_pubkey_cache( chain: &BeaconChain, ) -> Result>, BeaconChainError> { From 0a8d22f9d5683b08bd332c198f95a27a1a4a2c5b Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 15:51:34 +1000 Subject: [PATCH 07/35] more instrument. --- beacon_node/store/src/state_cache.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/beacon_node/store/src/state_cache.rs b/beacon_node/store/src/state_cache.rs index f8acd28f323..ff77c05cc13 100644 --- a/beacon_node/store/src/state_cache.rs +++ b/beacon_node/store/src/state_cache.rs @@ -6,6 +6,7 @@ use crate::{ use lru::LruCache; use std::collections::{BTreeMap, HashMap, HashSet}; use std::num::NonZeroUsize; +use tracing::instrument; use types::{BeaconState, ChainSpec, Epoch, EthSpec, Hash256, Slot}; /// Fraction of the LRU cache to leave intact during culling. @@ -292,6 +293,7 @@ impl StateCache { None } + #[instrument(skip(self))] pub fn get_by_block_root( &mut self, block_root: Hash256, From 172d85974acfe091e2675040f663f3bdb928c10e Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 16:27:29 +1000 Subject: [PATCH 08/35] Instrument `process_block` and `import_block`. 
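Block processing and import gain spans in two ways: the `#[instrument]` attribute on the entry
points, and `.instrument(...)` wrapped around the inner import future. A compact sketch of both
patterns, assuming the `tracing` crate's attribute macro and `Instrument` extension trait
(function and field names are illustrative, not taken from this diff):

    use tracing::{info_span, instrument, Instrument};

    #[instrument(skip_all, fields(block_root = %block_root))]
    async fn process_block_sketch(block_root: String) {
        // The nested future carries its own named span, mirroring the
        // `import_block_future` span attached in this patch.
        async {
            tracing::debug!("importing block");
        }
        .instrument(info_span!("import_block_future"))
        .await;
    }
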
--- beacon_node/beacon_chain/src/beacon_chain.rs | 27 ++++++++++++++++++-- consensus/fork_choice/src/fork_choice.rs | 7 ++++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index decc38e22cf..561a8e00c5a 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -126,7 +126,7 @@ use store::{ }; use task_executor::{ShutdownReason, TaskExecutor}; use tokio_stream::Stream; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument}; use tree_hash::TreeHash; use types::blob_sidecar::FixedBlobSidecarList; use types::data_column_sidecar::ColumnIndex; @@ -3383,6 +3383,7 @@ impl BeaconChain { /// /// Returns an `Err` if the given block was invalid, or an error was encountered during /// verification. + #[instrument(skip_all, fields(block_root = ?block_root, block_source = %block_source))] pub async fn process_block>( self: &Arc, block_root: Hash256, @@ -3445,7 +3446,8 @@ impl BeaconChain { self.check_block_availability_and_import(block).await } } - }; + } + .instrument(info_span!("import_block_future")); // Verify and import the block. match import_block.await { @@ -3499,6 +3501,7 @@ impl BeaconChain { /// get a fully `ExecutedBlock`. /// /// An error is returned if the verification handle couldn't be awaited. + #[instrument(skip_all)] pub async fn into_executed_block( self: Arc, execution_pending_block: ExecutionPendingBlock, @@ -3547,6 +3550,7 @@ impl BeaconChain { /// Checks if the block is available, and imports immediately if so, otherwise caches the block /// in the data availability checker. + #[instrument(skip_all)] async fn check_block_availability_and_import( self: &Arc, block: AvailabilityPendingExecutedBlock, @@ -3813,6 +3817,13 @@ impl BeaconChain { /// An error is returned if the block was unable to be imported. It may be partially imported /// (i.e., this function is not atomic). #[allow(clippy::too_many_arguments)] + #[instrument( + skip_all, + fields( + block_root = ?block_root, + slot = %signed_block.message().slot() + ) + )] fn import_block( &self, signed_block: AvailableBlock, @@ -3850,6 +3861,13 @@ impl BeaconChain { // Only take a write lock if there are new keys to import. if state.validators().len() > pubkey_cache.len() { + let _pubkey_span = tracing::debug_span!( + "pubkey_cache_update", + new_validators = tracing::field::Empty, + cache_len_before = pubkey_cache.len() + ) + .entered(); + parking_lot::RwLockUpgradableReadGuard::upgrade(pubkey_cache) .import_new_pubkeys(&state)? } else { @@ -3863,6 +3881,7 @@ impl BeaconChain { // However, latency between the VC and the BN might cause the VC to produce attestations at // a previous slot. 
if state.current_epoch().saturating_add(1_u64) >= current_epoch { + let _attester_span = tracing::debug_span!("attester_cache_update").entered(); self.attester_cache .maybe_cache_state(&state, block_root, &self.spec) .map_err(BeaconChainError::from)?; @@ -4007,6 +4026,8 @@ impl BeaconChain { ops.push(StoreOp::PutBlock(block_root, signed_block.clone())); ops.push(StoreOp::PutState(block.state_root(), &state)); + let db_span = tracing::info_span!("persist_blocks_and_blobs",).entered(); + if let Err(e) = self.store.do_atomically_with_block_and_blobs_cache(ops) { error!( msg = "Restoring fork choice from disk", @@ -4019,6 +4040,8 @@ impl BeaconChain { .unwrap_or(e.into())); } + drop(db_span); + // The fork choice write-lock is dropped *after* the on-disk database has been updated. // This prevents inconsistency between the two at the expense of concurrency. drop(fork_choice); diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs index 91b44c7af10..9d6f63c62c7 100644 --- a/consensus/fork_choice/src/fork_choice.rs +++ b/consensus/fork_choice/src/fork_choice.rs @@ -13,7 +13,7 @@ use std::cmp::Ordering; use std::collections::BTreeSet; use std::marker::PhantomData; use std::time::Duration; -use tracing::{debug, warn}; +use tracing::{debug, instrument, warn}; use types::{ consts::bellatrix::INTERVALS_PER_SLOT, AbstractExecPayload, AttestationShufflingId, AttesterSlashingRef, BeaconBlockRef, BeaconState, BeaconStateError, ChainSpec, Checkpoint, @@ -646,6 +646,11 @@ where /// The supplied block **must** pass the `state_transition` function as it will not be run /// here. #[allow(clippy::too_many_arguments)] + #[instrument( + skip_all, + fields( + block_root = %block_root, slot = %block.slot(), block_delay = ?block_delay, payload_verification_status = ?payload_verification_status + ))] pub fn on_block>( &mut self, system_time_current_slot: Slot, From 386b4259cb5d1359b613298a055dc7676171852a Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 16:48:46 +1000 Subject: [PATCH 09/35] Add more detailed instruments to `import_block` --- beacon_node/beacon_chain/src/beacon_chain.rs | 1 + beacon_node/beacon_chain/src/block_verification.rs | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 561a8e00c5a..ef412cfab19 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3749,6 +3749,7 @@ impl BeaconChain { } } + #[instrument(skip_all, fields(block_root = %block.import_data.block_root))] pub async fn import_available_block( self: &Arc, block: Box>, diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 3caa87dcc13..a9af1845e7d 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -790,6 +790,7 @@ pub fn build_blob_data_column_sidecars( /// /// Used to allow functions to accept blocks at various stages of verification. pub trait IntoExecutionPendingBlock: Sized { + #[instrument(skip_all, fields(block_root = %block_root))] fn into_execution_pending_block( self, block_root: Hash256, @@ -1087,6 +1088,7 @@ impl GossipVerifiedBlock { impl IntoExecutionPendingBlock for GossipVerifiedBlock { /// Completes verification of the wrapped `block`. 
+ #[instrument(skip_all, fields(block_root = %block_root))] fn into_execution_pending_block_slashable( self, block_root: Hash256, @@ -1225,7 +1227,7 @@ impl SignatureVerifiedBlock { let result = signature_verifier.verify(); match result { Ok(_) => { - sig_verify_span.record("result", &"ok"); + sig_verify_span.record("result", "ok"); Ok(Self { block: MaybeAvailableBlock::AvailabilityPending { block_root: from.block_root, @@ -1237,7 +1239,7 @@ impl SignatureVerifiedBlock { }) } Err(_) => { - sig_verify_span.record("result", &"fail"); + sig_verify_span.record("result", "fail"); Err(BlockError::InvalidSignature( InvalidSignature::BlockBodySignatures, )) @@ -1266,6 +1268,7 @@ impl SignatureVerifiedBlock { impl IntoExecutionPendingBlock for SignatureVerifiedBlock { /// Completes verification of the wrapped `block`. + #[instrument(skip_all, fields(block_root = %block_root))] fn into_execution_pending_block_slashable( self, block_root: Hash256, @@ -1342,6 +1345,7 @@ impl ExecutionPendingBlock { /// verification must be done upstream (e.g., via a `SignatureVerifiedBlock` /// /// Returns an error if the block is invalid, or if the block was unable to be verified. + #[instrument(skip_all, fields(block_root = %block_root))] pub fn from_signature_verified_components( block: MaybeAvailableBlock, block_root: Hash256, From 4268ae58a0a8eaabb377f63149d14dac9cffcf5a Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 16:57:19 +1000 Subject: [PATCH 10/35] Add more detailed instruments to `import_block` --- beacon_node/beacon_chain/src/beacon_chain.rs | 2 ++ consensus/fork_choice/src/fork_choice.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index ef412cfab19..0e13b0ed26e 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -4271,6 +4271,7 @@ impl BeaconChain { /// Iterate through the attestations in the block and register them as "observed". /// /// This will stop us from propagating them on the gossip network. + #[instrument(skip_all)] fn import_block_observe_attestations( &self, block: BeaconBlockRef, @@ -4431,6 +4432,7 @@ impl BeaconChain { // For the current and next epoch of this state, ensure we have the shuffling from this // block in our cache. 
+ #[instrument(skip_all)] fn import_block_update_shuffling_cache( &self, block_root: Hash256, diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs index 9d6f63c62c7..adcee841ecd 100644 --- a/consensus/fork_choice/src/fork_choice.rs +++ b/consensus/fork_choice/src/fork_choice.rs @@ -472,6 +472,7 @@ where /// Is equivalent to: /// /// https://github.com/ethereum/eth2.0-specs/blob/v0.12.1/specs/phase0/fork-choice.md#get_head + #[instrument(skip_all)] pub fn get_head( &mut self, system_time_current_slot: Slot, From 3f55de0345a4d325e57b67d7ed7d1707cc637daa Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 17:19:01 +1000 Subject: [PATCH 11/35] Add more detailed instruments to `import_block` --- beacon_node/beacon_chain/src/beacon_chain.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 0e13b0ed26e..f2929b6a337 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3778,11 +3778,14 @@ impl BeaconChain { // TODO(das) record custody column available timestamp - // import - let chain = self.clone(); - let block_root = self - .spawn_blocking_handle( + let block_root = { + // Capture the current span before moving into the blocking task + let current_span = tracing::Span::current(); + let chain = self.clone(); + self.spawn_blocking_handle( move || { + // Enter the captured span in the blocking thread + let _guard = current_span.enter(); chain.import_block( block, block_root, @@ -3794,7 +3797,8 @@ impl BeaconChain { }, "payload_verification_handle", ) - .await??; + .await?? + }; // Remove block components from da_checker AFTER completing block import. Then we can assert // the following invariant: From 26e30c3275f3e90e78c524409eac319330b3e971 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 17:40:32 +1000 Subject: [PATCH 12/35] More instruments. --- beacon_node/beacon_chain/src/block_verification.rs | 5 +++-- beacon_node/beacon_chain/src/data_availability_checker.rs | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index a9af1845e7d..61c8300054c 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -1088,7 +1088,7 @@ impl GossipVerifiedBlock { impl IntoExecutionPendingBlock for GossipVerifiedBlock { /// Completes verification of the wrapped `block`. - #[instrument(skip_all, fields(block_root = %block_root))] + #[instrument(name = "gossip_block_into_execution_pending_block_slashable", skip_all, fields(block_root = %block_root))] fn into_execution_pending_block_slashable( self, block_root: Hash256, @@ -1268,7 +1268,7 @@ impl SignatureVerifiedBlock { impl IntoExecutionPendingBlock for SignatureVerifiedBlock { /// Completes verification of the wrapped `block`. 
- #[instrument(skip_all, fields(block_root = %block_root))] + #[instrument(name = "sig_verified_block_into_execution_pending_block_slashable", skip_all, fields(block_root = %block_root))] fn into_execution_pending_block_slashable( self, block_root: Hash256, @@ -1306,6 +1306,7 @@ impl IntoExecutionPendingBlock for SignatureVerifiedBloc impl IntoExecutionPendingBlock for RpcBlock { /// Verifies the `SignedBeaconBlock` by first transforming it into a `SignatureVerifiedBlock` /// and then using that implementation of `IntoExecutionPendingBlock` to complete verification. + #[instrument(name = "rpc_block_into_execution_pending_block_slashable", skip_all, fields(block_root = %block_root))] fn into_execution_pending_block_slashable( self, block_root: Hash256, diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index e2947bb2d6f..b8573e6f224 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -16,7 +16,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; use task_executor::TaskExecutor; -use tracing::{debug, error}; +use tracing::{debug, error, instrument}; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, @@ -328,6 +328,7 @@ impl DataAvailabilityChecker { /// /// WARNING: This function assumes all required blobs are already present, it does NOT /// check if there are any missing blobs. + #[instrument(skip_all)] pub fn verify_kzg_for_rpc_block( &self, block: RpcBlock, From db4d342f574d78ccc33a18ae07870365b5269a36 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 18:01:55 +1000 Subject: [PATCH 13/35] More instruments. --- beacon_node/beacon_chain/src/beacon_chain.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index f2929b6a337..17ad505232e 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -4181,6 +4181,7 @@ impl BeaconChain { } /// Process a block for the validator monitor, including all its constituent messages. + #[instrument(skip_all)] fn import_block_update_validator_monitor( &self, block: BeaconBlockRef, @@ -4338,6 +4339,7 @@ impl BeaconChain { } /// If a slasher is configured, provide the attestations from the block. + #[instrument(skip_all)] fn import_block_update_slasher( &self, block: BeaconBlockRef, From 62940aa13ea21da735a8b9b8f1ef164b87038ad9 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 20:21:27 +1000 Subject: [PATCH 14/35] More instruments. 
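This part of the series experiments with how async work gets attributed to spans: attaching a span to a future with `tracing::Instrument`, versus entering a captured span inside the future. A rough sketch of the `Instrument` approach and the pitfall it avoids, assuming only the `tracing` crate; the names are illustrative and not Lighthouse APIs:

    use tracing::{info_span, Instrument};

    async fn notify_new_payload_sketch() {
        // work attributed to the enclosing span
    }

    async fn caller_sketch() {
        // `.instrument(..)` enters the span each time the future is polled
        // and exits it at every yield point, so attribution stays correct
        // across `.await`.
        notify_new_payload_sketch()
            .instrument(info_span!("notify_new_payload_sketch"))
            .await;

        // By contrast, holding a `span.enter()` guard across an `.await` is
        // the usual source of mis-parented ("orphan") spans: while the task
        // is suspended the guard stays active on its thread, so unrelated
        // work polled there gets recorded under the span.
    }
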
--- Cargo.lock | 1 + .../beacon_chain/src/block_verification.rs | 16 ++++++++++++---- .../beacon_chain/src/execution_payload.rs | 2 +- consensus/state_processing/Cargo.toml | 1 + .../state_processing/src/per_block_processing.rs | 2 ++ .../state_processing/src/per_epoch_processing.rs | 2 ++ .../state_processing/src/per_slot_processing.rs | 3 +++ consensus/types/src/beacon_state.rs | 5 +++++ 8 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba192a151b0..4dc09718419 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8967,6 +8967,7 @@ dependencies = [ "ssz_types", "test_random_derive", "tokio", + "tracing", "tree_hash", "types", ] diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 61c8300054c..7046cc4ae4d 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, debug_span, error, info_span, instrument}; +use tracing::{debug, debug_span, error, info_span, instrument, Instrument}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -1440,7 +1440,10 @@ impl ExecutionPendingBlock { started_execution, ); } - let payload_verification_status = payload_notifier.notify_new_payload().await?; + let payload_verification_status = payload_notifier + .notify_new_payload() + .instrument(info_span!("notify_new_payload", block_root = %block_root)) + .await?; Ok(PayloadVerificationOutcome { payload_verification_status, @@ -1450,10 +1453,14 @@ impl ExecutionPendingBlock { // Spawn the payload verification future as a new task, but don't wait for it to complete. // The `payload_verification_future` will be awaited later to ensure verification completed // successfully. + let current_span = tracing::Span::current(); let payload_verification_handle = chain .task_executor .spawn_handle( - payload_verification_future, + async move { + let _guard = current_span.enter(); + payload_verification_future.await + }, "execution_payload_verification", ) .ok_or(BeaconChainError::RuntimeShutdown)?; @@ -1463,7 +1470,7 @@ impl ExecutionPendingBlock { */ let catchup_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CATCHUP_STATE); - + let catchup_span = info_span!("catchup_state", outcome = "").entered(); let mut state = parent.pre_state; // The block must have a higher slot than its parent. 
@@ -1537,6 +1544,7 @@ impl ExecutionPendingBlock { } } metrics::stop_timer(catchup_timer); + drop(catchup_span); let block_slot = block.slot(); let state_current_epoch = state.current_epoch(); diff --git a/beacon_node/beacon_chain/src/execution_payload.rs b/beacon_node/beacon_chain/src/execution_payload.rs index aa98310c121..23770a89366 100644 --- a/beacon_node/beacon_chain/src/execution_payload.rs +++ b/beacon_node/beacon_chain/src/execution_payload.rs @@ -24,7 +24,7 @@ use state_processing::per_block_processing::{ }; use std::sync::Arc; use tokio::task::JoinHandle; -use tracing::{debug, warn}; +use tracing::{debug, instrument, warn}; use tree_hash::TreeHash; use types::payload::BlockProductionVersion; use types::*; diff --git a/consensus/state_processing/Cargo.toml b/consensus/state_processing/Cargo.toml index 7ada4488f29..3acf491f23e 100644 --- a/consensus/state_processing/Cargo.toml +++ b/consensus/state_processing/Cargo.toml @@ -35,6 +35,7 @@ safe_arith = { workspace = true } smallvec = { workspace = true } ssz_types = { workspace = true } test_random_derive = { path = "../../common/test_random_derive" } +tracing = { workspace = true } tree_hash = { workspace = true } types = { workspace = true } diff --git a/consensus/state_processing/src/per_block_processing.rs b/consensus/state_processing/src/per_block_processing.rs index 6339f9003d3..93892ecf5b6 100644 --- a/consensus/state_processing/src/per_block_processing.rs +++ b/consensus/state_processing/src/per_block_processing.rs @@ -47,6 +47,7 @@ use crate::common::update_progressive_balances_cache::{ use crate::epoch_cache::initialize_epoch_cache; #[cfg(feature = "arbitrary-fuzz")] use arbitrary::Arbitrary; +use tracing::instrument; /// The strategy to be used when validating the block's signatures. #[cfg_attr(feature = "arbitrary-fuzz", derive(Arbitrary))] @@ -97,6 +98,7 @@ pub enum VerifyBlockRoot { /// re-calculating the root when it is already known. Note `block_root` should be equal to the /// tree hash root of the block, NOT the signing root of the block. This function takes /// care of mixing in the domain. +#[instrument(skip_all)] pub fn per_block_processing>( state: &mut BeaconState, signed_block: &SignedBeaconBlock, diff --git a/consensus/state_processing/src/per_epoch_processing.rs b/consensus/state_processing/src/per_epoch_processing.rs index 41c30c49318..8de6054bd2e 100644 --- a/consensus/state_processing/src/per_epoch_processing.rs +++ b/consensus/state_processing/src/per_epoch_processing.rs @@ -5,6 +5,7 @@ pub use epoch_processing_summary::{EpochProcessingSummary, ParticipationEpochSum use errors::EpochProcessingError as Error; pub use justification_and_finalization_state::JustificationAndFinalizationState; use safe_arith::SafeArith; +use tracing::instrument; use types::{BeaconState, ChainSpec, EthSpec}; pub use registry_updates::{process_registry_updates, process_registry_updates_slow}; @@ -30,6 +31,7 @@ pub mod weigh_justification_and_finalization; /// /// Mutates the given `BeaconState`, returning early if an error is encountered. If an error is /// returned, a state might be "half-processed" and therefore in an invalid state. 
+#[instrument(skip_all)] pub fn process_epoch( state: &mut BeaconState, spec: &ChainSpec, diff --git a/consensus/state_processing/src/per_slot_processing.rs b/consensus/state_processing/src/per_slot_processing.rs index af1cce602c3..7fccf78c874 100644 --- a/consensus/state_processing/src/per_slot_processing.rs +++ b/consensus/state_processing/src/per_slot_processing.rs @@ -4,6 +4,7 @@ use crate::upgrade::{ }; use crate::{per_epoch_processing::EpochProcessingSummary, *}; use safe_arith::{ArithError, SafeArith}; +use tracing::instrument; use types::*; #[derive(Debug, PartialEq)] @@ -25,6 +26,7 @@ impl From for Error { /// If the root of the supplied `state` is known, then it can be passed as `state_root`. If /// `state_root` is `None`, the root of `state` will be computed using a cached tree hash. /// Providing the `state_root` makes this function several orders of magnitude faster. +#[instrument(skip_all)] pub fn per_slot_processing( state: &mut BeaconState, state_root: Option, @@ -85,6 +87,7 @@ pub fn per_slot_processing( Ok(summary) } +#[instrument(skip_all)] fn cache_state( state: &mut BeaconState, state_root: Option, diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index bddfb6445af..1658d3f4378 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -34,6 +34,7 @@ pub use crate::beacon_state::slashings_cache::SlashingsCache; pub use eth_spec::*; pub use iter::BlockRootsIter; pub use milhouse::{interface::Interface, List, Vector}; +use tracing::instrument; #[macro_use] mod committee_cache; @@ -1931,6 +1932,7 @@ impl BeaconState { } /// Build all caches (except the tree hash cache), if they need to be built. + #[instrument(skip_all)] pub fn build_caches(&mut self, spec: &ChainSpec) -> Result<(), Error> { self.build_all_committee_caches(spec)?; self.update_pubkey_cache()?; @@ -1941,6 +1943,7 @@ impl BeaconState { } /// Build all committee caches, if they need to be built. + #[instrument(skip_all)] pub fn build_all_committee_caches(&mut self, spec: &ChainSpec) -> Result<(), Error> { self.build_committee_cache(RelativeEpoch::Previous, spec)?; self.build_committee_cache(RelativeEpoch::Current, spec)?; @@ -1994,6 +1997,7 @@ impl BeaconState { } /// Build a committee cache, unless it is has already been built. + #[instrument(skip_all, fields(relative_epoch))] pub fn build_committee_cache( &mut self, relative_epoch: RelativeEpoch, @@ -2194,6 +2198,7 @@ impl BeaconState { /// Compute the tree hash root of the state using the tree hash cache. /// /// Initialize the tree hash cache if it isn't already initialized. 
+ #[instrument(skip_all)] pub fn update_tree_hash_cache<'a>(&'a mut self) -> Result { self.apply_pending_mutations()?; map_beacon_state_ref!(&'a _, self.to_ref(), |inner, cons| { From e3f6273d75b9c79a17d53f7820499dea96055e50 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 20:41:24 +1000 Subject: [PATCH 15/35] Add service name --- beacon_node/beacon_chain/src/block_verification.rs | 8 +++----- beacon_node/beacon_chain/src/execution_payload.rs | 2 +- lighthouse/src/main.rs | 5 +++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 7046cc4ae4d..ed3cab02327 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -1412,7 +1412,9 @@ impl ExecutionPendingBlock { )?; let is_valid_merge_transition_block = is_merge_transition_block(&parent.pre_state, block.message().body()); + let current_span = tracing::Span::current(); let payload_verification_future = async move { + let _guard = current_span.enter(); let chain = payload_notifier.chain.clone(); let block = payload_notifier.block.clone(); @@ -1453,14 +1455,10 @@ impl ExecutionPendingBlock { // Spawn the payload verification future as a new task, but don't wait for it to complete. // The `payload_verification_future` will be awaited later to ensure verification completed // successfully. - let current_span = tracing::Span::current(); let payload_verification_handle = chain .task_executor .spawn_handle( - async move { - let _guard = current_span.enter(); - payload_verification_future.await - }, + payload_verification_future, "execution_payload_verification", ) .ok_or(BeaconChainError::RuntimeShutdown)?; diff --git a/beacon_node/beacon_chain/src/execution_payload.rs b/beacon_node/beacon_chain/src/execution_payload.rs index 23770a89366..aa98310c121 100644 --- a/beacon_node/beacon_chain/src/execution_payload.rs +++ b/beacon_node/beacon_chain/src/execution_payload.rs @@ -24,7 +24,7 @@ use state_processing::per_block_processing::{ }; use std::sync::Arc; use tokio::task::JoinHandle; -use tracing::{debug, instrument, warn}; +use tracing::{debug, warn}; use tree_hash::TreeHash; use types::payload::BlockProductionVersion; use types::*; diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index 08c838cf66d..e8c21f5fdfb 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -689,6 +689,11 @@ fn run( let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() .with_batch_exporter(exporter) + .with_resource( + opentelemetry_sdk::Resource::builder() + .with_service_name("lighthouse") + .build(), + ) .build(); let tracer = provider.tracer("lighthouse"); From 2f6c32bd7ca1ac86b8b00086709c682f1b0dd02d Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 20:47:33 +1000 Subject: [PATCH 16/35] More instruments --- consensus/types/src/beacon_state.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index 1658d3f4378..d1abbf552a2 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -1952,6 +1952,7 @@ impl BeaconState { } /// Build the exit cache, if it needs to be built. 
+ #[instrument(skip_all)] pub fn build_exit_cache(&mut self, spec: &ChainSpec) -> Result<(), Error> { if self.exit_cache().check_initialized().is_err() { *self.exit_cache_mut() = ExitCache::new(self.validators(), spec)?; @@ -1960,6 +1961,7 @@ impl BeaconState { } /// Build the slashings cache if it needs to be built. + #[instrument(skip_all)] pub fn build_slashings_cache(&mut self) -> Result<(), Error> { let latest_block_slot = self.latest_block_header().slot; if !self.slashings_cache().is_initialized(latest_block_slot) { @@ -2118,6 +2120,7 @@ impl BeaconState { /// /// Adds all `pubkeys` from the `validators` which are not already in the cache. Will /// never re-add a pubkey. + #[instrument(skip_all)] pub fn update_pubkey_cache(&mut self) -> Result<(), Error> { let mut pubkey_cache = mem::take(self.pubkey_cache_mut()); let start_index = pubkey_cache.len(); From e7fcb4c502ff355382993e13d52b82efead49091 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 30 Jul 2025 23:22:31 +1000 Subject: [PATCH 17/35] Attempt to fix orphan spans --- beacon_node/beacon_chain/src/beacon_chain.rs | 4 ++-- beacon_node/beacon_chain/src/blob_verification.rs | 3 ++- beacon_node/beacon_chain/src/block_verification.rs | 4 ++-- beacon_node/beacon_chain/src/data_availability_checker.rs | 3 +-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 17ad505232e..1e767b1b0c8 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -126,7 +126,7 @@ use store::{ }; use task_executor::{ShutdownReason, TaskExecutor}; use tokio_stream::Stream; -use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument}; +use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument, Span}; use tree_hash::TreeHash; use types::blob_sidecar::FixedBlobSidecarList; use types::data_column_sidecar::ColumnIndex; @@ -3447,7 +3447,7 @@ impl BeaconChain { } } } - .instrument(info_span!("import_block_future")); + .instrument(info_span!(parent: &Span::current(), "import_block_future")); // Verify and import the block. match import_block.await { diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 958416552db..86581195986 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -14,7 +14,7 @@ use crate::{metrics, BeaconChainError}; use kzg::{Error as KzgError, Kzg, KzgCommitment}; use ssz_derive::{Decode, Encode}; use std::time::Duration; -use tracing::debug; +use tracing::{debug, instrument}; use tree_hash::TreeHash; use types::blob_sidecar::BlobIdentifier; use types::{ @@ -374,6 +374,7 @@ impl IntoIterator for KzgVerifiedBlobList { /// /// Note: This function should be preferred over calling `verify_kzg_for_blob` /// in a loop since this function kzg verifies a list of blobs more efficiently. 
+#[instrument(skip_all)] pub fn verify_kzg_for_blob_list<'a, E: EthSpec, I>( blob_iter: I, kzg: &'a Kzg, diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index ed3cab02327..59399be56c4 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, debug_span, error, info_span, instrument, Instrument}; +use tracing::{debug, debug_span, error, info_span, instrument, Instrument, Span}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -1444,7 +1444,7 @@ impl ExecutionPendingBlock { } let payload_verification_status = payload_notifier .notify_new_payload() - .instrument(info_span!("notify_new_payload", block_root = %block_root)) + .instrument(info_span!(parent: &Span::current(), "notify_new_payload", block_root = %block_root)) .await?; Ok(PayloadVerificationOutcome { diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index b8573e6f224..e2947bb2d6f 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -16,7 +16,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; use task_executor::TaskExecutor; -use tracing::{debug, error, instrument}; +use tracing::{debug, error}; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, @@ -328,7 +328,6 @@ impl DataAvailabilityChecker { /// /// WARNING: This function assumes all required blobs are already present, it does NOT /// check if there are any missing blobs. - #[instrument(skip_all)] pub fn verify_kzg_for_rpc_block( &self, block: RpcBlock, From 868320a68594d8d43e72d1f5d3477bbe7185b482 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 10:12:07 +1000 Subject: [PATCH 18/35] A few more --- .../src/common/update_progressive_balances_cache.rs | 2 ++ consensus/state_processing/src/epoch_cache.rs | 2 ++ .../state_processing/src/per_epoch_processing/single_pass.rs | 2 ++ consensus/types/src/beacon_state.rs | 1 + 4 files changed, 7 insertions(+) diff --git a/consensus/state_processing/src/common/update_progressive_balances_cache.rs b/consensus/state_processing/src/common/update_progressive_balances_cache.rs index 1fdfe802c47..82ec4322479 100644 --- a/consensus/state_processing/src/common/update_progressive_balances_cache.rs +++ b/consensus/state_processing/src/common/update_progressive_balances_cache.rs @@ -5,12 +5,14 @@ use crate::metrics::{ }; use crate::{BlockProcessingError, EpochProcessingError}; use metrics::set_gauge; +use tracing::instrument; use types::{ is_progressive_balances_enabled, BeaconState, BeaconStateError, ChainSpec, Epoch, EpochTotalBalances, EthSpec, ParticipationFlags, ProgressiveBalancesCache, Validator, }; /// Initializes the `ProgressiveBalancesCache` if it is unbuilt. 
+#[instrument(skip_all)] pub fn initialize_progressive_balances_cache( state: &mut BeaconState, spec: &ChainSpec, diff --git a/consensus/state_processing/src/epoch_cache.rs b/consensus/state_processing/src/epoch_cache.rs index dc1d79709e7..1fa119068b6 100644 --- a/consensus/state_processing/src/epoch_cache.rs +++ b/consensus/state_processing/src/epoch_cache.rs @@ -3,6 +3,7 @@ use crate::common::base::SqrtTotalActiveBalance; use crate::common::{altair, base}; use crate::metrics; use safe_arith::SafeArith; +use tracing::instrument; use types::epoch_cache::{EpochCache, EpochCacheError, EpochCacheKey}; use types::{ ActivationQueue, BeaconState, ChainSpec, EthSpec, FixedBytesExtended, ForkName, Hash256, @@ -130,6 +131,7 @@ pub fn is_epoch_cache_initialized( .is_ok()) } +#[instrument(skip_all)] pub fn initialize_epoch_cache( state: &mut BeaconState, spec: &ChainSpec, diff --git a/consensus/state_processing/src/per_epoch_processing/single_pass.rs b/consensus/state_processing/src/per_epoch_processing/single_pass.rs index ae1e3300435..e3c25fff074 100644 --- a/consensus/state_processing/src/per_epoch_processing/single_pass.rs +++ b/consensus/state_processing/src/per_epoch_processing/single_pass.rs @@ -11,6 +11,7 @@ use itertools::izip; use safe_arith::{SafeArith, SafeArithIter}; use std::cmp::{max, min}; use std::collections::{BTreeSet, HashMap}; +use tracing::instrument; use types::{ consts::altair::{ NUM_FLAG_INDICES, PARTICIPATION_FLAG_WEIGHTS, TIMELY_HEAD_FLAG_INDEX, @@ -134,6 +135,7 @@ impl ValidatorInfo { } } +#[instrument(skip_all)] pub fn process_epoch_single_pass( state: &mut BeaconState, spec: &ChainSpec, diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index d1abbf552a2..98c9e858b6f 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -1874,6 +1874,7 @@ impl BeaconState { } /// Build the total active balance cache for the current epoch if it is not already built. + #[instrument(skip_all)] pub fn build_total_active_balance_cache(&mut self, spec: &ChainSpec) -> Result<(), Error> { if self .get_total_active_balance_at_epoch(self.current_epoch()) From a8055ae44ef26dd5c245b55f0445c34d8f3dec47 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 10:30:08 +1000 Subject: [PATCH 19/35] disable resource to test root span not yet received issue --- lighthouse/src/main.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index e8c21f5fdfb..0bf325ab200 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -689,11 +689,11 @@ fn run( let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() .with_batch_exporter(exporter) - .with_resource( - opentelemetry_sdk::Resource::builder() - .with_service_name("lighthouse") - .build(), - ) + // .with_resource( + // opentelemetry_sdk::Resource::builder() + // .with_service_name("lighthouse") + // .build(), + // ) .build(); let tracer = provider.tracer("lighthouse"); From e4e974686662e53ca4e6590c9114219b59212e99 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 11:20:24 +1000 Subject: [PATCH 20/35] Clean ups. 
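The exporter setup this commit gates behind `--telemetry-collector-url` boils down to the pipeline below. A reduced sketch based on the calls in the diff (opentelemetry, opentelemetry-otlp and opentelemetry_sdk 0.30, tracing-opentelemetry 0.31); the subscriber wiring is simplified here, and the real code keeps Lighthouse's existing logging layers and filter:

    // Brings the `tracer()` method of the SDK provider into scope.
    use opentelemetry::trace::TracerProvider as _;
    use tracing_subscriber::prelude::*;

    fn init_telemetry_sketch(collector_url: &str) -> Result<(), String> {
        let exporter = opentelemetry_otlp::SpanExporter::builder()
            .with_tonic()
            .with_endpoint(collector_url)
            .build()
            .map_err(|e| format!("Failed to create OTLP exporter: {:?}", e))?;

        let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
            // Batch export keeps span delivery off the hot path.
            .with_batch_exporter(exporter)
            // The resource labels every exported span with the service name.
            .with_resource(
                opentelemetry_sdk::Resource::builder()
                    .with_service_name("lighthouse")
                    .build(),
            )
            .build();

        let tracer = provider.tracer("lighthouse");

        // Bridge `tracing` spans into OpenTelemetry and install globally.
        tracing_subscriber::registry()
            .with(tracing_opentelemetry::layer().with_tracer(tracer))
            .init();
        Ok(())
    }
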
--- Cargo.toml | 8 ++--- book/src/help_bn.md | 3 ++ book/src/help_general.md | 3 ++ book/src/help_vc.md | 3 ++ book/src/help_vm.md | 3 ++ book/src/help_vm_create.md | 3 ++ book/src/help_vm_import.md | 3 ++ book/src/help_vm_move.md | 3 ++ lighthouse/Cargo.toml | 8 ++--- lighthouse/src/main.rs | 62 +++++++++++++++++++++++--------------- 10 files changed, 67 insertions(+), 32 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 82d63d42d88..33b79f3518a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -193,6 +193,9 @@ network = { path = "beacon_node/network" } node_test_rig = { path = "testing/node_test_rig" } num_cpus = "1" once_cell = "1.17.1" +opentelemetry = "0.30.0" +opentelemetry-otlp = { version = "0.30.0", features = ["grpc-tonic"] } +opentelemetry_sdk = "0.30.0" operation_pool = { path = "beacon_node/operation_pool" } parking_lot = "0.12" paste = "1" @@ -253,6 +256,7 @@ tracing = "0.1.40" tracing-appender = "0.2" tracing-core = "0.1" tracing-log = "0.2" +tracing-opentelemetry = "0.31.0" tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } tree_hash = "0.10.0" tree_hash_derive = "0.10.0" @@ -275,10 +279,6 @@ xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb zeroize = { version = "1", features = ["zeroize_derive", "serde"] } zip = "0.6" zstd = "0.13" -tracing-opentelemetry = "0.31.0" -opentelemetry_sdk = "0.30.0" -opentelemetry-otlp = { version = "0.30.0", features = ["grpc-tonic"] } -opentelemetry = "0.30.0" [profile.maxperf] inherits = "release" diff --git a/book/src/help_bn.md b/book/src/help_bn.md index 642add152e0..475c2e43adc 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -392,6 +392,9 @@ Options: database. --target-peers The target number of peers. + --telemetry-collector-url + URL of the OpenTelemetry collector to export tracing spans (e.g., + http://localhost:4317). If not set, tracing export is disabled. --trusted-peers One or more comma-delimited trusted peer ids which always have the highest score according to the peer scoring system. diff --git a/book/src/help_general.md b/book/src/help_general.md index fbc3ca25578..2beaf78884c 100644 --- a/book/src/help_general.md +++ b/book/src/help_general.md @@ -76,6 +76,9 @@ Options: Path to directory containing eth2_testnet specs. Defaults to a hard-coded Lighthouse testnet. Only effective if there is no existing database. + --telemetry-collector-url + URL of the OpenTelemetry collector to export tracing spans (e.g., + http://localhost:4317). If not set, tracing export is disabled. -V, --version Print version diff --git a/book/src/help_vc.md b/book/src/help_vc.md index 0bc4bbf53d9..ed0aaca2b18 100644 --- a/book/src/help_vc.md +++ b/book/src/help_vc.md @@ -134,6 +134,9 @@ Options: Path to directory containing eth2_testnet specs. Defaults to a hard-coded Lighthouse testnet. Only effective if there is no existing database. + --telemetry-collector-url + URL of the OpenTelemetry collector to export tracing spans (e.g., + http://localhost:4317). If not set, tracing export is disabled. --validator-registration-batch-size Defines the number of validators per validator/register_validator request sent to the BN. This value can be reduced to avoid timeouts diff --git a/book/src/help_vm.md b/book/src/help_vm.md index f58537ae1ca..206fcbf47a7 100644 --- a/book/src/help_vm.md +++ b/book/src/help_vm.md @@ -77,6 +77,9 @@ Options: Path to directory containing eth2_testnet specs. Defaults to a hard-coded Lighthouse testnet. Only effective if there is no existing database. 
+ --telemetry-collector-url + URL of the OpenTelemetry collector to export tracing spans (e.g., + http://localhost:4317). If not set, tracing export is disabled. Flags: --disable-log-timestamp diff --git a/book/src/help_vm_create.md b/book/src/help_vm_create.md index 96ae2612525..ff4e53849a4 100644 --- a/book/src/help_vm_create.md +++ b/book/src/help_vm_create.md @@ -93,6 +93,9 @@ Options: Path to directory containing eth2_testnet specs. Defaults to a hard-coded Lighthouse testnet. Only effective if there is no existing database. + --telemetry-collector-url + URL of the OpenTelemetry collector to export tracing spans (e.g., + http://localhost:4317). If not set, tracing export is disabled. Flags: --disable-deposits diff --git a/book/src/help_vm_import.md b/book/src/help_vm_import.md index ca635be5f15..9e22c4dad04 100644 --- a/book/src/help_vm_import.md +++ b/book/src/help_vm_import.md @@ -73,6 +73,9 @@ Options: Path to directory containing eth2_testnet specs. Defaults to a hard-coded Lighthouse testnet. Only effective if there is no existing database. + --telemetry-collector-url + URL of the OpenTelemetry collector to export tracing spans (e.g., + http://localhost:4317). If not set, tracing export is disabled. --validators-file The path to a JSON file containing a list of validators to be imported to the validator client. This file is usually named "validators.json". diff --git a/book/src/help_vm_move.md b/book/src/help_vm_move.md index b7320ca2905..50d65d8122a 100644 --- a/book/src/help_vm_move.md +++ b/book/src/help_vm_move.md @@ -82,6 +82,9 @@ Options: Path to directory containing eth2_testnet specs. Defaults to a hard-coded Lighthouse testnet. Only effective if there is no existing database. + --telemetry-collector-url + URL of the OpenTelemetry collector to export tracing spans (e.g., + http://localhost:4317). If not set, tracing export is disabled. --validators The validators to be moved. Either a list of 0x-prefixed validator pubkeys or the keyword "all". 
diff --git a/lighthouse/Cargo.toml b/lighthouse/Cargo.toml index 08741d72a57..820b4753491 100644 --- a/lighthouse/Cargo.toml +++ b/lighthouse/Cargo.toml @@ -60,6 +60,9 @@ lighthouse_version = { workspace = true } logging = { workspace = true } malloc_utils = { workspace = true } metrics = { workspace = true } +opentelemetry = { workspace = true } +opentelemetry-otlp = { workspace = true } +opentelemetry_sdk = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { workspace = true } @@ -67,15 +70,12 @@ slasher = { workspace = true } store = { workspace = true } task_executor = { workspace = true } tracing = { workspace = true } +tracing-opentelemetry = { workspace = true } tracing-subscriber = { workspace = true } types = { workspace = true } unused_port = { workspace = true } validator_client = { workspace = true } validator_manager = { path = "../validator_manager" } -tracing-opentelemetry = { workspace = true } -opentelemetry_sdk = { workspace = true } -opentelemetry-otlp = { workspace = true } -opentelemetry = { workspace = true } [target.'cfg(any(target_os = "windows", features = "heaptrack"))'.dependencies] malloc_utils = { workspace = true } diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index 0bf325ab200..c160b12288c 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -280,6 +280,18 @@ fn main() { .default_value("info") .display_order(0) ) + .arg( + Arg::new("telemetry-collector-url") + .long("telemetry-collector-url") + .value_name("URL") + .help( + "URL of the OpenTelemetry collector to export tracing spans \ + (e.g., http://localhost:4317). If not set, tracing export is disabled.", + ) + .action(ArgAction::Set) + .global(true) + .display_order(0) + ) .arg( Arg::new("datadir") .long("datadir") @@ -680,31 +692,33 @@ fn run( .eth2_network_config(eth2_network_config)? 
.build()?; - let telemetry_layer = environment.runtime().block_on(async { - let exporter = opentelemetry_otlp::SpanExporter::builder() - .with_tonic() - .with_endpoint("http://localhost:4317") - .build() - .map_err(|e| format!("Failed to create OTLP exporter: {:?}", e))?; - - let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() - .with_batch_exporter(exporter) - // .with_resource( - // opentelemetry_sdk::Resource::builder() - // .with_service_name("lighthouse") - // .build(), - // ) - .build(); - - let tracer = provider.tracer("lighthouse"); - Ok::<_, String>( - tracing_opentelemetry::layer() - .with_tracer(tracer) - .with_filter(workspace_filter), - ) - })?; + if let Some(telemetry_collector_url) = matches.get_one::("telemetry-collector-url") { + let telemetry_layer = environment.runtime().block_on(async { + let exporter = opentelemetry_otlp::SpanExporter::builder() + .with_tonic() + .with_endpoint(telemetry_collector_url) + .build() + .map_err(|e| format!("Failed to create OTLP exporter: {:?}", e))?; + + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .with_resource( + opentelemetry_sdk::Resource::builder() + .with_service_name("lighthouse") + .build(), + ) + .build(); + + let tracer = provider.tracer("lighthouse"); + Ok::<_, String>( + tracing_opentelemetry::layer() + .with_tracer(tracer) + .with_filter(workspace_filter), + ) + })?; - logging_layers.push(telemetry_layer.boxed()); + logging_layers.push(telemetry_layer.boxed()); + } #[cfg(feature = "console-subscriber")] { From 533ecc0349d4a2fb9276d813ad372b4928b98185 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 12:44:02 +1000 Subject: [PATCH 21/35] Revert potential bad change from 62940aa1 (resulted in missing root spans) --- beacon_node/beacon_chain/src/block_verification.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 59399be56c4..d29833cd627 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, debug_span, error, info_span, instrument, Instrument, Span}; +use tracing::{debug, debug_span, error, info_span, instrument}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -1442,10 +1442,7 @@ impl ExecutionPendingBlock { started_execution, ); } - let payload_verification_status = payload_notifier - .notify_new_payload() - .instrument(info_span!(parent: &Span::current(), "notify_new_payload", block_root = %block_root)) - .await?; + let payload_verification_status = payload_notifier.notify_new_payload().await?; Ok(PayloadVerificationOutcome { payload_verification_status, @@ -1468,7 +1465,7 @@ impl ExecutionPendingBlock { */ let catchup_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CATCHUP_STATE); - let catchup_span = info_span!("catchup_state", outcome = "").entered(); + let mut state = parent.pre_state; // The block must have a higher slot than its parent. 
@@ -1542,7 +1539,6 @@ impl ExecutionPendingBlock { } } metrics::stop_timer(catchup_timer); - drop(catchup_span); let block_slot = block.slot(); let state_current_epoch = state.current_epoch(); From 29d65bc5839c719d97af48102e05ba5aa62ce97b Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 13:07:18 +1000 Subject: [PATCH 22/35] Add instrument back in. --- beacon_node/beacon_chain/src/block_verification.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 59399be56c4..d29833cd627 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, debug_span, error, info_span, instrument, Instrument, Span}; +use tracing::{debug, debug_span, error, info_span, instrument}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -1442,10 +1442,7 @@ impl ExecutionPendingBlock { started_execution, ); } - let payload_verification_status = payload_notifier - .notify_new_payload() - .instrument(info_span!(parent: &Span::current(), "notify_new_payload", block_root = %block_root)) - .await?; + let payload_verification_status = payload_notifier.notify_new_payload().await?; Ok(PayloadVerificationOutcome { payload_verification_status, @@ -1468,7 +1465,7 @@ impl ExecutionPendingBlock { */ let catchup_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CATCHUP_STATE); - let catchup_span = info_span!("catchup_state", outcome = "").entered(); + let mut state = parent.pre_state; // The block must have a higher slot than its parent. @@ -1542,7 +1539,6 @@ impl ExecutionPendingBlock { } } metrics::stop_timer(catchup_timer); - drop(catchup_span); let block_slot = block.slot(); let state_current_epoch = state.current_epoch(); From 3ac075fb862152deaab83a76cd49378e807291d9 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 13:10:41 +1000 Subject: [PATCH 23/35] Make `process_block` a root span. --- beacon_node/beacon_chain/src/beacon_chain.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 1e767b1b0c8..1249244bb3e 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3383,7 +3383,7 @@ impl BeaconChain { /// /// Returns an `Err` if the given block was invalid, or an error was encountered during /// verification. 
- #[instrument(skip_all, fields(block_root = ?block_root, block_source = %block_source))] + #[instrument(skip_all, fields(block_root = ?block_root, block_source = %block_source), parent = None)] pub async fn process_block>( self: &Arc, block_root: Hash256, From fb3bf311e755ff3534e67d4a9e3918d2fbfaa4d4 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 14:38:41 +1000 Subject: [PATCH 24/35] More fixes --- beacon_node/beacon_chain/src/beacon_chain.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 1249244bb3e..3d48c0e3cc6 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3383,7 +3383,7 @@ impl BeaconChain { /// /// Returns an `Err` if the given block was invalid, or an error was encountered during /// verification. - #[instrument(skip_all, fields(block_root = ?block_root, block_source = %block_source), parent = None)] + #[instrument(skip_all, fields(block_root = ?block_root, block_source = %block_source))] pub async fn process_block>( self: &Arc, block_root: Hash256, @@ -3414,7 +3414,9 @@ impl BeaconChain { // A small closure to group the verification and import errors. let chain = self.clone(); + let current_span = tracing::Span::current(); let import_block = async move { + let _ = current_span.enter(); let execution_pending = unverified_block.into_execution_pending_block( block_root, &chain, @@ -3446,8 +3448,7 @@ impl BeaconChain { self.check_block_availability_and_import(block).await } } - } - .instrument(info_span!(parent: &Span::current(), "import_block_future")); + }; // Verify and import the block. match import_block.await { @@ -3749,7 +3750,7 @@ impl BeaconChain { } } - #[instrument(skip_all, fields(block_root = %block.import_data.block_root))] + #[instrument(skip_all)] pub async fn import_available_block( self: &Arc, block: Box>, @@ -3822,13 +3823,7 @@ impl BeaconChain { /// An error is returned if the block was unable to be imported. It may be partially imported /// (i.e., this function is not atomic). #[allow(clippy::too_many_arguments)] - #[instrument( - skip_all, - fields( - block_root = ?block_root, - slot = %signed_block.message().slot() - ) - )] + #[instrument(skip_all)] fn import_block( &self, signed_block: AvailableBlock, @@ -4031,7 +4026,7 @@ impl BeaconChain { ops.push(StoreOp::PutBlock(block_root, signed_block.clone())); ops.push(StoreOp::PutState(block.state_root(), &state)); - let db_span = tracing::info_span!("persist_blocks_and_blobs",).entered(); + let db_span = tracing::info_span!("persist_blocks_and_blobs").entered(); if let Err(e) = self.store.do_atomically_with_block_and_blobs_cache(ops) { error!( From 333773f1421c0383d348b5d019d80a85fc81ae87 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 14:44:42 +1000 Subject: [PATCH 25/35] More fixes --- beacon_node/beacon_chain/src/beacon_chain.rs | 16 ++++++++-------- consensus/fork_choice/src/fork_choice.rs | 4 ++-- consensus/types/src/beacon_state.rs | 14 +++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 3d48c0e3cc6..bdda66cd3bb 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3414,7 +3414,7 @@ impl BeaconChain { // A small closure to group the verification and import errors. 
let chain = self.clone(); - let current_span = tracing::Span::current(); + let current_span = Span::current(); let import_block = async move { let _ = current_span.enter(); let execution_pending = unverified_block.into_execution_pending_block( @@ -3502,7 +3502,7 @@ impl BeaconChain { /// get a fully `ExecutedBlock`. /// /// An error is returned if the verification handle couldn't be awaited. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub async fn into_executed_block( self: Arc, execution_pending_block: ExecutionPendingBlock, @@ -3881,7 +3881,7 @@ impl BeaconChain { // However, latency between the VC and the BN might cause the VC to produce attestations at // a previous slot. if state.current_epoch().saturating_add(1_u64) >= current_epoch { - let _attester_span = tracing::debug_span!("attester_cache_update").entered(); + let _attester_span = debug_span!("attester_cache_update").entered(); self.attester_cache .maybe_cache_state(&state, block_root, &self.spec) .map_err(BeaconChainError::from)?; @@ -4026,7 +4026,7 @@ impl BeaconChain { ops.push(StoreOp::PutBlock(block_root, signed_block.clone())); ops.push(StoreOp::PutState(block.state_root(), &state)); - let db_span = tracing::info_span!("persist_blocks_and_blobs").entered(); + let db_span = info_span!("persist_blocks_and_blobs").entered(); if let Err(e) = self.store.do_atomically_with_block_and_blobs_cache(ops) { error!( @@ -4176,7 +4176,7 @@ impl BeaconChain { } /// Process a block for the validator monitor, including all its constituent messages. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] fn import_block_update_validator_monitor( &self, block: BeaconBlockRef, @@ -4271,7 +4271,7 @@ impl BeaconChain { /// Iterate through the attestations in the block and register them as "observed". /// /// This will stop us from propagating them on the gossip network. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] fn import_block_observe_attestations( &self, block: BeaconBlockRef, @@ -4334,7 +4334,7 @@ impl BeaconChain { } /// If a slasher is configured, provide the attestations from the block. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] fn import_block_update_slasher( &self, block: BeaconBlockRef, @@ -4433,7 +4433,7 @@ impl BeaconChain { // For the current and next epoch of this state, ensure we have the shuffling from this // block in our cache. 
- #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] fn import_block_update_shuffling_cache( &self, block_root: Hash256, diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs index adcee841ecd..7d3b521044c 100644 --- a/consensus/fork_choice/src/fork_choice.rs +++ b/consensus/fork_choice/src/fork_choice.rs @@ -472,7 +472,7 @@ where /// Is equivalent to: /// /// https://github.com/ethereum/eth2.0-specs/blob/v0.12.1/specs/phase0/fork-choice.md#get_head - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn get_head( &mut self, system_time_current_slot: Slot, @@ -650,7 +650,7 @@ where #[instrument( skip_all, fields( - block_root = %block_root, slot = %block.slot(), block_delay = ?block_delay, payload_verification_status = ?payload_verification_status + fork_choice_block_delay = ?block_delay, payload_verification_status = ?payload_verification_status ))] pub fn on_block>( &mut self, diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index 98c9e858b6f..78abcaf71e1 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -1933,7 +1933,7 @@ impl BeaconState { } /// Build all caches (except the tree hash cache), if they need to be built. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn build_caches(&mut self, spec: &ChainSpec) -> Result<(), Error> { self.build_all_committee_caches(spec)?; self.update_pubkey_cache()?; @@ -1944,7 +1944,7 @@ impl BeaconState { } /// Build all committee caches, if they need to be built. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn build_all_committee_caches(&mut self, spec: &ChainSpec) -> Result<(), Error> { self.build_committee_cache(RelativeEpoch::Previous, spec)?; self.build_committee_cache(RelativeEpoch::Current, spec)?; @@ -1953,7 +1953,7 @@ impl BeaconState { } /// Build the exit cache, if it needs to be built. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn build_exit_cache(&mut self, spec: &ChainSpec) -> Result<(), Error> { if self.exit_cache().check_initialized().is_err() { *self.exit_cache_mut() = ExitCache::new(self.validators(), spec)?; @@ -1962,7 +1962,7 @@ impl BeaconState { } /// Build the slashings cache if it needs to be built. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn build_slashings_cache(&mut self) -> Result<(), Error> { let latest_block_slot = self.latest_block_header().slot; if !self.slashings_cache().is_initialized(latest_block_slot) { @@ -2000,7 +2000,7 @@ impl BeaconState { } /// Build a committee cache, unless it is has already been built. - #[instrument(skip_all, fields(relative_epoch))] + #[instrument(skip_all, level = "debug")] pub fn build_committee_cache( &mut self, relative_epoch: RelativeEpoch, @@ -2121,7 +2121,7 @@ impl BeaconState { /// /// Adds all `pubkeys` from the `validators` which are not already in the cache. Will /// never re-add a pubkey. - #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn update_pubkey_cache(&mut self) -> Result<(), Error> { let mut pubkey_cache = mem::take(self.pubkey_cache_mut()); let start_index = pubkey_cache.len(); @@ -2202,7 +2202,7 @@ impl BeaconState { /// Compute the tree hash root of the state using the tree hash cache. /// /// Initialize the tree hash cache if it isn't already initialized. 
- #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn update_tree_hash_cache<'a>(&'a mut self) -> Result { self.apply_pending_mutations()?; map_beacon_state_ref!(&'a _, self.to_ref(), |inner, cons| { From e1f584ec30b31012e76ee8db84a7bdea87c7fa41 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 16:14:46 +1000 Subject: [PATCH 26/35] Fix span levels and add tracing to gossip/rpc block and blob paths. --- beacon_node/beacon_chain/src/beacon_chain.rs | 12 +++- .../beacon_chain/src/blob_verification.rs | 2 +- .../beacon_chain/src/block_verification.rs | 45 +++++++++------ .../beacon_chain/src/canonical_head.rs | 3 +- .../src/data_availability_checker.rs | 8 ++- .../src/data_column_verification.rs | 4 +- .../beacon_chain/src/fetch_blobs/mod.rs | 7 ++- beacon_node/beacon_chain/src/kzg_utils.rs | 2 + .../beacon_chain/src/state_advance_timer.rs | 6 +- .../lighthouse_network/tests/common.rs | 8 ++- .../lighthouse_network/tests/rpc_tests.rs | 56 ++++++++++++------- .../gossip_methods.rs | 15 +++-- .../network_beacon_processor/sync_methods.rs | 7 ++- beacon_node/network/src/sync/peer_sampling.rs | 0 beacon_node/store/src/hot_cold_store.rs | 4 +- beacon_node/store/src/state_cache.rs | 2 +- consensus/fork_choice/src/fork_choice.rs | 3 +- .../update_progressive_balances_cache.rs | 2 +- consensus/state_processing/src/epoch_cache.rs | 2 +- consensus/types/src/beacon_state.rs | 2 +- 20 files changed, 128 insertions(+), 62 deletions(-) delete mode 100644 beacon_node/network/src/sync/peer_sampling.rs diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index bdda66cd3bb..2ad2aeb41d2 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -126,7 +126,7 @@ use store::{ }; use task_executor::{ShutdownReason, TaskExecutor}; use tokio_stream::Stream; -use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument, Span}; +use tracing::{debug, debug_span, error, info, info_span, instrument, trace, warn, Span}; use tree_hash::TreeHash; use types::blob_sidecar::FixedBlobSidecarList; use types::data_column_sidecar::ColumnIndex; @@ -2203,6 +2203,7 @@ impl BeaconChain { }) } + #[instrument(skip_all, level = "trace")] pub fn verify_data_column_sidecar_for_gossip( self: &Arc, data_column_sidecar: Arc>, @@ -2215,6 +2216,7 @@ impl BeaconChain { }) } + #[instrument(skip_all, level = "trace")] pub fn verify_blob_sidecar_for_gossip( self: &Arc, blob_sidecar: Arc>, @@ -2974,10 +2976,12 @@ impl BeaconChain { block: Arc>, ) -> Result, BlockError> { let chain = self.clone(); + let span = Span::current(); self.task_executor .clone() .spawn_blocking_handle( move || { + let _guard = span.enter(); let slot = block.slot(); let graffiti_string = block.message().body().graffiti().as_utf8_lossy(); @@ -3006,7 +3010,7 @@ impl BeaconChain { } } }, - "payload_verification_handle", + "gossip_block_verification_handle", ) .ok_or(BeaconChainError::RuntimeShutdown)? .await @@ -3015,6 +3019,7 @@ impl BeaconChain { /// Cache the blob in the processing cache, process it, then evict it from the cache if it was /// imported or errors. + #[instrument(skip_all, level = "debug")] pub async fn process_gossip_blob( self: &Arc, blob: GossipVerifiedBlob, @@ -3088,6 +3093,7 @@ impl BeaconChain { /// Cache the blobs in the processing cache, process it, then evict it from the cache if it was /// imported or errors. 
+ #[instrument(skip_all, level = "debug")] pub async fn process_rpc_blobs( self: &Arc, slot: Slot, @@ -3861,7 +3867,7 @@ impl BeaconChain { // Only take a write lock if there are new keys to import. if state.validators().len() > pubkey_cache.len() { - let _pubkey_span = tracing::debug_span!( + let _pubkey_span = debug_span!( "pubkey_cache_update", new_validators = tracing::field::Empty, cache_len_before = pubkey_cache.len() diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 86581195986..3ac30b74619 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -374,7 +374,7 @@ impl IntoIterator for KzgVerifiedBlobList { /// /// Note: This function should be preferred over calling `verify_kzg_for_blob` /// in a loop since this function kzg verifies a list of blobs more efficiently. -#[instrument(skip_all)] +#[instrument(skip_all, level = "debug")] pub fn verify_kzg_for_blob_list<'a, E: EthSpec, I>( blob_iter: I, kzg: &'a Kzg, diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index d29833cd627..85b41cd5878 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, debug_span, error, info_span, instrument}; +use tracing::{debug, debug_span, error, info_span, instrument, Span}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -790,7 +790,7 @@ pub fn build_blob_data_column_sidecars( /// /// Used to allow functions to accept blocks at various stages of verification. pub trait IntoExecutionPendingBlock: Sized { - #[instrument(skip_all, fields(block_root = %block_root))] + #[instrument(skip_all, level = "debug")] fn into_execution_pending_block( self, block_root: Hash256, @@ -824,13 +824,12 @@ impl GossipVerifiedBlock { /// on the p2p network. /// /// Returns an error if the block is invalid, or if the block was unable to be verified. + #[instrument(name = "verify_gossip_block", skip_all)] + #[allow(clippy::too_many_arguments)] pub fn new( block: Arc>, chain: &BeaconChain, ) -> Result { - let span = debug_span!("GossipVerifiedBlock::new", block_root = ""); - let _guard = span.enter(); - // If the block is valid for gossip we don't supply it to the slasher here because // we assume it will be transformed into a fully verified block. We *do* need to supply // it to the slasher if an error occurs, because that's the end of this block's journey, @@ -847,7 +846,8 @@ impl GossipVerifiedBlock { ) }) .inspect(|block| { - span.record("block_root", block.block_root.to_string()); + let current_span = Span::current(); + current_span.record("block_root", block.block_root.to_string()); }) } @@ -1088,7 +1088,11 @@ impl GossipVerifiedBlock { impl IntoExecutionPendingBlock for GossipVerifiedBlock { /// Completes verification of the wrapped `block`. 
- #[instrument(name = "gossip_block_into_execution_pending_block_slashable", skip_all, fields(block_root = %block_root))] + #[instrument( + name = "gossip_block_into_execution_pending_block_slashable", + level = "debug" + skip_all, + )] fn into_execution_pending_block_slashable( self, block_root: Hash256, @@ -1194,7 +1198,7 @@ impl SignatureVerifiedBlock { /// Finishes signature verification on the provided `GossipVerifedBlock`. Does not re-verify /// the proposer signature. - #[instrument(skip_all, fields(block_root = %from.block_root))] + #[instrument(skip_all, level = "debug")] pub fn from_gossip_verified_block( from: GossipVerifiedBlock, chain: &BeaconChain, @@ -1222,8 +1226,7 @@ impl SignatureVerifiedBlock { signature_verifier .include_all_signatures_except_proposal(block.as_ref(), &mut consensus_context)?; - let sig_verify_span = info_span!("signature_verify", result = "started"); - let _guard = sig_verify_span.enter(); + let sig_verify_span = info_span!("signature_verify", result = "started").entered(); let result = signature_verifier.verify(); match result { Ok(_) => { @@ -1268,7 +1271,11 @@ impl SignatureVerifiedBlock { impl IntoExecutionPendingBlock for SignatureVerifiedBlock { /// Completes verification of the wrapped `block`. - #[instrument(name = "sig_verified_block_into_execution_pending_block_slashable", skip_all, fields(block_root = %block_root))] + #[instrument( + name = "sig_verified_block_into_execution_pending_block_slashable", + level = "debug" + skip_all, + )] fn into_execution_pending_block_slashable( self, block_root: Hash256, @@ -1306,7 +1313,11 @@ impl IntoExecutionPendingBlock for SignatureVerifiedBloc impl IntoExecutionPendingBlock for RpcBlock { /// Verifies the `SignedBeaconBlock` by first transforming it into a `SignatureVerifiedBlock` /// and then using that implementation of `IntoExecutionPendingBlock` to complete verification. - #[instrument(name = "rpc_block_into_execution_pending_block_slashable", skip_all, fields(block_root = %block_root))] + #[instrument( + name = "rpc_block_into_execution_pending_block_slashable", + level = "debug" + skip_all, + )] fn into_execution_pending_block_slashable( self, block_root: Hash256, @@ -1346,7 +1357,7 @@ impl ExecutionPendingBlock { /// verification must be done upstream (e.g., via a `SignatureVerifiedBlock` /// /// Returns an error if the block is invalid, or if the block was unable to be verified. - #[instrument(skip_all, fields(block_root = %block_root))] + #[instrument(skip_all, level = "debug")] pub fn from_signature_verified_components( block: MaybeAvailableBlock, block_root: Hash256, @@ -1877,7 +1888,7 @@ fn verify_parent_block_is_known( /// Returns `Err(BlockError::ParentUnknown)` if the parent is not found, or if an error occurs /// whilst attempting the operation. #[allow(clippy::type_complexity)] -#[instrument(skip_all, fields(parent_root = %block.parent_root()))] +#[instrument(skip_all, level = "debug", fields(parent_root = %block.parent_root()))] fn load_parent>( block: B, chain: &BeaconChain, @@ -1902,9 +1913,7 @@ fn load_parent>( }); } - let db_read_span = info_span!("block_processing_db_read"); - let _guard = db_read_span.enter(); - + let _db_read_span = debug_span!("block_processing_db_read").entered(); let db_read_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_READ); let result = { @@ -2058,7 +2067,7 @@ impl BlockBlobError for GossipDataColumnError { /// and `Cow::Borrowed(state)` will be returned. 
Otherwise, the state will be cloned, cheaply /// advanced and then returned as a `Cow::Owned`. The end result is that the given `state` is never /// mutated to be invalid (in fact, it is never changed beyond a simple committee cache build). -#[instrument(skip(state, spec))] +#[instrument(skip(state, spec), level = "debug")] pub fn cheap_state_advance_to_obtain_committees<'a, E: EthSpec, Err: BlockBlobError>( state: &'a mut BeaconState, state_root_opt: Option, diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index f80a2d4089a..238262bd7fb 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -55,7 +55,7 @@ use std::sync::Arc; use std::time::Duration; use store::{iter::StateRootsIterator, KeyValueStore, KeyValueStoreOp, StoreItem}; use task_executor::{JoinHandle, ShutdownReason}; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, instrument, warn}; use types::*; /// Simple wrapper around `RwLock` that uses private visibility to prevent any other modules from @@ -471,6 +471,7 @@ impl BeaconChain { /// Execute the fork choice algorithm and enthrone the result as the canonical head. /// /// This method replaces the old `BeaconChain::fork_choice` method. + #[instrument(skip_all, level = "debug")] pub async fn recompute_head_at_current_slot(self: &Arc) { match self.slot() { Ok(current_slot) => self.recompute_head_at_slot(current_slot).await, diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index e2947bb2d6f..d974445ebc2 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -16,7 +16,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; use task_executor::TaskExecutor; -use tracing::{debug, error}; +use tracing::{debug, error, instrument}; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, @@ -203,6 +203,7 @@ impl DataAvailabilityChecker { /// Put a list of blobs received via RPC into the availability cache. This performs KZG /// verification on the blobs in the list. + #[instrument(skip_all, level = "trace")] pub fn put_rpc_blobs( &self, block_root: Hash256, @@ -230,6 +231,7 @@ impl DataAvailabilityChecker { /// Put a list of custody columns received via RPC into the availability cache. This performs KZG /// verification on the blobs in the list. #[allow(clippy::type_complexity)] + #[instrument(skip_all, level = "trace")] pub fn put_rpc_custody_columns( &self, block_root: Hash256, @@ -254,6 +256,7 @@ impl DataAvailabilityChecker { /// Otherwise cache the blob sidecar. /// /// This should only accept gossip verified blobs, so we should not have to worry about dupes. + #[instrument(skip_all, level = "trace")] pub fn put_gossip_verified_blobs< I: IntoIterator>, O: ObservationStrategy, @@ -266,6 +269,7 @@ impl DataAvailabilityChecker { .put_kzg_verified_blobs(block_root, blobs.into_iter().map(|b| b.into_inner())) } + #[instrument(skip_all, level = "trace")] pub fn put_kzg_verified_blobs>>( &self, block_root: Hash256, @@ -280,6 +284,7 @@ impl DataAvailabilityChecker { /// Otherwise cache the data column sidecar. /// /// This should only accept gossip verified data columns, so we should not have to worry about dupes. 
+ #[instrument(skip_all, level = "trace")] pub fn put_gossip_verified_data_columns< O: ObservationStrategy, I: IntoIterator>, @@ -297,6 +302,7 @@ impl DataAvailabilityChecker { .put_kzg_verified_data_columns(block_root, custody_columns) } + #[instrument(skip_all, level = "trace")] pub fn put_kzg_verified_custody_data_columns< I: IntoIterator>, >( diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index e079b5ab78c..4ededac5b8f 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -15,7 +15,7 @@ use ssz_derive::{Decode, Encode}; use std::iter; use std::marker::PhantomData; use std::sync::Arc; -use tracing::debug; +use tracing::{debug, instrument}; use types::data_column_sidecar::ColumnIndex; use types::{ BeaconStateError, ChainSpec, DataColumnSidecar, DataColumnSubnetId, EthSpec, Hash256, @@ -412,6 +412,7 @@ impl KzgVerifiedCustodyDataColumn { /// Complete kzg verification for a `DataColumnSidecar`. /// /// Returns an error if the kzg verification check fails. +#[instrument(skip_all, level = "debug")] pub fn verify_kzg_for_data_column( data_column: Arc>, kzg: &Kzg, @@ -426,6 +427,7 @@ pub fn verify_kzg_for_data_column( /// /// Note: This function should be preferred over calling `verify_kzg_for_data_column` /// in a loop since this function kzg verifies a list of data columns more efficiently. +#[instrument(skip_all, level = "debug")] pub fn verify_kzg_for_data_column_list<'a, E: EthSpec, I>( data_column_iter: I, kzg: &'a Kzg, diff --git a/beacon_node/beacon_chain/src/fetch_blobs/mod.rs b/beacon_node/beacon_chain/src/fetch_blobs/mod.rs index efc7854fa5f..b9360eb9def 100644 --- a/beacon_node/beacon_chain/src/fetch_blobs/mod.rs +++ b/beacon_node/beacon_chain/src/fetch_blobs/mod.rs @@ -33,7 +33,7 @@ use ssz_types::FixedVector; use state_processing::per_block_processing::deneb::kzg_commitment_to_versioned_hash; use std::collections::HashSet; use std::sync::Arc; -use tracing::{debug, warn}; +use tracing::{debug, instrument, warn, Span}; use types::blob_sidecar::BlobSidecarError; use types::data_column_sidecar::DataColumnSidecarError; use types::{ @@ -69,6 +69,7 @@ pub enum FetchEngineBlobError { /// Fetches blobs from the EL mempool and processes them. It also broadcasts unseen blobs or /// data columns (PeerDAS onwards) to the network, using the supplied `publish_fn`. 
+#[instrument(skip_all)] pub async fn fetch_and_process_engine_blobs( chain: Arc>, block_root: Hash256, @@ -141,6 +142,7 @@ async fn fetch_and_process_engine_blobs_inner( } } +#[instrument(skip_all, level = "debug")] async fn fetch_and_process_blobs_v1( chain_adapter: FetchBlobsBeaconAdapter, block_root: Hash256, @@ -233,6 +235,7 @@ async fn fetch_and_process_blobs_v1( Ok(Some(availability_processing_status)) } +#[instrument(skip_all, level = "debug")] async fn fetch_and_process_blobs_v2( chain_adapter: FetchBlobsBeaconAdapter, block_root: Hash256, @@ -342,10 +345,12 @@ async fn compute_custody_columns_to_import( let kzg = chain_adapter.kzg().clone(); let spec = chain_adapter.spec().clone(); let chain_adapter_cloned = chain_adapter.clone(); + let current_span = Span::current(); chain_adapter .executor() .spawn_blocking_handle( move || { + let _guard = current_span.enter(); let mut timer = metrics::start_timer_vec( &metrics::DATA_COLUMN_SIDECAR_COMPUTATION, &[&blobs.len().to_string()], diff --git a/beacon_node/beacon_chain/src/kzg_utils.rs b/beacon_node/beacon_chain/src/kzg_utils.rs index 4ac00a6e9b4..18159a59e5d 100644 --- a/beacon_node/beacon_chain/src/kzg_utils.rs +++ b/beacon_node/beacon_chain/src/kzg_utils.rs @@ -5,6 +5,7 @@ use kzg::{ use rayon::prelude::*; use ssz_types::{FixedVector, VariableList}; use std::sync::Arc; +use tracing::instrument; use types::beacon_block_body::KzgCommitments; use types::data_column_sidecar::{Cell, DataColumn, DataColumnSidecarError}; use types::{ @@ -163,6 +164,7 @@ pub fn verify_kzg_proof( } /// Build data column sidecars from a signed beacon block and its blobs. +#[instrument(skip_all, level = "debug", fields(blob_count = blobs.len()))] pub fn blobs_to_data_column_sidecars( blobs: &[&Blob], cell_proofs: Vec, diff --git a/beacon_node/beacon_chain/src/state_advance_timer.rs b/beacon_node/beacon_chain/src/state_advance_timer.rs index ad7e31a8f00..e5472fcf52f 100644 --- a/beacon_node/beacon_chain/src/state_advance_timer.rs +++ b/beacon_node/beacon_chain/src/state_advance_timer.rs @@ -25,7 +25,7 @@ use std::sync::{ }; use task_executor::TaskExecutor; use tokio::time::{sleep, sleep_until, Instant}; -use tracing::{debug, error, warn}; +use tracing::{debug, debug_span, error, instrument, warn, Instrument}; use types::{AttestationShufflingId, BeaconStateError, EthSpec, Hash256, RelativeEpoch, Slot}; /// If the head slot is more than `MAX_ADVANCE_DISTANCE` from the current slot, then don't perform @@ -253,7 +253,8 @@ async fn state_advance_timer( }, "fork_choice_advance_signal_tx", ); - }, + } + .instrument(debug_span!("fork_choice_advance")), "fork_choice_advance", ); } @@ -264,6 +265,7 @@ async fn state_advance_timer( /// slot then placed in the `state_cache` to be used for block verification. /// /// See the module-level documentation for rationale. 
+#[instrument(skip_all)] fn advance_head(beacon_chain: &Arc>) -> Result<(), Error> { let current_slot = beacon_chain.slot()?; diff --git a/beacon_node/lighthouse_network/tests/common.rs b/beacon_node/lighthouse_network/tests/common.rs index 96d252b53da..61f48a9a6fd 100644 --- a/beacon_node/lighthouse_network/tests/common.rs +++ b/beacon_node/lighthouse_network/tests/common.rs @@ -7,7 +7,7 @@ use lighthouse_network::{NetworkConfig, NetworkEvent}; use std::sync::Arc; use std::sync::Weak; use tokio::runtime::Runtime; -use tracing::{debug, error}; +use tracing::{debug, error, info_span, Instrument}; use tracing_subscriber::EnvFilter; use types::{ ChainSpec, EnrForkId, Epoch, EthSpec, FixedBytesExtended, ForkContext, ForkName, Hash256, @@ -204,7 +204,8 @@ pub async fn build_node_pair( } } } - }; + } + .instrument(info_span!("Sender", who = "sender")); let receiver_fut = async { loop { if let NetworkEvent::NewListenAddr(addr) = receiver.next_event().await { @@ -226,7 +227,8 @@ pub async fn build_node_pair( } } } - }; + } + .instrument(info_span!("Receiver", who = "receiver")); let joined = futures::future::join(sender_fut, receiver_fut); diff --git a/beacon_node/lighthouse_network/tests/rpc_tests.rs b/beacon_node/lighthouse_network/tests/rpc_tests.rs index 69c8fd22575..11fe93288f7 100644 --- a/beacon_node/lighthouse_network/tests/rpc_tests.rs +++ b/beacon_node/lighthouse_network/tests/rpc_tests.rs @@ -13,7 +13,7 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::runtime::Runtime; use tokio::time::sleep; -use tracing::{debug, error, warn}; +use tracing::{debug, error, info_span, warn, Instrument}; use types::{ BeaconBlock, BeaconBlockAltair, BeaconBlockBase, BeaconBlockBellatrix, BlobSidecar, ChainSpec, EmptyBlock, Epoch, EthSpec, FixedBytesExtended, ForkName, Hash256, MinimalEthSpec, @@ -118,7 +118,8 @@ fn test_tcp_status_rpc() { _ => {} } } - }; + } + .instrument(info_span!("Sender")); // build the receiver future let receiver_future = async { @@ -142,7 +143,8 @@ fn test_tcp_status_rpc() { _ => {} // Ignore other events } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! { _ = sender_future => {} @@ -246,7 +248,8 @@ fn test_tcp_blocks_by_range_chunked_rpc() { _ => {} // Ignore other behaviour events } } - }; + } + .instrument(info_span!("Sender")); // build the receiver future let receiver_future = async { @@ -287,7 +290,8 @@ fn test_tcp_blocks_by_range_chunked_rpc() { _ => {} // Ignore other events } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! { _ = sender_future => {} @@ -379,7 +383,8 @@ fn test_blobs_by_range_chunked_rpc() { _ => {} // Ignore other behaviour events } } - }; + } + .instrument(info_span!("Sender")); // build the receiver future let receiver_future = async { @@ -413,7 +418,8 @@ fn test_blobs_by_range_chunked_rpc() { _ => {} // Ignore other events } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! { _ = sender_future => {} @@ -485,7 +491,8 @@ fn test_tcp_blocks_by_range_over_limit() { _ => {} // Ignore other behaviour events } } - }; + } + .instrument(info_span!("Sender")); // build the receiver future let receiver_future = async { @@ -518,7 +525,8 @@ fn test_tcp_blocks_by_range_over_limit() { _ => {} // Ignore other events } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! 
{ _ = sender_future => {} @@ -607,7 +615,8 @@ fn test_tcp_blocks_by_range_chunked_rpc_terminates_correctly() { _ => {} // Ignore other behaviour events } } - }; + } + .instrument(info_span!("Sender")); // determine messages to send (PeerId, RequestId). If some, indicates we still need to send // messages @@ -654,7 +663,8 @@ fn test_tcp_blocks_by_range_chunked_rpc_terminates_correctly() { } } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! { _ = sender_future => {} @@ -740,7 +750,8 @@ fn test_tcp_blocks_by_range_single_empty_rpc() { _ => {} // Ignore other behaviour events } } - }; + } + .instrument(info_span!("Sender")); // build the receiver future let receiver_future = async { @@ -773,7 +784,8 @@ fn test_tcp_blocks_by_range_single_empty_rpc() { _ => {} // Ignore other events } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! { _ = sender_future => {} _ = receiver_future => {} @@ -884,7 +896,8 @@ fn test_tcp_blocks_by_root_chunked_rpc() { _ => {} // Ignore other behaviour events } } - }; + } + .instrument(info_span!("Sender")); // build the receiver future let receiver_future = async { @@ -923,7 +936,8 @@ fn test_tcp_blocks_by_root_chunked_rpc() { _ => {} // Ignore other events } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! { _ = sender_future => {} _ = receiver_future => {} @@ -1023,7 +1037,8 @@ fn test_tcp_blocks_by_root_chunked_rpc_terminates_correctly() { _ => {} // Ignore other behaviour events } } - }; + } + .instrument(info_span!("Sender")); // determine messages to send (PeerId, RequestId). If some, indicates we still need to send // messages @@ -1070,7 +1085,8 @@ fn test_tcp_blocks_by_root_chunked_rpc_terminates_correctly() { } } } - }; + } + .instrument(info_span!("Receiver")); tokio::select! 
{ _ = sender_future => {} @@ -1123,7 +1139,8 @@ fn goodbye_test(log_level: &str, enable_logging: bool, protocol: Protocol) { _ => {} // Ignore other RPC messages } } - }; + } + .instrument(info_span!("Sender")); // build the receiver future let receiver_future = async { @@ -1133,7 +1150,8 @@ fn goodbye_test(log_level: &str, enable_logging: bool, protocol: Protocol) { return; } } - }; + } + .instrument(info_span!("Receiver")); let total_future = futures::future::join(sender_future, receiver_future); diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 47d15465063..df8885688ab 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -32,7 +32,7 @@ use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use store::hot_cold_store::HotColdDBError; use tokio::sync::mpsc::error::TrySendError; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, instrument, trace, warn, Span}; use types::{ beacon_block::BlockImportSource, Attestation, AttestationData, AttestationRef, AttesterSlashing, BlobSidecar, DataColumnSidecar, DataColumnSubnetId, EthSpec, Hash256, @@ -603,6 +603,7 @@ impl NetworkBeaconProcessor { } } + #[instrument(skip_all, level = "trace", fields(slot = ?column_sidecar.slot(), block_root = ?column_sidecar.block_root(), index = column_sidecar.index))] pub async fn process_gossip_data_column_sidecar( self: &Arc, message_id: MessageId, @@ -760,6 +761,7 @@ impl NetworkBeaconProcessor { } #[allow(clippy::too_many_arguments)] + #[instrument(skip_all, level = "trace", fields(slot = ?blob_sidecar.slot(), block_root = ?blob_sidecar.block_root(), index = blob_sidecar.index))] pub async fn process_gossip_blob( self: &Arc, message_id: MessageId, @@ -927,7 +929,7 @@ impl NetworkBeaconProcessor { } } - pub async fn process_gossip_verified_blob( + async fn process_gossip_verified_blob( self: &Arc, peer_id: PeerId, verified_blob: GossipVerifiedBlob, @@ -995,7 +997,7 @@ impl NetworkBeaconProcessor { } } - pub async fn process_gossip_verified_data_column( + async fn process_gossip_verified_data_column( self: &Arc, peer_id: PeerId, verified_data_column: GossipVerifiedDataColumn, @@ -1097,6 +1099,7 @@ impl NetworkBeaconProcessor { /// /// Raises a log if there are errors. #[allow(clippy::too_many_arguments)] + #[instrument(skip_all, fields(block_root = tracing::field::Empty))] pub async fn process_gossip_block( self: Arc, message_id: MessageId, @@ -1118,6 +1121,7 @@ impl NetworkBeaconProcessor { .await { let block_root = gossip_verified_block.block_root; + Span::current().record("block_root", block_root.to_string()); if let Some(handle) = duplicate_cache.check_and_insert(block_root) { self.process_gossip_verified_block( @@ -1142,7 +1146,7 @@ impl NetworkBeaconProcessor { /// if it passes gossip propagation criteria, tell the network thread to forward it. /// /// Returns the `GossipVerifiedBlock` if verification passes and raises a log if there are errors. - pub async fn process_gossip_unverified_block( + async fn process_gossip_unverified_block( self: &Arc, message_id: MessageId, peer_id: PeerId, @@ -1425,6 +1429,7 @@ impl NetworkBeaconProcessor { /// Process the beacon block that has already passed gossip verification. /// /// Raises a log if there are errors. 
+ #[instrument(skip_all)] pub async fn process_gossip_verified_block( self: Arc, peer_id: PeerId, @@ -1441,8 +1446,10 @@ impl NetworkBeaconProcessor { let publish_blobs = true; let self_clone = self.clone(); let block_clone = block.clone(); + let current_span = Span::current(); self.executor.spawn( async move { + let _guard = current_span.enter(); self_clone .fetch_engine_blobs_and_publish(block_clone, block_root, publish_blobs) .await diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 32c4705ea89..ea3a5bff1e6 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -21,7 +21,7 @@ use lighthouse_network::PeerAction; use std::sync::Arc; use std::time::Duration; use store::KzgCommitment; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, instrument, warn, Span}; use types::beacon_block_body::format_kzg_commitments; use types::blob_sidecar::FixedBlobSidecarList; use types::{BlockImportSource, DataColumnSidecarList, Epoch, Hash256}; @@ -97,6 +97,7 @@ impl NetworkBeaconProcessor { /// Attempt to process a block received from a direct RPC request. #[allow(clippy::too_many_arguments)] + #[instrument(skip_all, fields(?block_root))] pub async fn process_rpc_block( self: Arc>, block_root: Hash256, @@ -243,6 +244,7 @@ impl NetworkBeaconProcessor { } /// Attempt to process a list of blobs received from a direct RPC request. + #[instrument(skip_all, fields(?block_root, outcome = tracing::field::Empty))] pub async fn process_rpc_blobs( self: Arc>, block_root: Hash256, @@ -291,6 +293,7 @@ impl NetworkBeaconProcessor { match &result { Ok(AvailabilityProcessingStatus::Imported(hash)) => { + Span::current().record("outcome", "imported"); debug!( result = "imported block and blobs", %slot, @@ -300,6 +303,7 @@ impl NetworkBeaconProcessor { self.chain.recompute_head_at_current_slot().await; } Ok(AvailabilityProcessingStatus::MissingComponents(_, _)) => { + Span::current().record("outcome", "missing_components"); debug!( block_hash = %block_root, %slot, @@ -330,6 +334,7 @@ impl NetworkBeaconProcessor { }); } + #[instrument(skip_all, fields(?block_root))] pub async fn process_rpc_custody_columns( self: Arc>, block_root: Hash256, diff --git a/beacon_node/network/src/sync/peer_sampling.rs b/beacon_node/network/src/sync/peer_sampling.rs deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index da75f714a9b..355f95f5576 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -1040,7 +1040,7 @@ impl, Cold: ItemStore> HotColdDB /// - `result_state_root == state.canonical_root()` /// - `state.slot() <= max_slot` /// - `state.get_latest_block_root(result_state_root) == block_root` - #[instrument(skip(self, max_slot))] + #[instrument(skip(self, max_slot), level = "debug")] pub fn get_advanced_hot_state( &self, block_root: Hash256, @@ -1112,7 +1112,7 @@ impl, Cold: ItemStore> HotColdDB /// If this function returns `Some(state)` then that `state` will always have /// `latest_block_header` matching `block_root` but may not be advanced all the way through to /// `max_slot`. 
- #[instrument(skip(self))] + #[instrument(skip(self), level = "debug")] pub fn get_advanced_hot_state_from_cache( &self, block_root: Hash256, diff --git a/beacon_node/store/src/state_cache.rs b/beacon_node/store/src/state_cache.rs index ff77c05cc13..30d0e2b2d58 100644 --- a/beacon_node/store/src/state_cache.rs +++ b/beacon_node/store/src/state_cache.rs @@ -293,7 +293,7 @@ impl StateCache { None } - #[instrument(skip(self))] + #[instrument(skip(self), level = "debug")] pub fn get_by_block_root( &mut self, block_root: Hash256, diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs index 7d3b521044c..a894f26ebe5 100644 --- a/consensus/fork_choice/src/fork_choice.rs +++ b/consensus/fork_choice/src/fork_choice.rs @@ -648,9 +648,10 @@ where /// here. #[allow(clippy::too_many_arguments)] #[instrument( + name = "fork_choice_on_block", skip_all, fields( - fork_choice_block_delay = ?block_delay, payload_verification_status = ?payload_verification_status + fork_choice_block_delay = ?block_delay ))] pub fn on_block>( &mut self, diff --git a/consensus/state_processing/src/common/update_progressive_balances_cache.rs b/consensus/state_processing/src/common/update_progressive_balances_cache.rs index 82ec4322479..f34ee83e6d6 100644 --- a/consensus/state_processing/src/common/update_progressive_balances_cache.rs +++ b/consensus/state_processing/src/common/update_progressive_balances_cache.rs @@ -12,7 +12,7 @@ use types::{ }; /// Initializes the `ProgressiveBalancesCache` if it is unbuilt. -#[instrument(skip_all)] +#[instrument(skip_all, level = "debug")] pub fn initialize_progressive_balances_cache( state: &mut BeaconState, spec: &ChainSpec, diff --git a/consensus/state_processing/src/epoch_cache.rs b/consensus/state_processing/src/epoch_cache.rs index 1fa119068b6..6654c6a7ef8 100644 --- a/consensus/state_processing/src/epoch_cache.rs +++ b/consensus/state_processing/src/epoch_cache.rs @@ -131,7 +131,7 @@ pub fn is_epoch_cache_initialized( .is_ok()) } -#[instrument(skip_all)] +#[instrument(skip_all, level = "debug")] pub fn initialize_epoch_cache( state: &mut BeaconState, spec: &ChainSpec, diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index 78abcaf71e1..c7dfabbfee0 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -1874,7 +1874,7 @@ impl BeaconState { } /// Build the total active balance cache for the current epoch if it is not already built. 
- #[instrument(skip_all)] + #[instrument(skip_all, level = "debug")] pub fn build_total_active_balance_cache(&mut self, spec: &ChainSpec) -> Result<(), Error> { if self .get_total_active_balance_at_epoch(self.current_epoch()) From c0991f52d428e13c484d27a7ef0dc06b821700fa Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 16:54:21 +1000 Subject: [PATCH 27/35] Add process chain segment --- beacon_node/beacon_chain/src/block_verification.rs | 1 + beacon_node/beacon_chain/src/data_availability_checker.rs | 1 + beacon_node/beacon_chain/src/data_column_verification.rs | 1 + beacon_node/beacon_chain/src/historical_blocks.rs | 3 ++- .../network/src/network_beacon_processor/sync_methods.rs | 7 +++++++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 85b41cd5878..e118abd752d 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -618,6 +618,7 @@ pub(crate) fn process_block_slash_info( mut chain_segment: Vec<(Hash256, RpcBlock)>, chain: &BeaconChain, diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index d974445ebc2..1126807c0f0 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -395,6 +395,7 @@ impl DataAvailabilityChecker { /// /// WARNING: This function assumes all required blobs are already present, it does NOT /// check if there are any missing blobs. + #[instrument(skip_all)] pub fn verify_kzg_for_rpc_blocks( &self, blocks: Vec>, diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index 4ededac5b8f..04edd7f343f 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -472,6 +472,7 @@ where Err(errors) } +#[instrument(skip_all, level = "debug")] pub fn validate_data_column_sidecar_for_gossip( data_column: Arc>, subnet: u64, diff --git a/beacon_node/beacon_chain/src/historical_blocks.rs b/beacon_node/beacon_chain/src/historical_blocks.rs index 57e19393165..453cc32cbb4 100644 --- a/beacon_node/beacon_chain/src/historical_blocks.rs +++ b/beacon_node/beacon_chain/src/historical_blocks.rs @@ -11,7 +11,7 @@ use std::time::Duration; use store::metadata::DataColumnInfo; use store::{AnchorInfo, BlobInfo, DBColumn, Error as StoreError, KeyValueStore, KeyValueStoreOp}; use strum::IntoStaticStr; -use tracing::debug; +use tracing::{debug, instrument}; use types::{FixedBytesExtended, Hash256, Slot}; /// Use a longer timeout on the pubkey cache. @@ -63,6 +63,7 @@ impl BeaconChain { /// would violate consistency then an `AnchorInfoConcurrentMutation` error will be returned. /// /// Return the number of blocks successfully imported. 
+ #[instrument(skip_all)] pub fn import_historical_block_batch( &self, mut blocks: Vec>, diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index ea3a5bff1e6..e79db5d714e 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -420,6 +420,7 @@ impl NetworkBeaconProcessor { /// Attempt to import the chain segment (`blocks`) to the beacon chain, informing the sync /// thread if more blocks are needed to process it. + #[instrument(skip_all, fields(sync_type = ?sync_type, downloaded_blocks = downloaded_blocks.len(), imported_blocks = tracing::field::Empty))] pub async fn process_chain_segment( &self, sync_type: ChainSegmentProcessId, @@ -438,6 +439,7 @@ impl NetworkBeaconProcessor { .await { (imported_blocks, Ok(_)) => { + Span::current().record("imported_blocks", imported_blocks); debug!( batch_epoch = %epoch, first_block_slot = start_slot, @@ -452,6 +454,7 @@ impl NetworkBeaconProcessor { } } (imported_blocks, Err(e)) => { + Span::current().record("imported_blocks", imported_blocks); debug!( batch_epoch = %epoch, first_block_slot = start_slot, @@ -487,6 +490,7 @@ impl NetworkBeaconProcessor { match self.process_backfill_blocks(downloaded_blocks) { (imported_blocks, Ok(_)) => { + Span::current().record("imported_blocks", imported_blocks); debug!( batch_epoch = %epoch, first_block_slot = start_slot, @@ -528,6 +532,7 @@ impl NetworkBeaconProcessor { } /// Helper function to process blocks batches which only consumes the chain and blocks to process. + #[instrument(skip_all, fields(result = tracing::field::Empty))] async fn process_blocks<'a>( &self, downloaded_blocks: impl Iterator>, @@ -540,6 +545,7 @@ impl NetworkBeaconProcessor { .await { ChainSegmentResult::Successful { imported_blocks } => { + Span::current().record("outcome", "success"); metrics::inc_counter(&metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_SUCCESS_TOTAL); if !imported_blocks.is_empty() { self.chain.recompute_head_at_current_slot().await; @@ -550,6 +556,7 @@ impl NetworkBeaconProcessor { imported_blocks, error, } => { + Span::current().record("outcome", "failed"); metrics::inc_counter(&metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_FAILED_TOTAL); let r = self.handle_failed_chain_segment(error); if !imported_blocks.is_empty() { From d340231eeec9f95617785d80e1c580e55fbe161c Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 21:35:26 +1000 Subject: [PATCH 28/35] Set parent to None on network beacon processors to prevent inheriting libp2p spans. 
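The pattern this commit applies can be seen in isolation in the short sketch below. It is not part of the diff: the `Processor` type, method, and field names are made up, and tokio is assumed only to give the example a runtime. Adding `parent = None` to `#[instrument]` makes the span an explicit trace root, so a method invoked from the network/libp2p task no longer nests under whatever span happens to be current on the caller.

use tracing::{info_span, instrument, Instrument};

struct Processor;

impl Processor {
    // `parent = None` detaches this span from whatever span is current on the
    // caller and starts a new trace root for the block-processing work.
    #[instrument(skip_all, fields(block_root = %block_root), parent = None)]
    async fn process_block(&self, block_root: u64) {
        // ... verification and import work would go here ...
    }
}

#[tokio::main]
async fn main() {
    let processor = Processor;
    // The caller runs under a "libp2p" span, but `process_block` still records
    // its own root span because of `parent = None`.
    processor
        .process_block(42)
        .instrument(info_span!("libp2p"))
        .await;
}

The hunks below apply the same `parent = None` argument to the existing gossip and RPC `#[instrument]` attributes.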
--- .../src/network_beacon_processor/gossip_methods.rs | 8 ++++---- .../network/src/network_beacon_processor/sync_methods.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index df8885688ab..12517bf3df6 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -603,7 +603,7 @@ impl NetworkBeaconProcessor { } } - #[instrument(skip_all, level = "trace", fields(slot = ?column_sidecar.slot(), block_root = ?column_sidecar.block_root(), index = column_sidecar.index))] + #[instrument(skip_all, level = "trace", fields(slot = ?column_sidecar.slot(), block_root = ?column_sidecar.block_root(), index = column_sidecar.index), parent = None)] pub async fn process_gossip_data_column_sidecar( self: &Arc, message_id: MessageId, @@ -761,7 +761,7 @@ impl NetworkBeaconProcessor { } #[allow(clippy::too_many_arguments)] - #[instrument(skip_all, level = "trace", fields(slot = ?blob_sidecar.slot(), block_root = ?blob_sidecar.block_root(), index = blob_sidecar.index))] + #[instrument(skip_all, level = "trace", fields(slot = ?blob_sidecar.slot(), block_root = ?blob_sidecar.block_root(), index = blob_sidecar.index), parent = None)] pub async fn process_gossip_blob( self: &Arc, message_id: MessageId, @@ -1099,7 +1099,7 @@ impl NetworkBeaconProcessor { /// /// Raises a log if there are errors. #[allow(clippy::too_many_arguments)] - #[instrument(skip_all, fields(block_root = tracing::field::Empty))] + #[instrument(skip_all, fields(block_root = tracing::field::Empty), parent = None)] pub async fn process_gossip_block( self: Arc, message_id: MessageId, @@ -1430,7 +1430,7 @@ impl NetworkBeaconProcessor { /// /// Raises a log if there are errors. #[instrument(skip_all)] - pub async fn process_gossip_verified_block( + async fn process_gossip_verified_block( self: Arc, peer_id: PeerId, verified_block: GossipVerifiedBlock, diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index e79db5d714e..7f1453067a2 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -97,7 +97,7 @@ impl NetworkBeaconProcessor { /// Attempt to process a block received from a direct RPC request. #[allow(clippy::too_many_arguments)] - #[instrument(skip_all, fields(?block_root))] + #[instrument(skip_all, fields(?block_root), parent = None)] pub async fn process_rpc_block( self: Arc>, block_root: Hash256, @@ -244,7 +244,7 @@ impl NetworkBeaconProcessor { } /// Attempt to process a list of blobs received from a direct RPC request. - #[instrument(skip_all, fields(?block_root, outcome = tracing::field::Empty))] + #[instrument(skip_all, fields(?block_root, outcome = tracing::field::Empty), parent = None)] pub async fn process_rpc_blobs( self: Arc>, block_root: Hash256, @@ -334,7 +334,7 @@ impl NetworkBeaconProcessor { }); } - #[instrument(skip_all, fields(?block_root))] + #[instrument(skip_all, fields(?block_root), parent = None)] pub async fn process_rpc_custody_columns( self: Arc>, block_root: Hash256, @@ -420,7 +420,7 @@ impl NetworkBeaconProcessor { /// Attempt to import the chain segment (`blocks`) to the beacon chain, informing the sync /// thread if more blocks are needed to process it. 
- #[instrument(skip_all, fields(sync_type = ?sync_type, downloaded_blocks = downloaded_blocks.len(), imported_blocks = tracing::field::Empty))] + #[instrument(skip_all, fields(sync_type = ?sync_type, downloaded_blocks = downloaded_blocks.len(), imported_blocks = tracing::field::Empty), parent = None)] pub async fn process_chain_segment( &self, sync_type: ChainSegmentProcessId, From 91a2400ff090c2f48a4f2f5425957c1aa4bf6542 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 31 Jul 2025 23:03:38 +1000 Subject: [PATCH 29/35] Add RPC method instruments. --- .../network_beacon_processor/rpc_methods.rs | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 4004305f83c..e5541b1a291 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -15,7 +15,7 @@ use slot_clock::SlotClock; use std::collections::{hash_map::Entry, HashMap}; use std::sync::Arc; use tokio_stream::StreamExt; -use tracing::{debug, error, warn}; +use tracing::{debug, error, instrument, warn}; use types::blob_sidecar::BlobIdentifier; use types::{Epoch, EthSpec, Hash256, Slot}; @@ -155,6 +155,7 @@ impl NetworkBeaconProcessor { } /// Handle a `BlocksByRoot` request from the peer. + #[instrument(skip_all, level = "debug")] pub async fn handle_blocks_by_root_request( self: Arc, peer_id: PeerId, @@ -172,7 +173,7 @@ impl NetworkBeaconProcessor { } /// Handle a `BlocksByRoot` request from the peer. - pub async fn handle_blocks_by_root_request_inner( + async fn handle_blocks_by_root_request_inner( self: Arc, peer_id: PeerId, inbound_request_id: InboundRequestId, @@ -245,6 +246,7 @@ impl NetworkBeaconProcessor { } /// Handle a `BlobsByRoot` request from the peer. + #[instrument(skip_all, level = "debug")] pub fn handle_blobs_by_root_request( self: Arc, peer_id: PeerId, @@ -260,7 +262,7 @@ impl NetworkBeaconProcessor { } /// Handle a `BlobsByRoot` request from the peer. - pub fn handle_blobs_by_root_request_inner( + fn handle_blobs_by_root_request_inner( &self, peer_id: PeerId, inbound_request_id: InboundRequestId, @@ -339,6 +341,7 @@ impl NetworkBeaconProcessor { } /// Handle a `DataColumnsByRoot` request from the peer. + #[instrument(skip_all, level = "debug")] pub fn handle_data_columns_by_root_request( self: Arc, peer_id: PeerId, @@ -354,7 +357,7 @@ impl NetworkBeaconProcessor { } /// Handle a `DataColumnsByRoot` request from the peer. - pub fn handle_data_columns_by_root_request_inner( + fn handle_data_columns_by_root_request_inner( &self, peer_id: PeerId, inbound_request_id: InboundRequestId, @@ -400,6 +403,7 @@ impl NetworkBeaconProcessor { Ok(()) } + #[instrument(skip_all, level = "debug")] pub fn handle_light_client_updates_by_range( self: &Arc, peer_id: PeerId, @@ -420,7 +424,7 @@ impl NetworkBeaconProcessor { } /// Handle a `LightClientUpdatesByRange` request from the peer. - pub fn handle_light_client_updates_by_range_request_inner( + fn handle_light_client_updates_by_range_request_inner( self: Arc, peer_id: PeerId, inbound_request_id: InboundRequestId, @@ -491,6 +495,7 @@ impl NetworkBeaconProcessor { } /// Handle a `LightClientBootstrap` request from the peer. 
+ #[instrument(skip_all, level = "debug")] pub fn handle_light_client_bootstrap( self: &Arc, peer_id: PeerId, @@ -521,6 +526,7 @@ impl NetworkBeaconProcessor { } /// Handle a `LightClientOptimisticUpdate` request from the peer. + #[instrument(skip_all, level = "debug")] pub fn handle_light_client_optimistic_update( self: &Arc, peer_id: PeerId, @@ -545,6 +551,7 @@ impl NetworkBeaconProcessor { } /// Handle a `LightClientFinalityUpdate` request from the peer. + #[instrument(skip_all, level = "debug")] pub fn handle_light_client_finality_update( self: &Arc, peer_id: PeerId, @@ -569,6 +576,7 @@ impl NetworkBeaconProcessor { } /// Handle a `BlocksByRange` request from the peer. + #[instrument(skip_all, level = "debug")] pub async fn handle_blocks_by_range_request( self: Arc, peer_id: PeerId, @@ -586,7 +594,7 @@ impl NetworkBeaconProcessor { } /// Handle a `BlocksByRange` request from the peer. - pub async fn handle_blocks_by_range_request_inner( + async fn handle_blocks_by_range_request_inner( self: Arc, peer_id: PeerId, inbound_request_id: InboundRequestId, @@ -855,6 +863,7 @@ impl NetworkBeaconProcessor { } /// Handle a `BlobsByRange` request from the peer. + #[instrument(skip_all, level = "debug")] pub fn handle_blobs_by_range_request( self: Arc, peer_id: PeerId, @@ -982,6 +991,7 @@ impl NetworkBeaconProcessor { } /// Handle a `DataColumnsByRange` request from the peer. + #[instrument(skip_all, level = "debug")] pub fn handle_data_columns_by_range_request( &self, peer_id: PeerId, @@ -997,7 +1007,7 @@ impl NetworkBeaconProcessor { } /// Handle a `DataColumnsByRange` request from the peer. - pub fn handle_data_columns_by_range_request_inner( + fn handle_data_columns_by_range_request_inner( &self, peer_id: PeerId, inbound_request_id: InboundRequestId, From 4a3689d98a5ce25517731b834adef6823eb63043 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 1 Aug 2025 17:04:48 +1000 Subject: [PATCH 30/35] Remove unnecessary enter() --- beacon_node/beacon_chain/src/beacon_chain.rs | 2 -- beacon_node/beacon_chain/src/block_verification.rs | 8 ++++---- .../src/network_beacon_processor/gossip_methods.rs | 6 +++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 2ad2aeb41d2..9521623403c 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3420,9 +3420,7 @@ impl BeaconChain { // A small closure to group the verification and import errors. 
let chain = self.clone(); - let current_span = Span::current(); let import_block = async move { - let _ = current_span.enter(); let execution_pending = unverified_block.into_execution_pending_block( block_root, &chain, diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index e118abd752d..d58cfe252dd 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -92,7 +92,7 @@ use std::sync::Arc; use store::{Error as DBError, KeyValueStore}; use strum::AsRefStr; use task_executor::JoinHandle; -use tracing::{debug, debug_span, error, info_span, instrument, Span}; +use tracing::{debug, debug_span, error, info_span, instrument, Instrument, Span}; use types::{ data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload, @@ -1424,9 +1424,8 @@ impl ExecutionPendingBlock { )?; let is_valid_merge_transition_block = is_merge_transition_block(&parent.pre_state, block.message().body()); - let current_span = tracing::Span::current(); + let payload_verification_future = async move { - let _guard = current_span.enter(); let chain = payload_notifier.chain.clone(); let block = payload_notifier.block.clone(); @@ -1464,10 +1463,11 @@ impl ExecutionPendingBlock { // Spawn the payload verification future as a new task, but don't wait for it to complete. // The `payload_verification_future` will be awaited later to ensure verification completed // successfully. + let current_span = Span::current(); let payload_verification_handle = chain .task_executor .spawn_handle( - payload_verification_future, + payload_verification_future.instrument(current_span), "execution_payload_verification", ) .ok_or(BeaconChainError::RuntimeShutdown)?; diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 12517bf3df6..570582f7356 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -32,7 +32,7 @@ use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use store::hot_cold_store::HotColdDBError; use tokio::sync::mpsc::error::TrySendError; -use tracing::{debug, error, info, instrument, trace, warn, Span}; +use tracing::{debug, error, info, instrument, trace, warn, Instrument, Span}; use types::{ beacon_block::BlockImportSource, Attestation, AttestationData, AttestationRef, AttesterSlashing, BlobSidecar, DataColumnSidecar, DataColumnSubnetId, EthSpec, Hash256, @@ -1449,11 +1449,11 @@ impl NetworkBeaconProcessor { let current_span = Span::current(); self.executor.spawn( async move { - let _guard = current_span.enter(); self_clone .fetch_engine_blobs_and_publish(block_clone, block_root, publish_blobs) .await - }, + } + .instrument(current_span), "fetch_blobs_gossip", ); From d1751f1a986486341b78400614c1679c37515a9f Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 1 Aug 2025 17:33:59 +1000 Subject: [PATCH 31/35] Fill more instrument gaps in `process_chain_segment` --- beacon_node/beacon_chain/src/beacon_chain.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 9521623403c..bf11453a703 100644 --- 
a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2853,8 +2853,12 @@ impl BeaconChain { // Filter uninteresting blocks from the chain segment in a blocking task. let chain = self.clone(); + let filter_chain_segment = debug_span!("filter_chain_segment"); let filtered_chain_segment_future = self.spawn_blocking_handle( - move || chain.filter_chain_segment(chain_segment), + move || { + let _guard = filter_chain_segment.enter(); + chain.filter_chain_segment(chain_segment) + }, "filter_chain_segment", ); let mut filtered_chain_segment = match filtered_chain_segment_future.await { @@ -2885,8 +2889,12 @@ impl BeaconChain { std::mem::swap(&mut blocks, &mut filtered_chain_segment); let chain = self.clone(); + let sig_verify_span = info_span!("signature_verify_chain_segment"); let signature_verification_future = self.spawn_blocking_handle( - move || signature_verify_chain_segment(blocks, &chain), + move || { + let _guard = sig_verify_span.enter(); + signature_verify_chain_segment(blocks, &chain) + }, "signature_verify_chain_segment", ); From 61a772f7048c178e6b23800167e6210ebee5a39f Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 1 Aug 2025 17:41:53 +1000 Subject: [PATCH 32/35] Fill more instrument gaps in `process_chain_segment` --- beacon_node/beacon_chain/src/beacon_chain.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index bf11453a703..dca18c9a92b 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2889,10 +2889,10 @@ impl BeaconChain { std::mem::swap(&mut blocks, &mut filtered_chain_segment); let chain = self.clone(); - let sig_verify_span = info_span!("signature_verify_chain_segment"); + let current_span = Span::current(); let signature_verification_future = self.spawn_blocking_handle( move || { - let _guard = sig_verify_span.enter(); + let _guard = current_span.enter(); signature_verify_chain_segment(blocks, &chain) }, "signature_verify_chain_segment", From b358a1a108cea403c5d8ed58d645e457798c916a Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Sat, 2 Aug 2025 01:17:13 +1000 Subject: [PATCH 33/35] Instrument `PendingComponents`. --- .../overflow_lru_cache.rs | 39 ++++++++++++++----- .../state_lru_cache.rs | 9 ++++- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 3c1fd1e7bcb..53a73b8c850 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -14,7 +14,7 @@ use parking_lot::RwLock; use std::cmp::Ordering; use std::num::NonZeroUsize; use std::sync::Arc; -use tracing::debug; +use tracing::{debug, debug_span, Span}; use types::blob_sidecar::BlobIdentifier; use types::{ BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, @@ -31,6 +31,7 @@ pub struct PendingComponents { pub verified_data_columns: Vec>, pub executed_block: Option>, pub reconstruction_started: bool, + span: Span, } impl PendingComponents { @@ -87,6 +88,8 @@ impl PendingComponents { /// Inserts a block into the cache. 
pub fn insert_block(&mut self, block: DietAvailabilityPendingExecutedBlock) { + let _guard = self.span.clone().entered(); + debug!("Block added to pending components"); *self.get_cached_block_mut() = Some(block) } @@ -94,7 +97,9 @@ impl PendingComponents { /// /// Existing blob at the index will be replaced. pub fn insert_blob_at_index(&mut self, blob_index: usize, blob: KzgVerifiedBlob) { + let _guard = self.span.clone().entered(); if let Some(b) = self.get_cached_blobs_mut().get_mut(blob_index) { + debug!(blob_index, "Blob added to pending components"); *b = Some(blob); } } @@ -134,11 +139,17 @@ impl PendingComponents { &mut self, kzg_verified_data_columns: I, ) -> Result<(), AvailabilityCheckError> { + let _guard = self.span.clone().entered(); for data_column in kzg_verified_data_columns { if self.get_cached_data_column(data_column.index()).is_none() { + debug!( + column_index = data_column.index(), + "Data column added to pending components" + ); self.verified_data_columns.push(data_column); } } + Ok(()) } @@ -165,6 +176,7 @@ impl PendingComponents { where R: FnOnce( DietAvailabilityPendingExecutedBlock, + &Span, ) -> Result, AvailabilityCheckError>, { let Some(block) = &self.executed_block else { @@ -255,7 +267,7 @@ impl PendingComponents { block, import_data, payload_verification_outcome, - } = recover(block.clone())?; + } = recover(block.clone(), &self.span)?; let available_block = AvailableBlock { block_root: self.block_root, @@ -264,6 +276,10 @@ impl PendingComponents { blobs_available_timestamp, spec: spec.clone(), }; + + self.span.in_scope(|| { + debug!("Block and all data components are available"); + }); Ok(Some(AvailableExecutedBlock::new( available_block, import_data, @@ -273,12 +289,15 @@ impl PendingComponents { /// Returns an empty `PendingComponents` object with the given block root. pub fn empty(block_root: Hash256, max_len: usize) -> Self { + let span = debug_span!("pending_components", %block_root); + let _guard = span.clone().entered(); Self { block_root, verified_blobs: RuntimeFixedVector::new(vec![None; max_len]), verified_data_columns: vec![], executed_block: None, reconstruction_started: false, + span, } } @@ -483,7 +502,7 @@ impl DataAvailabilityCheckerInner { &self.spec, self.custody_context .num_of_data_columns_to_sample(Some(epoch), &self.spec), - |block| self.state_cache.recover_pending_executed_block(block), + |block, span| self.state_cache.recover_pending_executed_block(block, span), )? { // We keep the pending components in the availability cache during block import (#5845). write_lock.put(block_root, pending_components); @@ -538,8 +557,8 @@ impl DataAvailabilityCheckerInner { ); if let Some(available_block) = - pending_components.make_available(&self.spec, num_expected_columns, |block| { - self.state_cache.recover_pending_executed_block(block) + pending_components.make_available(&self.spec, num_expected_columns, |block, span| { + self.state_cache.recover_pending_executed_block(block, span) })? { // We keep the pending components in the availability cache during block import (#5845). @@ -637,8 +656,8 @@ impl DataAvailabilityCheckerInner { // Check if we have all components and entire set is consistent. if let Some(available_block) = - pending_components.make_available(&self.spec, num_expected_columns, |block| { - self.state_cache.recover_pending_executed_block(block) + pending_components.make_available(&self.spec, num_expected_columns, |block, span| { + self.state_cache.recover_pending_executed_block(block, span) })? 
         {
             // We keep the pending components in the availability cache during block import (#5845).
@@ -712,7 +731,7 @@ mod test {
     use std::collections::VecDeque;
     use store::{database::interface::BeaconNodeBackend, HotColdDB, ItemStore, StoreConfig};
     use tempfile::{tempdir, TempDir};
-    use tracing::info;
+    use tracing::{debug_span, info};
     use types::non_zero_usize::new_non_zero_usize;
     use types::{ExecPayload, MinimalEthSpec};
@@ -1107,7 +1126,7 @@ mod test {
         // reconstruct the pending block by replaying the block on the parent state
         let recovered_pending_block = cache
             .state_lru_cache()
-            .recover_pending_executed_block(diet_block)
+            .recover_pending_executed_block(diet_block, &debug_span!("test"))
             .expect("should reconstruct pending block");
 
         // assert the recovered state is the same as the original
@@ -1133,7 +1152,7 @@ mod test {
         // recover the pending block from the cache
         let recovered_pending_block = cache
             .state_lru_cache()
-            .recover_pending_executed_block(diet_block)
+            .recover_pending_executed_block(diet_block, &debug_span!("test"))
             .expect("should reconstruct pending block");
         // assert the recovered state is the same as the original
         assert_eq!(
diff --git a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs
index f16e1383839..547bb160be7 100644
--- a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs
+++ b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs
@@ -9,6 +9,7 @@ use parking_lot::RwLock;
 use state_processing::BlockReplayer;
 use std::sync::Arc;
 use store::OnDiskConsensusContext;
+use tracing::{debug_span, instrument, Span};
 use types::beacon_block_body::KzgCommitments;
 use types::{BeaconState, BlindedPayload, ChainSpec, Epoch, EthSpec, Hash256, SignedBeaconBlock};
@@ -106,9 +107,11 @@ impl<T: BeaconChainTypes> StateLRUCache<T> {
     /// This method will first check the cache and if the state is not found
     /// it will reconstruct the state by loading the parent state from disk and
     /// replaying the block.
+    #[instrument(skip_all, parent = _span, level = "debug")]
     pub fn recover_pending_executed_block(
         &self,
         diet_executed_block: DietAvailabilityPendingExecutedBlock<T::EthSpec>,
+        _span: &Span,
     ) -> Result<AvailabilityPendingExecutedBlock<T::EthSpec>, AvailabilityCheckError> {
         let state = if let Some(state) = self.states.write().pop(&diet_executed_block.state_root) {
             state
@@ -132,6 +135,7 @@ impl<T: BeaconChainTypes> StateLRUCache<T> {
     /// Reconstruct the state by loading the parent state from disk and replaying
     /// the block.
+    #[instrument(skip_all, level = "debug")]
     fn reconstruct_state(
         &self,
         diet_executed_block: &DietAvailabilityPendingExecutedBlock<T::EthSpec>,
@@ -164,8 +168,11 @@ impl<T: BeaconChainTypes> StateLRUCache<T> {
             .state_root_iter(state_roots.into_iter())
             .minimal_block_root_verification();
 
+        let block_replayer = debug_span!("reconstruct_state_apply_blocks").in_scope(|| {
+            block_replayer.apply_blocks(vec![diet_executed_block.block.clone_as_blinded()], None)
+        });
+
         block_replayer
-            .apply_blocks(vec![diet_executed_block.block.clone_as_blinded()], None)
             .map(|block_replayer| block_replayer.into_state())
             .and_then(|mut state| {
                 state

From 4af28557d9d94c32904951de7b7b0889d95ad2c6 Mon Sep 17 00:00:00 2001
From: Jimmy Chen
Date: Sat, 2 Aug 2025 01:29:23 +1000
Subject: [PATCH 34/35] Instrument `PendingComponents`: set parent to `None` to avoid incorrectly impacting component processing traces

---
 .../src/data_availability_checker/overflow_lru_cache.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs
index 53a73b8c850..ebb934d19ee 100644
--- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs
+++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs
@@ -289,7 +289,7 @@ impl<E: EthSpec> PendingComponents<E> {
     /// Returns an empty `PendingComponents` object with the given block root.
     pub fn empty(block_root: Hash256, max_len: usize) -> Self {
-        let span = debug_span!("pending_components", %block_root);
+        let span = debug_span!(parent: None, "pending_components", %block_root);
         let _guard = span.clone().entered();
         Self {
             block_root,

From 1cd968354a0969cb756869e8895892e002913068 Mon Sep 17 00:00:00 2001
From: Jimmy Chen
Date: Wed, 6 Aug 2025 16:12:37 +1000
Subject: [PATCH 35/35] Remove unnecessary `allow` macro usage

---
 beacon_node/beacon_chain/src/block_verification.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs
index d58cfe252dd..39d0a4b280f 100644
--- a/beacon_node/beacon_chain/src/block_verification.rs
+++ b/beacon_node/beacon_chain/src/block_verification.rs
@@ -826,7 +826,6 @@ impl<T: BeaconChainTypes> GossipVerifiedBlock<T> {
     ///
     /// Returns an error if the block is invalid, or if the block was unable to be verified.
     #[instrument(name = "verify_gossip_block", skip_all)]
-    #[allow(clippy::too_many_arguments)]
     pub fn new(
         block: Arc<SignedBeaconBlock<T::EthSpec>>,
         chain: &BeaconChain<T>,