Skip to content

Commit f8f19ba

Browse files
time to establish connection (#3134)
Implementing #2745 , adding a metric to break down time from connection pending to connection established, per protocol stack. ```` $curl -s http://127.0.0.1:42183/metrics | grep nt_duration # HELP libp2p_swarm_connection_establishment_duration Time it took (locally) to finish establishing connections. # TYPE libp2p_swarm_connection_establishment_duration histogram libp2p_swarm_connection_establishment_duration_sum{role="Listener",protocols="/ip4/tcp"} 0.007 libp2p_swarm_connection_establishment_duration_count{role="Listener",protocols="/ip4/tcp"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.001"} 0 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.002"} 0 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.004"} 0 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.008"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.016"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.032"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.064"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.128"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.256"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.512"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="+Inf"} 1 lbl@chomp:~lbl $curl -s http://127.0.0.1:34283/metrics | grep nt_duration # HELP libp2p_swarm_connection_establishment_duration Time it took (locally) to finish establishing connections. # TYPE libp2p_swarm_connection_establishment_duration histogram libp2p_swarm_connection_establishment_duration_sum{role="Dialer",protocols="/ip4/tcp"} 0.009 libp2p_swarm_connection_establishment_duration_count{role="Dialer",protocols="/ip4/tcp"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.001"} 0 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.002"} 0 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.004"} 0 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.008"} 0 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.016"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.032"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.064"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.128"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.256"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.512"} 1 libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="+Inf"} 1 ````
1 parent 63ffc7f commit f8f19ba

File tree

6 files changed

+58
-10
lines changed

6 files changed

+58
-10
lines changed

misc/metrics/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# 0.12.0 [unreleased]
22

3+
- Add `connections_establishment_duration` metric. See [PR 3134].
4+
35
- Update to `libp2p-dcutr` `v0.9.0`.
46

57
- Update to `libp2p-ping` `v0.42.0`.
@@ -12,6 +14,8 @@
1214

1315
- Update to `libp2p-swarm` `v0.42.0`.
1416

17+
[PR 3134]: https://github.com/libp2p/rust-libp2p/pull/3134/
18+
1519
# 0.11.0
1620

1721
- Update to `libp2p-dcutr` `v0.8.0`.

misc/metrics/src/swarm.rs

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,19 @@
2020

2121
use crate::protocol_stack;
2222
use prometheus_client::encoding::text::Encode;
23-
use prometheus_client::metrics::counter::Counter;
24-
use prometheus_client::metrics::family::Family;
23+
use prometheus_client::metrics::{
24+
counter::Counter,
25+
family::Family,
26+
histogram::{exponential_buckets, Histogram},
27+
};
2528
use prometheus_client::registry::Registry;
2629

2730
pub struct Metrics {
2831
connections_incoming: Family<AddressLabels, Counter>,
2932
connections_incoming_error: Family<IncomingConnectionErrorLabels, Counter>,
3033

3134
connections_established: Family<ConnectionEstablishedLabels, Counter>,
35+
connections_establishment_duration: Family<ConnectionEstablishmentDurationLabels, Histogram>,
3236
connections_closed: Family<ConnectionClosedLabels, Counter>,
3337

3438
new_listen_addr: Family<AddressLabels, Counter>,
@@ -123,6 +127,15 @@ impl Metrics {
123127
Box::new(connections_closed.clone()),
124128
);
125129

130+
let connections_establishment_duration = Family::new_with_constructor(
131+
create_connection_establishment_duration_histogram as fn() -> Histogram,
132+
);
133+
sub_registry.register(
134+
"connections_establishment_duration",
135+
"Time it took (locally) to establish connections",
136+
Box::new(connections_establishment_duration.clone()),
137+
);
138+
126139
Self {
127140
connections_incoming,
128141
connections_incoming_error,
@@ -135,6 +148,7 @@ impl Metrics {
135148
dial_attempt,
136149
outgoing_connection_error,
137150
connected_to_banned_peer,
151+
connections_establishment_duration,
138152
}
139153
}
140154
}
@@ -143,13 +157,19 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
143157
fn record(&self, event: &libp2p_swarm::SwarmEvent<TBvEv, THandleErr>) {
144158
match event {
145159
libp2p_swarm::SwarmEvent::Behaviour(_) => {}
146-
libp2p_swarm::SwarmEvent::ConnectionEstablished { endpoint, .. } => {
147-
self.connections_established
148-
.get_or_create(&ConnectionEstablishedLabels {
149-
role: endpoint.into(),
150-
protocols: protocol_stack::as_string(endpoint.get_remote_address()),
151-
})
152-
.inc();
160+
libp2p_swarm::SwarmEvent::ConnectionEstablished {
161+
endpoint,
162+
established_in: time_taken,
163+
..
164+
} => {
165+
let labels = ConnectionEstablishedLabels {
166+
role: endpoint.into(),
167+
protocols: protocol_stack::as_string(endpoint.get_remote_address()),
168+
};
169+
self.connections_established.get_or_create(&labels).inc();
170+
self.connections_establishment_duration
171+
.get_or_create(&labels)
172+
.observe(time_taken.as_secs_f64());
153173
}
154174
libp2p_swarm::SwarmEvent::ConnectionClosed { endpoint, .. } => {
155175
self.connections_closed
@@ -279,6 +299,8 @@ struct ConnectionEstablishedLabels {
279299
protocols: String,
280300
}
281301

302+
type ConnectionEstablishmentDurationLabels = ConnectionEstablishedLabels;
303+
282304
#[derive(Encode, Hash, Clone, Eq, PartialEq)]
283305
struct ConnectionClosedLabels {
284306
role: Role,
@@ -372,3 +394,7 @@ impl<TTransErr> From<&libp2p_swarm::PendingInboundConnectionError<TTransErr>>
372394
}
373395
}
374396
}
397+
398+
fn create_connection_establishment_duration_histogram() -> Histogram {
399+
Histogram::new(exponential_buckets(1e-3, 2., 10))
400+
}

protocols/autonat/tests/test_server.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ async fn test_dial_back() {
195195
},
196196
num_established,
197197
concurrent_dial_errors,
198+
established_in: _,
198199
} => {
199200
assert_eq!(peer_id, client_id);
200201
assert_eq!(num_established, NonZeroU32::new(2).unwrap());

swarm/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
- Removed deprecated Swarm constructors. For transition notes see [0.41.0](#0.41.0). See [PR 3170].
44

5+
- Add `estblished_in` to `SwarmEvent::ConnectionEstablished`. See [PR 3134].
6+
57
[PR 3170]: https://github.com/libp2p/rust-libp2p/pull/3170
8+
[PR 3134]: https://github.com/libp2p/rust-libp2p/pull/3134
69

710
# 0.41.1
811

swarm/src/connection/pool.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ use futures::{
3838
ready,
3939
stream::FuturesUnordered,
4040
};
41+
use instant::Instant;
4142
use libp2p_core::connection::{ConnectionId, Endpoint, PendingPoint};
4243
use libp2p_core::muxing::{StreamMuxerBox, StreamMuxerExt};
4344
use std::{
@@ -195,6 +196,8 @@ struct PendingConnection<THandler> {
195196
endpoint: PendingPoint,
196197
/// When dropped, notifies the task which then knows to terminate.
197198
abort_notifier: Option<oneshot::Sender<Void>>,
199+
/// The moment we became aware of this possible connection, useful for timing metrics.
200+
accepted_at: Instant,
198201
}
199202

200203
impl<THandler> PendingConnection<THandler> {
@@ -237,6 +240,8 @@ where
237240
/// Addresses are dialed in parallel. Contains the addresses and errors
238241
/// of dial attempts that failed before the one successful dial.
239242
concurrent_dial_errors: Option<Vec<(Multiaddr, TransportError<TTrans::Error>)>>,
243+
/// How long it took to establish this connection.
244+
established_in: std::time::Duration,
240245
},
241246

242247
/// An established connection was closed.
@@ -493,6 +498,7 @@ where
493498
handler,
494499
endpoint,
495500
abort_notifier: Some(abort_notifier),
501+
accepted_at: Instant::now(),
496502
},
497503
);
498504
Ok(connection_id)
@@ -540,6 +546,7 @@ where
540546
handler,
541547
endpoint: endpoint.into(),
542548
abort_notifier: Some(abort_notifier),
549+
accepted_at: Instant::now(),
543550
},
544551
);
545552
Ok(connection_id)
@@ -634,6 +641,7 @@ where
634641
handler,
635642
endpoint,
636643
abort_notifier: _,
644+
accepted_at,
637645
} = self
638646
.pending
639647
.remove(&id)
@@ -783,13 +791,14 @@ where
783791
)
784792
.boxed(),
785793
);
786-
794+
let established_in = accepted_at.elapsed();
787795
return Poll::Ready(PoolEvent::ConnectionEstablished {
788796
peer_id: obtained_peer_id,
789797
endpoint,
790798
id,
791799
other_established_connection_ids,
792800
concurrent_dial_errors,
801+
established_in,
793802
});
794803
}
795804
task::PendingConnectionEvent::PendingFailed { id, error } => {
@@ -798,6 +807,7 @@ where
798807
handler,
799808
endpoint,
800809
abort_notifier: _,
810+
accepted_at: _, // Ignoring the time it took for the connection to fail.
801811
}) = self.pending.remove(&id)
802812
{
803813
self.counters.dec_pending(&endpoint);

swarm/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ pub enum SwarmEvent<TBehaviourOutEvent, THandlerErr> {
191191
/// Addresses are dialed concurrently. Contains the addresses and errors
192192
/// of dial attempts that failed before the one successful dial.
193193
concurrent_dial_errors: Option<Vec<(Multiaddr, TransportError<io::Error>)>>,
194+
/// How long it took to establish this connection
195+
established_in: std::time::Duration,
194196
},
195197
/// A connection with the given peer has been closed,
196198
/// possibly as a result of an error.
@@ -808,6 +810,7 @@ where
808810
endpoint,
809811
other_established_connection_ids,
810812
concurrent_dial_errors,
813+
established_in,
811814
} => {
812815
if self.banned_peers.contains(&peer_id) {
813816
// Mark the connection for the banned peer as banned, thus withholding any
@@ -848,6 +851,7 @@ where
848851
num_established,
849852
endpoint,
850853
concurrent_dial_errors,
854+
established_in,
851855
});
852856
}
853857
}

0 commit comments

Comments
 (0)