@@ -1349,6 +1349,7 @@ impl Dialer {
1349
1349
mod test {
1350
1350
use std:: time:: Duration ;
1351
1351
1352
+ use anyhow:: { anyhow, bail} ;
1352
1353
use bytes:: Bytes ;
1353
1354
use futures_concurrency:: future:: TryJoin ;
1354
1355
use iroh:: { protocol:: Router , RelayMap , RelayMode , SecretKey } ;
@@ -2017,20 +2018,32 @@ mod test {
2017
2018
Ok ( ( ) )
2018
2019
}
2019
2020
2020
- #[ tokio:: test]
2021
- // #[traced_test]
2021
+ #[ tokio:: test( flavor = "multi_thread" ) ]
2022
2022
async fn gossip_net_big ( ) -> TestResult {
2023
- tracing_subscriber:: fmt:: try_init ( ) . ok ( ) ;
2024
2023
let mut rng = rand_chacha:: ChaCha12Rng :: seed_from_u64 ( 1 ) ;
2025
2024
let ( relay_map, _relay_url, _guard) = iroh:: test_utils:: run_relay_server ( ) . await . unwrap ( ) ;
2026
2025
let dns = iroh:: test_utils:: DnsPkarrServer :: run ( ) . await ?;
2027
2026
2028
- let node_count = std:: env:: var ( "NODE_COUNT" )
2029
- . map ( |x| x. parse ( ) . unwrap ( ) )
2030
- . unwrap_or ( 10 ) ;
2031
- let message_count = std:: env:: var ( "MESSAGE_COUNT" )
2027
+ let node_count: usize = std:: env:: var ( "NODE_COUNT" )
2032
2028
. map ( |x| x. parse ( ) . unwrap ( ) )
2033
2029
. unwrap_or ( 100 ) ;
2030
+ let message_count: usize = std:: env:: var ( "MESSAGE_COUNT" )
2031
+ . map ( |x| x. parse ( ) . unwrap ( ) )
2032
+ . unwrap_or ( 2 ) ;
2033
+
2034
+ let warmup_sleep_s = std:: env:: var ( "WARMUP_SLEEP" )
2035
+ . map ( |x| x. parse ( ) . unwrap ( ) )
2036
+ . unwrap_or ( 1 ) ;
2037
+
2038
+ let send_interval_ms = std:: env:: var ( "SEND_INTERVAL" )
2039
+ . map ( |x| x. parse ( ) . unwrap ( ) )
2040
+ . unwrap_or ( 50 ) ;
2041
+
2042
+ let timeout_ms = std:: env:: var ( "TIMEOUT" )
2043
+ . map ( |x| x. parse ( ) . unwrap ( ) )
2044
+ . unwrap_or ( 10000 ) ;
2045
+ let timeout = Duration :: from_millis ( timeout_ms) ;
2046
+ info ! ( "recv timeout: {timeout:?}" ) ;
2034
2047
2035
2048
// spawn
2036
2049
info ! ( "spawn {node_count} nodes" ) ;
@@ -2052,8 +2065,7 @@ mod test {
2052
2065
let gossip = Gossip :: builder ( ) . spawn ( endpoint. clone ( ) ) . await ?;
2053
2066
let router = Router :: builder ( endpoint)
2054
2067
. accept ( GOSSIP_ALPN , gossip. clone ( ) )
2055
- . spawn ( )
2056
- . await ?;
2068
+ . spawn ( ) ;
2057
2069
anyhow:: Ok ( ( router, gossip) )
2058
2070
} )
2059
2071
} ) ) ;
@@ -2075,86 +2087,190 @@ mod test {
2075
2087
let bootstrap_node = routers[ 0 ] . endpoint ( ) . node_id ( ) ;
2076
2088
2077
2089
let mut senders = vec ! [ ] ;
2078
- let mut receivers = FuturesUnordered :: new ( ) ;
2079
2090
2091
+ let bootstrap_count = node_count. min ( 10 ) . max ( node_count / 50 ) ;
2092
+ info ! ( "start with {bootstrap_count} bootstrap nodes" ) ;
2093
+ let mut joining = FuturesUnordered :: new ( ) ;
2080
2094
#[ allow( clippy:: needless_range_loop) ]
2081
- for i in 0 ..node_count {
2095
+ for i in 0 ..bootstrap_count {
2082
2096
let bootstrap = if i == 0 { vec ! [ ] } else { vec ! [ bootstrap_node] } ;
2083
2097
let ( sender, mut receiver) = gossips[ i] . subscribe ( topic_id, bootstrap) ?. split ( ) ;
2084
- senders. push ( sender) ;
2085
- receivers. push ( async move {
2086
- receiver. joined ( ) . await ?;
2087
- Ok ( receiver)
2088
- } ) ;
2098
+ let endpoint = routers[ i] . endpoint ( ) . clone ( ) ;
2099
+ senders. push ( ( sender, endpoint. node_id ( ) ) ) ;
2100
+ joining. push (
2101
+ async move {
2102
+ receiver. joined ( ) . await ?;
2103
+ Ok ( ( receiver, endpoint) )
2104
+ }
2105
+ . boxed ( ) ,
2106
+ ) ;
2089
2107
}
2090
2108
2091
- let receivers: anyhow:: Result < Vec < GossipReceiver > > = receivers. try_collect ( ) . await ;
2092
- let receivers = receivers. context ( "failed to join all nodes" ) ?;
2093
- info ! ( "all joined" ) ;
2109
+ let joined: anyhow:: Result < Vec < _ > > = joining. try_collect ( ) . await ;
2110
+ let mut receivers = joined. context ( "failed to join all nodes" ) ?;
2111
+ info ! ( "bootstrap nodes joined" ) ;
2112
+
2113
+ info ! ( "sleep {warmup_sleep_s}s for swarm to stabilize" ) ;
2114
+ tokio:: time:: sleep ( Duration :: from_secs ( warmup_sleep_s) ) . await ;
2115
+
2116
+ info ! ( "join {} remaining nodes" , node_count - bootstrap_count) ;
2117
+ let chunks = node_count / bootstrap_count;
2118
+ for chunk in 1 ..chunks {
2119
+ let mut joining = FuturesUnordered :: new ( ) ;
2120
+ #[ allow( clippy:: needless_range_loop) ]
2121
+ for j in 0 ..bootstrap_count {
2122
+ let i = ( chunk * bootstrap_count) + j;
2123
+ if i >= node_count {
2124
+ break ;
2125
+ }
2126
+ let bootstrap = vec ! [ routers[ i % bootstrap_count] . endpoint( ) . node_id( ) ] ;
2127
+ let ( sender, mut receiver) = gossips[ i] . subscribe ( topic_id, bootstrap) ?. split ( ) ;
2128
+ let endpoint = routers[ i] . endpoint ( ) . clone ( ) ;
2129
+ senders. push ( ( sender, endpoint. node_id ( ) ) ) ;
2130
+ joining. push (
2131
+ async move {
2132
+ receiver. joined ( ) . await ?;
2133
+ Ok ( ( receiver, endpoint) )
2134
+ }
2135
+ . boxed ( ) ,
2136
+ ) ;
2137
+ }
2094
2138
2095
- let sleep_seconds = std:: env:: var ( "WARMUP_SLEEP" )
2096
- . map ( |x| x. parse ( ) . unwrap ( ) )
2097
- . unwrap_or ( 1 ) ;
2098
- info ! ( "sleep {sleep_seconds}s for swarm to stabilize" ) ;
2099
- tokio:: time:: sleep ( Duration :: from_secs ( sleep_seconds) ) . await ;
2139
+ let joined: anyhow:: Result < Vec < _ > > = joining. try_collect ( ) . await ;
2140
+ receivers. extend ( joined. context ( "failed to join all nodes" ) ?) ;
2141
+ info ! ( "joined chunk {chunk} of {chunks} with {bootstrap_count}" ) ;
2142
+ }
2100
2143
2101
- let send_interval_ms = std:: env:: var ( "SEND_INTERVAL" )
2102
- . map ( |x| x. parse ( ) . unwrap ( ) )
2103
- . unwrap_or ( 5 ) ;
2144
+ info ! ( "sleep {warmup_sleep_s}s for swarm to stabilize" ) ;
2145
+ tokio:: time:: sleep ( Duration :: from_secs ( warmup_sleep_s) ) . await ;
2104
2146
2105
2147
info ! ( "sending & receiving {message_count} messages on each node" ) ;
2106
2148
// spawn send tasks
2107
- let sending = senders. into_iter ( ) . enumerate ( ) . map ( |( i, sender) | {
2149
+ let sending = senders. into_iter ( ) . enumerate ( ) . map ( |( i, ( sender, me ) ) | {
2108
2150
task ( async move {
2109
2151
for j in 0 ..message_count {
2110
- let message = format ! ( "{i }:{j}" ) ;
2152
+ let message = format ! ( "{}:{}" , me . fmt_short ( ) , j ) ;
2111
2153
let message: Bytes = message. as_bytes ( ) . to_vec ( ) . into ( ) ;
2112
2154
sender. broadcast ( message) . await ?;
2113
- if j % ( message_count / 10 . min ( message_count) ) == 0 {
2114
- info ! ( "{i}: sent {j} of {message_count}" ) // // #[tokio::test]
2115
- }
2116
2155
tokio:: time:: sleep ( Duration :: from_millis ( send_interval_ms) ) . await
2117
2156
}
2118
- info ! ( "{i}: sent all" ) ;
2119
- anyhow:: Ok ( ( ) )
2157
+ debug ! ( "{i}: sent all" ) ;
2158
+ anyhow:: Ok ( ( me , sender ) )
2120
2159
} )
2121
2160
} ) ;
2122
2161
let sending = FuturesUnordered :: from_iter ( sending) ;
2162
+
2163
+ let all_messages: BTreeSet < Bytes > = routers
2164
+ . iter ( )
2165
+ . map ( |r| r. endpoint ( ) . node_id ( ) )
2166
+ . flat_map ( |node_id| {
2167
+ ( 0 ..message_count)
2168
+ . map ( move |i| format ! ( "{}:{}" , node_id. fmt_short( ) , i) . into_bytes ( ) . into ( ) )
2169
+ } )
2170
+ . collect ( ) ;
2171
+ let all_messages = Arc :: new ( all_messages) ;
2172
+
2173
+ // closure to create a set of expected messages at a peer
2174
+ let expected = move |all_messages : & BTreeSet < Bytes > , me : NodeId | -> BTreeSet < Bytes > {
2175
+ let me = me. fmt_short ( ) ;
2176
+ all_messages
2177
+ . iter ( )
2178
+ . filter ( |m| !m. starts_with ( me. as_bytes ( ) ) )
2179
+ . cloned ( )
2180
+ . collect ( )
2181
+ } ;
2182
+
2123
2183
// spawn recv tasks
2124
- let receiving = receivers. into_iter ( ) . enumerate ( ) . map ( |( i, mut receiver) | {
2125
- task ( async move {
2126
- let total = message_count * ( node_count - 1 ) ;
2127
- let mut received = 0 ;
2128
- while let Some ( event) = receiver. try_next ( ) . await ? {
2129
- if let Event :: Gossip ( GossipEvent :: Received ( _message) ) = event {
2130
- received += 1 ;
2131
- if received % ( ( message_count / 10 . min ( message_count) ) * node_count) == 0 {
2132
- info ! ( "{i}: received {received} of {total}" ) ;
2184
+ let receiving = receivers
2185
+ . into_iter ( )
2186
+ . enumerate ( )
2187
+ . map ( |( _i, ( mut receiver, endpoint) ) | {
2188
+ let all_messages = Arc :: clone ( & all_messages) ;
2189
+ let me = endpoint. node_id ( ) ;
2190
+ task ( async move {
2191
+ let mut missing = expected ( & all_messages, endpoint. node_id ( ) ) ;
2192
+ let timeout = tokio:: time:: sleep ( timeout) ;
2193
+ tokio:: pin!( timeout) ;
2194
+ let res = loop {
2195
+ let event = tokio:: select! {
2196
+ res = receiver. next( ) => {
2197
+ match res {
2198
+ None => break Err ( anyhow!( "receiver closed" ) ) ,
2199
+ Some ( Err ( err) ) => break Err ( err. into( ) ) ,
2200
+ Some ( Ok ( event) ) => event,
2201
+ }
2202
+ } ,
2203
+ _ = & mut timeout => break Err ( anyhow!( "timeout" ) )
2204
+ } ;
2205
+ if let Event :: Gossip ( GossipEvent :: Received ( message) ) = event {
2206
+ if !missing. remove ( & message. content ) {
2207
+ break Err ( anyhow ! (
2208
+ "duplicate message: {:?} delivered from {}" ,
2209
+ String :: from_utf8_lossy( & message. content) ,
2210
+ message. delivered_from. fmt_short( )
2211
+ ) ) ;
2212
+ }
2213
+ if missing. is_empty ( ) {
2214
+ break Ok ( ( ) ) ;
2215
+ }
2133
2216
}
2134
- if received == total {
2135
- info ! ( "{i}: received all" ) ;
2136
- break ;
2217
+ } ;
2218
+ ( receiver, missing, res)
2219
+ } )
2220
+ . map ( move |res| ( me, res) )
2221
+ } ) ;
2222
+ let mut receiving = FuturesUnordered :: from_iter ( receiving) ;
2223
+
2224
+ let senders_fut = async move {
2225
+ let senders: Vec < _ > = sending. try_collect ( ) . await ?;
2226
+ anyhow:: Ok ( senders)
2227
+ } ;
2228
+ let expected_count = message_count * ( node_count - 1 ) ;
2229
+ let receivers_fut = task ( async move {
2230
+ let mut failed = 0 ;
2231
+ let mut missing_total = 0 ;
2232
+ let mut receivers = vec ! [ ] ;
2233
+ while let Some ( res) = receiving. next ( ) . await {
2234
+ let ( node_id, ( receiver, missing, res) ) = res;
2235
+ receivers. push ( receiver) ;
2236
+ match res {
2237
+ Err ( err) => {
2238
+ missing_total += missing. len ( ) ;
2239
+ failed += 1 ;
2240
+ warn ! ( me=%node_id. fmt_short( ) , ?missing, "recv task failed: {err:#}" ) ;
2241
+ for m in missing {
2242
+ let hash = blake3:: hash ( & m) ;
2243
+ warn ! ( me=%node_id. fmt_short( ) , ?hash, "missing" ) ;
2137
2244
}
2138
2245
}
2246
+ Ok ( ( ) ) => {
2247
+ assert ! ( missing. is_empty( ) ) ;
2248
+ }
2139
2249
}
2140
- anyhow:: Ok ( receiver)
2141
- } )
2250
+ }
2251
+ if failed > 0 {
2252
+ bail ! ( "Receive side failed: {failed} nodes together missed {missing_total} messages of {expected_count}" ) ;
2253
+ } else {
2254
+ Ok ( receivers)
2255
+ }
2142
2256
} ) ;
2143
- let receiving = FuturesUnordered :: from_iter ( receiving) ;
2144
-
2145
- let count_send = async move {
2146
- let res: Vec < _ > = sending. try_collect ( ) . await ?;
2147
- anyhow:: Ok ( res. len ( ) )
2148
- } ;
2149
- let count_recv = async move {
2150
- let res: Vec < _ > = receiving. try_collect ( ) . await ?;
2151
- anyhow:: Ok ( res. len ( ) )
2152
- } ;
2153
2257
2154
- let ( count_send , count_recv ) = ( count_send , count_recv ) . try_join ( ) . await ?;
2258
+ let ( senders , receivers ) = ( senders_fut , receivers_fut ) . try_join ( ) . await ?;
2155
2259
info ! ( "all done" ) ;
2156
- assert_eq ! ( count_send, node_count) ;
2157
- assert_eq ! ( count_recv, node_count) ;
2260
+ assert_eq ! ( senders. len( ) , node_count) ;
2261
+ assert_eq ! ( receivers. len( ) , node_count) ;
2262
+ drop ( senders) ;
2263
+ drop ( receivers) ;
2264
+ let _ = FuturesUnordered :: from_iter ( gossips. iter ( ) . map ( |gossip| gossip. shutdown ( ) ) )
2265
+ . count ( )
2266
+ . await ;
2267
+ let mut shutdown =
2268
+ FuturesUnordered :: from_iter ( routers. into_iter ( ) . map ( |router| async move {
2269
+ ( router. endpoint ( ) . node_id ( ) , router. shutdown ( ) . await )
2270
+ } ) ) ;
2271
+ while let Some ( ( node_id, res) ) = shutdown. next ( ) . await {
2272
+ res. with_context ( || format ! ( "shutdown failed for {}" , node_id. fmt_short( ) ) ) ?;
2273
+ }
2158
2274
2159
2275
Ok ( ( ) )
2160
2276
}
0 commit comments