@@ -11,6 +11,7 @@ use std::{
11
11
LazyLock , Mutex ,
12
12
atomic:: { AtomicBool , Ordering } ,
13
13
} ,
14
+ time:: Duration ,
14
15
} ;
15
16
16
17
use axum:: {
@@ -22,7 +23,7 @@ use futures::{Stream, TryStreamExt};
22
23
use reqwest:: { Client , Response , StatusCode } ;
23
24
use sarlacc:: Intern ;
24
25
use tokio:: sync:: RwLock ;
25
- use tracing:: { error, info} ;
26
+ use tracing:: { error, info, instrument } ;
26
27
27
28
use crate :: { discord:: Snowflake , webring:: CheckLevel } ;
28
29
@@ -32,7 +33,16 @@ use crate::{discord::Snowflake, webring::CheckLevel};
32
33
const WEBRING_CHANNEL : Snowflake = Snowflake :: new ( 1319140464812753009 ) ;
33
34
34
35
/// The time in milliseconds for which the server is considered online after a successful ping.
35
- static ONLINE_CHECK_TTL_MS : i64 = 1000 ;
36
+ const ONLINE_CHECK_TTL_MS : i64 = 1000 ;
37
+
38
+ /// The delay to wait before retrying a request to a site after a failure
39
+ const RETRY_TIMEOUT : Duration = Duration :: from_secs ( 5 ) ;
40
+
41
+ /// How many times to attempt to retry a connection after failure
42
+ const RETRY_COUNT : usize = 5 ;
43
+
44
+ /// How long requests will wait before failing due to timing out
45
+ const REQUEST_TIMEOUT : Duration = Duration :: from_secs ( 30 ) ;
36
46
37
47
/// The HTTP client used to make requests to the webring sites for validation.
38
48
static CLIENT : LazyLock < Client > = LazyLock :: new ( || {
@@ -43,6 +53,7 @@ static CLIENT: LazyLock<Client> = LazyLock::new(|| {
43
53
env!( "CARGO_PKG_VERSION" ) ,
44
54
env!( "CARGO_PKG_REPOSITORY" )
45
55
) )
56
+ . timeout ( REQUEST_TIMEOUT )
46
57
. build ( )
47
58
. expect ( "Creating the HTTP client should not fail" )
48
59
} ) ;
@@ -143,6 +154,7 @@ pub async fn check(
143
154
///
144
155
/// If the site fails any check, returns `Some(CheckFailure)`.
145
156
/// If the site passes all checks, returns `None`.
157
+ #[ instrument( skip( base_address) ) ]
146
158
async fn check_impl (
147
159
website : & Uri ,
148
160
check_level : CheckLevel ,
@@ -152,14 +164,40 @@ async fn check_impl(
152
164
return None ;
153
165
}
154
166
155
- let response = match if check_level == CheckLevel :: ForLinks {
156
- CLIENT . get ( website. to_string ( ) ) . send ( ) . await
157
- } else {
158
- CLIENT . head ( website. to_string ( ) ) . send ( ) . await
159
- } {
160
- Ok ( response) => response,
167
+ let mut response;
168
+
169
+ let mut retry_limit = RETRY_COUNT ;
170
+
171
+ loop {
172
+ response = if check_level == CheckLevel :: ForLinks {
173
+ CLIENT . get ( website. to_string ( ) ) . send ( ) . await
174
+ } else {
175
+ CLIENT . head ( website. to_string ( ) ) . send ( ) . await
176
+ } ;
177
+
178
+ if retry_limit == 0 {
179
+ break ;
180
+ }
181
+
182
+ match & response {
183
+ Ok ( _) => break ,
184
+ Err ( err) => {
185
+ info ! (
186
+ site = %website, %err, delay = ?RETRY_TIMEOUT , "Error requesting site; retrying after delay"
187
+ ) ;
188
+ }
189
+ }
190
+
191
+ retry_limit -= 1 ;
192
+
193
+ tokio:: time:: sleep ( RETRY_TIMEOUT ) . await ;
194
+ }
195
+
196
+ let response = match response {
197
+ Ok ( v) => v,
161
198
Err ( err) => return Some ( CheckFailure :: Connection ( err) ) ,
162
199
} ;
200
+
163
201
mark_server_as_online ( ) . await ;
164
202
let successful_response = match response. error_for_status ( ) {
165
203
Ok ( r) => r,
@@ -505,13 +543,29 @@ async fn scan_for_links(
505
543
506
544
#[ cfg( test) ]
507
545
mod tests {
508
- use axum:: { Router , body:: Bytes , http:: Uri , response:: Html , routing:: get} ;
546
+ use std:: {
547
+ sync:: {
548
+ Arc ,
549
+ atomic:: { AtomicBool , AtomicU64 , Ordering } ,
550
+ } ,
551
+ time:: Duration ,
552
+ } ;
553
+
554
+ use axum:: {
555
+ Router ,
556
+ body:: { Body , Bytes } ,
557
+ http:: Uri ,
558
+ response:: { Html , Response } ,
559
+ routing:: get,
560
+ } ;
509
561
use futures:: stream;
510
562
use indoc:: formatdoc;
511
563
use pretty_assertions:: assert_eq;
512
564
use reqwest:: StatusCode ;
513
565
use sarlacc:: Intern ;
514
566
567
+ use crate :: checking:: REQUEST_TIMEOUT ;
568
+
515
569
use super :: {
516
570
CheckFailure , CheckLevel , LinkStatus , LinkStatuses , WEBRING_CHANNEL , check, scan_for_links,
517
571
} ;
@@ -826,7 +880,7 @@ mod tests {
826
880
assert_eq ! ( expected, links. to_message( ) ) ;
827
881
}
828
882
829
- #[ tokio:: test]
883
+ #[ tokio:: test( start_paused = true ) ]
830
884
async fn check_failure_types ( ) {
831
885
// Start a web server so we can do each kind of check
832
886
let server_addr = ( "127.0.0.1" , 32750 ) ;
@@ -907,6 +961,57 @@ mod tests {
907
961
}
908
962
}
909
963
964
+ #[ tokio:: test( start_paused = true ) ]
965
+ async fn test_retrying ( ) {
966
+ // Start a web server that fails only the first request
967
+ let server_addr = ( "127.0.0.1" , 32752 ) ;
968
+
969
+ let ok_hits = Arc :: new ( AtomicU64 :: new ( 0 ) ) ;
970
+ let err_hits = Arc :: new ( AtomicU64 :: new ( 0 ) ) ;
971
+ let already_requested = Arc :: new ( AtomicBool :: new ( false ) ) ;
972
+
973
+ let ok_hits_for_server = Arc :: clone ( & ok_hits) ;
974
+ let err_hits_for_server = Arc :: clone ( & err_hits) ;
975
+ tokio:: spawn ( async move {
976
+ let listener = tokio:: net:: TcpListener :: bind ( & server_addr) . await . unwrap ( ) ;
977
+ let router = Router :: new ( ) . route (
978
+ "/up" ,
979
+ get ( async move || {
980
+ if already_requested. swap ( true , Ordering :: Relaxed ) {
981
+ ok_hits_for_server. fetch_add ( 1 , Ordering :: Relaxed ) ;
982
+ Response :: builder ( )
983
+ . status ( 200 )
984
+ . body ( Body :: from ( "Hi there!" ) )
985
+ . unwrap ( )
986
+ } else {
987
+ err_hits_for_server. fetch_add ( 1 , Ordering :: Relaxed ) ;
988
+ // Trigger the request timeout
989
+ tokio:: time:: sleep ( Duration :: from_secs ( 1 ) + REQUEST_TIMEOUT ) . await ;
990
+ Response :: builder ( )
991
+ . status ( 500 )
992
+ . body ( Body :: from ( "Retry plz!" ) )
993
+ . unwrap ( )
994
+ }
995
+ } ) ,
996
+ ) ;
997
+ axum:: serve ( listener, router) . await . unwrap ( ) ;
998
+ } ) ;
999
+
1000
+ let base = Intern :: new ( Uri :: from_static ( "https://ring.purduehackers.com" ) ) ;
1001
+
1002
+ let maybe_failure = super :: check (
1003
+ & Uri :: from_static ( "http://127.0.0.1:32752/up" ) ,
1004
+ CheckLevel :: JustOnline ,
1005
+ base,
1006
+ )
1007
+ . await
1008
+ . unwrap ( ) ;
1009
+
1010
+ assert ! ( maybe_failure. is_none( ) , "{maybe_failure:?}" ) ;
1011
+ assert_eq ! ( err_hits. load( Ordering :: Relaxed ) , 1 ) ;
1012
+ assert_eq ! ( ok_hits. load( Ordering :: Relaxed ) , 1 ) ;
1013
+ }
1014
+
910
1015
#[ tokio:: test]
911
1016
#[ ignore = "Kian's site could go down" ]
912
1017
async fn kians_site ( ) {
0 commit comments