Skip to content

Commit 0a4ae6e

Browse files
committed
add some retry for tests
1 parent c8a5ced commit 0a4ae6e

File tree

1 file changed

+91
-57
lines changed

1 file changed

+91
-57
lines changed

crates/kcserver/tests/python_kernel_tests.rs

Lines changed: 91 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ use common::test_utils::{
1616
create_execute_request, create_session_with_client, create_shutdown_request,
1717
create_test_session, get_python_executable, is_ipykernel_available,
1818
};
19-
use common::transport::{run_communication_test, CommunicationChannel, TransportType};
19+
use common::transport::{
20+
run_communication_test, CommunicationChannel, CommunicationTestResults, TransportType,
21+
};
2022
use common::TestServer;
2123
use kallichore_api::models::{InterruptMode, NewSession, SessionMode, VarAction, VarActionType};
2224
use kallichore_api::NewSessionResponse;
@@ -25,6 +27,66 @@ use kcshared::websocket_message::WebsocketMessage;
2527
use std::time::Duration;
2628
use uuid::Uuid;
2729

30+
/// Upper bound on how many times `execute_test_code_with_retries` sends an execute_request.
const EXECUTE_REQUEST_MAX_ATTEMPTS: u8 = 3;
31+
/// Per-attempt timeout, in seconds, handed to `run_communication_test`.
const EXECUTE_TIMEOUT_SECS: u64 = 12;
32+
/// Per-attempt message limit handed to `run_communication_test`.
const EXECUTE_MAX_MESSAGES: u32 = 35;
33+
/// Pause, in milliseconds, between a failed attempt and the next one.
const EXECUTE_RETRY_BACKOFF_MS: u64 = 750;
34+
35+
/// Sends the test execute_request over `comm` and collects the kernel's responses,
/// retrying up to `EXECUTE_REQUEST_MAX_ATTEMPTS` times.
///
/// An attempt counts as successful only when the collected results report all of
/// `execute_reply_received`, `stream_output_received` and `expected_output_found`.
///
/// Returns the results paired with an attempt count: on success, the results of the
/// successful attempt and its 1-based attempt number; otherwise the results of the
/// final attempt and `EXECUTE_REQUEST_MAX_ATTEMPTS`.
///
/// # Panics
///
/// Panics if sending the execute_request over `comm` fails.
///
/// NOTE(review): each retry sends a fresh request without visibly draining messages
/// left over from the previous attempt; presumably `run_communication_test` tolerates
/// late replies from an earlier request -- confirm.
async fn execute_test_code_with_retries(
36+
comm: &mut CommunicationChannel,
37+
) -> (CommunicationTestResults, u8) {
38+
// Placeholder value; overwritten by every attempt that does not succeed.
let mut last_results = CommunicationTestResults::default();
39+
40+
for attempt in 1..=EXECUTE_REQUEST_MAX_ATTEMPTS {
41+
println!(
42+
"Sending execute_request to Python kernel (attempt {})...",
43+
attempt
44+
);
45+
// Build a new request for every attempt rather than resending the previous one.
let execute_request = create_execute_request();
46+
comm.send_message(&execute_request)
47+
.await
48+
.expect("Failed to send execute_request");
49+
50+
let results = run_communication_test(
51+
comm,
52+
Duration::from_secs(EXECUTE_TIMEOUT_SECS),
53+
EXECUTE_MAX_MESSAGES,
54+
)
55+
.await;
56+
57+
// All three signals must be present; a partial result triggers a retry.
if results.execute_reply_received
58+
&& results.stream_output_received
59+
&& results.expected_output_found
60+
{
61+
println!(
62+
"Execute_request completed successfully on attempt {}",
63+
attempt
64+
);
65+
return (results, attempt);
66+
}
67+
68+
println!(
69+
"Execute_request attempt {} incomplete (execute_reply={}, stream_output={}, expected_output={}).",
70+
attempt,
71+
results.execute_reply_received,
72+
results.stream_output_received,
73+
results.expected_output_found
74+
);
75+
76+
// Keep only the most recent attempt's results; earlier attempts are discarded.
last_results = results;
77+
78+
// Back off only when another attempt will follow.
if attempt < EXECUTE_REQUEST_MAX_ATTEMPTS {
79+
println!(
80+
"Waiting {} ms before retrying execute_request...",
81+
EXECUTE_RETRY_BACKOFF_MS
82+
);
83+
tokio::time::sleep(Duration::from_millis(EXECUTE_RETRY_BACKOFF_MS)).await;
84+
}
85+
}
86+
87+
// Every attempt was incomplete: hand back the final attempt's results.
(last_results, EXECUTE_REQUEST_MAX_ATTEMPTS)
88+
}
89+
2890
/// Run a Python kernel test with the specified transport
2991
async fn run_python_kernel_test_transport(python_cmd: &str, transport: TransportType) {
3092
// For domain socket transport, we need to start a Unix socket server
@@ -120,34 +182,27 @@ async fn run_python_kernel_test_transport(python_cmd: &str, transport: Transport
120182
println!("Waiting for Python kernel to start up...");
121183
tokio::time::sleep(Duration::from_millis(800)).await; // Give kernel time to start
122184

123-
// Send an execute_request directly (kernel_info already happens during startup)
124-
let execute_request = create_execute_request();
125-
println!("Sending execute_request to Python kernel...");
126-
comm.send_message(&execute_request)
127-
.await
128-
.expect("Failed to send execute_request");
129-
130-
// Run the communication test with reasonable timeout to get all results
131-
let timeout = Duration::from_secs(12);
132-
let max_messages = 25;
133-
let results = run_communication_test(&mut comm, timeout, max_messages).await;
185+
let (results, attempts_used) = execute_test_code_with_retries(&mut comm).await;
134186

135187
results.print_summary();
136188

137189
// Assert only the essential functionality for faster tests
138190
assert!(
139191
results.execute_reply_received,
140-
"Expected to receive execute_reply from Python kernel, but didn't get one. The kernel is not executing code properly."
192+
"Expected to receive execute_reply from Python kernel after {} attempts, but didn't get one. The kernel is not executing code properly.",
193+
attempts_used
141194
);
142195

143196
assert!(
144197
results.stream_output_received,
145-
"Expected to receive stream output from Python kernel, but didn't get any. The kernel is not producing stdout output."
198+
"Expected to receive stream output from Python kernel after {} attempts, but didn't get any. The kernel is not producing stdout output.",
199+
attempts_used
146200
);
147201

148202
assert!(
149203
results.expected_output_found,
150-
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
204+
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output after {} attempts, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
205+
attempts_used,
151206
results.collected_output
152207
);
153208

@@ -501,34 +556,27 @@ async fn run_python_kernel_test_domain_socket(python_cmd: &str) {
501556
.await
502557
.expect("Failed to create domain socket communication channel");
503558

504-
// Send an execute_request directly (kernel_info already happens during startup)
505-
let execute_request = create_execute_request();
506-
println!("Sending execute_request to Python kernel...");
507-
comm.send_message(&execute_request)
508-
.await
509-
.expect("Failed to send execute_request");
510-
511-
// Run the communication test with reasonable timeout to get all results
512-
let timeout = Duration::from_secs(12);
513-
let max_messages = 25;
514-
let results = run_communication_test(&mut comm, timeout, max_messages).await;
559+
let (results, attempts_used) = execute_test_code_with_retries(&mut comm).await;
515560

516561
results.print_summary();
517562

518563
// Assert only the essential functionality for faster domain socket tests
519564
assert!(
520565
results.execute_reply_received,
521-
"Expected to receive execute_reply from Python kernel, but didn't get one. The kernel is not executing code properly."
566+
"Expected to receive execute_reply from Python kernel after {} attempts, but didn't get one. The kernel is not executing code properly.",
567+
attempts_used
522568
);
523569

524570
assert!(
525571
results.stream_output_received,
526-
"Expected to receive stream output from Python kernel, but didn't get any. The kernel is not producing stdout output."
572+
"Expected to receive stream output from Python kernel after {} attempts, but didn't get any. The kernel is not producing stdout output.",
573+
attempts_used
527574
);
528575

529576
assert!(
530577
results.expected_output_found,
531-
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
578+
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output after {} attempts, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
579+
attempts_used,
532580
results.collected_output
533581
);
534582

@@ -708,34 +756,27 @@ async fn run_python_kernel_test_named_pipe(python_cmd: &str, session_id: &str, p
708756
.await
709757
.expect("Failed to create named pipe communication channel");
710758

711-
// Send an execute_request directly
712-
let execute_request = create_execute_request();
713-
println!("Sending execute_request to Python kernel...");
714-
comm.send_message(&execute_request)
715-
.await
716-
.expect("Failed to send execute_request");
717-
718-
// Run the communication test with reasonable timeout to get all results
719-
let timeout = Duration::from_secs(12);
720-
let max_messages = 25;
721-
let results = run_communication_test(&mut comm, timeout, max_messages).await;
759+
let (results, attempts_used) = execute_test_code_with_retries(&mut comm).await;
722760

723761
results.print_summary();
724762

725763
// Assert only the essential functionality for faster tests
726764
assert!(
727765
results.execute_reply_received,
728-
"Expected to receive execute_reply from Python kernel, but didn't get one. The kernel is not executing code properly."
766+
"Expected to receive execute_reply from Python kernel after {} attempts, but didn't get one. The kernel is not executing code properly.",
767+
attempts_used
729768
);
730769

731770
assert!(
732771
results.stream_output_received,
733-
"Expected to receive stream output from Python kernel, but didn't get any. The kernel is not producing stdout output."
772+
"Expected to receive stream output from Python kernel after {} attempts, but didn't get any. The kernel is not producing stdout output.",
773+
attempts_used
734774
);
735775

736776
assert!(
737777
results.expected_output_found,
738-
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
778+
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output after {} attempts, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
779+
attempts_used,
739780
results.collected_output
740781
);
741782

@@ -847,34 +888,27 @@ async fn run_python_kernel_test_domain_socket_direct(
847888
.await
848889
.expect("Failed to create domain socket communication channel");
849890

850-
// Send an execute_request directly
851-
let execute_request = create_execute_request();
852-
println!("Sending execute_request to Python kernel...");
853-
comm.send_message(&execute_request)
854-
.await
855-
.expect("Failed to send execute_request");
856-
857-
// Run the communication test with reasonable timeout to get all results
858-
let timeout = Duration::from_secs(12);
859-
let max_messages = 25;
860-
let results = run_communication_test(&mut comm, timeout, max_messages).await;
891+
let (results, attempts_used) = execute_test_code_with_retries(&mut comm).await;
861892

862893
results.print_summary();
863894

864895
// Assert only the essential functionality for faster tests
865896
assert!(
866897
results.execute_reply_received,
867-
"Expected to receive execute_reply from Python kernel, but didn't get one. The kernel is not executing code properly."
898+
"Expected to receive execute_reply from Python kernel after {} attempts, but didn't get one. The kernel is not executing code properly.",
899+
attempts_used
868900
);
869901

870902
assert!(
871903
results.stream_output_received,
872-
"Expected to receive stream output from Python kernel, but didn't get any. The kernel is not producing stdout output."
904+
"Expected to receive stream output from Python kernel after {} attempts, but didn't get any. The kernel is not producing stdout output.",
905+
attempts_used
873906
);
874907

875908
assert!(
876909
results.expected_output_found,
877-
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
910+
"Expected to find 'Hello from Kallichore test!' and '2 + 3 = 5' in the kernel output after {} attempts, but didn't find both. The kernel executed but produced unexpected output. Actual collected output: {:?}",
911+
attempts_used,
878912
results.collected_output
879913
);
880914

0 commit comments

Comments
 (0)