Skip to content

Commit b5f9a01

Browse files
feat: Add ProbeBuilder (#1078)
* WIP * Improve tests * refactor according to review * Add error handling * changelog * clippy * Add docs on ProbeBuilder * Update crates/stackable-operator/src/builder/pod/probe.rs Co-authored-by: Nick <[email protected]> * Start directly with action functions. Add docs * Move rustdocs to module --------- Co-authored-by: Nick <[email protected]>
1 parent e723c30 commit b5f9a01

File tree

3 files changed

+347
-0
lines changed

3 files changed

+347
-0
lines changed

crates/stackable-operator/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
44

55
## [Unreleased]
66

7+
### Added
8+
9+
- Add `ProbeBuilder` to build Kubernetes container probes ([#1078]).
10+
711
### Changed
812

913
- BREAKING: The `ResolvedProductImage` field `app_version_label` was renamed to `app_version_label_value` to match changes to its type ([#1076]).
@@ -14,6 +18,7 @@ All notable changes to this project will be documented in this file.
1418
This is the case when referencing custom images via a `@sha256:...` hash. As such, the `product_image_selection::resolve` function is now fallible ([#1076]).
1519

1620
[#1076]: https://github.com/stackabletech/operator-rs/pull/1076
21+
[#1078]: https://github.com/stackabletech/operator-rs/pull/1078
1722

1823
## [0.94.0] - 2025-07-10
1924

crates/stackable-operator/src/builder/pod/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ use crate::{
2929
};
3030

3131
pub mod container;
32+
pub mod probe;
3233
pub mod resources;
3334
pub mod security;
3435
pub mod volume;
Lines changed: 341 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
//! Kubernetes [`Probe`] builder.
2+
//!
3+
//! The upstream [`Probe`] struct does not prevent invalid probe configurations
4+
//! which leads to surprises at runtime which can be deeply hidden.
5+
//! You need to specify at least an action and interval (in this order).
6+
//!
7+
//! ### Usage example
8+
//!
9+
//! ```
10+
//! use stackable_operator::{
11+
//! builder::pod::probe::ProbeBuilder,
12+
//! time::Duration,
13+
//! };
14+
//! # use k8s_openapi::api::core::v1::HTTPGetAction;
15+
//! # use k8s_openapi::apimachinery::pkg::util::intstr::IntOrString;
16+
//!
17+
//! let probe = ProbeBuilder::http_get_port_scheme_path(8080, None, None)
18+
//! .with_period(Duration::from_secs(10))
19+
//! .build()
20+
//! .expect("failed to build probe");
21+
//!
22+
//! assert_eq!(
23+
//! probe.http_get,
24+
//! Some(HTTPGetAction {
25+
//! port: IntOrString::Int(8080),
26+
//! ..Default::default()
27+
//! })
28+
//! );
29+
//! assert_eq!(probe.period_seconds, Some(10));
30+
//! ```
31+
32+
use std::num::TryFromIntError;
33+
34+
use k8s_openapi::{
35+
api::core::v1::{ExecAction, GRPCAction, HTTPGetAction, Probe, TCPSocketAction},
36+
apimachinery::pkg::util::intstr::IntOrString,
37+
};
38+
use snafu::{ResultExt, Snafu, ensure};
39+
40+
use crate::time::Duration;
41+
42+
#[derive(Debug, Snafu)]
43+
pub enum Error {
44+
#[snafu(display(
45+
"The probe's {field:?} duration of {duration} is too long, as it's seconds doesn't fit into an i32"
46+
))]
47+
DurationTooLong {
48+
source: TryFromIntError,
49+
field: String,
50+
duration: Duration,
51+
},
52+
53+
#[snafu(display("The probe period is zero, but it needs to be a positive duration"))]
54+
PeriodIsZero {},
55+
}
56+
57+
#[derive(Debug)]
58+
pub struct ProbeBuilder<Action, Period> {
59+
// Mandatory field
60+
action: Action,
61+
period: Period,
62+
63+
// Fields with defaults
64+
success_threshold: i32,
65+
failure_threshold: i32,
66+
timeout: Duration,
67+
initial_delay: Duration,
68+
termination_grace_period: Duration,
69+
}
70+
71+
/// Available probes
72+
///
73+
/// Only one probe can be configured at a time. For more details about each
74+
/// type, see [container-probes] documentation.
75+
///
76+
/// [container-probes]: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes
77+
pub enum ProbeAction {
78+
Exec(ExecAction),
79+
Grpc(GRPCAction),
80+
HttpGet(HTTPGetAction),
81+
TcpSocket(TCPSocketAction),
82+
}
83+
84+
impl ProbeBuilder<(), ()> {
85+
/// This probe action executes the specified command
86+
pub fn exec_command(
87+
command: impl IntoIterator<Item = impl Into<String>>,
88+
) -> ProbeBuilder<ProbeAction, ()> {
89+
Self::exec(ExecAction {
90+
command: Some(command.into_iter().map(Into::into).collect()),
91+
})
92+
}
93+
94+
// Note: Ideally we also have a builder for `HTTPGetAction`, but that is lot's of effort we
95+
// don't want to spend now.
96+
/// This probe action does an HTTP GET request to the specified port. Optionally, you can
97+
/// configure a scheme and path, otherwise the Kubernetes default is used.
98+
pub fn http_get_port_scheme_path(
99+
port: u16,
100+
scheme: Option<String>,
101+
path: Option<String>,
102+
) -> ProbeBuilder<ProbeAction, ()> {
103+
Self::http_get(HTTPGetAction {
104+
path,
105+
scheme,
106+
port: IntOrString::Int(port.into()),
107+
..Default::default()
108+
})
109+
}
110+
111+
/// Set's an [`ExecAction`] as probe.
112+
///
113+
/// You likely want to use [`Self::exec_command`] whenever possible.
114+
pub fn exec(exec_action: ExecAction) -> ProbeBuilder<ProbeAction, ()> {
115+
Self::action(ProbeAction::Exec(exec_action))
116+
}
117+
118+
/// Set's an [`GRPCAction`] as probe.
119+
pub fn grpc(grpc_action: GRPCAction) -> ProbeBuilder<ProbeAction, ()> {
120+
Self::action(ProbeAction::Grpc(grpc_action))
121+
}
122+
123+
/// Set's an [`HTTPGetAction`] as probe.
124+
///
125+
/// For simple cases, there is a a convenience helper: [`Self::http_get_port_scheme_path`].
126+
pub fn http_get(http_get_action: HTTPGetAction) -> ProbeBuilder<ProbeAction, ()> {
127+
Self::action(ProbeAction::HttpGet(http_get_action))
128+
}
129+
130+
/// Set's an [`TCPSocketAction`] as probe.
131+
pub fn tcp_socket(tcp_socket_action: TCPSocketAction) -> ProbeBuilder<ProbeAction, ()> {
132+
Self::action(ProbeAction::TcpSocket(tcp_socket_action))
133+
}
134+
135+
/// Incase you already have an [`ProbeAction`] enum variant you can pass that here.
136+
///
137+
/// If not, it is recommended to use one of the specialized functions such as [`Self::exec`],
138+
/// [`Self::grpc`], [`Self::http_get`] or [`Self::tcp_socket`] or their helper functions.
139+
pub fn action(action: ProbeAction) -> ProbeBuilder<ProbeAction, ()> {
140+
ProbeBuilder {
141+
action,
142+
period: (),
143+
// The following values match the Kubernetes defaults
144+
success_threshold: 1,
145+
failure_threshold: 1,
146+
timeout: Duration::from_secs(1),
147+
initial_delay: Duration::from_secs(0),
148+
termination_grace_period: Duration::from_secs(0),
149+
}
150+
}
151+
}
152+
153+
impl ProbeBuilder<ProbeAction, ()> {
154+
/// The period/interval in which the probe should be executed.
155+
pub fn with_period(self, period: Duration) -> ProbeBuilder<ProbeAction, Duration> {
156+
let Self {
157+
action,
158+
period: (),
159+
success_threshold,
160+
failure_threshold,
161+
timeout,
162+
initial_delay,
163+
termination_grace_period,
164+
} = self;
165+
166+
ProbeBuilder {
167+
action,
168+
period,
169+
success_threshold,
170+
failure_threshold,
171+
timeout,
172+
initial_delay,
173+
termination_grace_period,
174+
}
175+
}
176+
}
177+
178+
impl ProbeBuilder<ProbeAction, Duration> {
179+
/// How often the probe must succeed before being considered successful.
180+
pub fn with_success_threshold(mut self, success_threshold: i32) -> Self {
181+
self.success_threshold = success_threshold;
182+
self
183+
}
184+
185+
/// The duration the probe needs to succeed before being considered successful.
186+
///
187+
/// This internally calculates the needed failure threshold based on the period and passes that
188+
/// to [`Self::with_success_threshold`].
189+
///
190+
/// This function returns an [`Error::PeriodIsZero`] error in case the period is zero, as it
191+
/// can not divide by zero.
192+
pub fn with_success_threshold_duration(
193+
self,
194+
success_threshold_duration: Duration,
195+
) -> Result<Self, Error> {
196+
ensure!(self.period.as_nanos() != 0, PeriodIsZeroSnafu);
197+
198+
// SAFETY: Period is checked above to be non-zero
199+
let success_threshold = success_threshold_duration.div_duration_f32(*self.period);
200+
Ok(self.with_success_threshold(success_threshold.ceil() as i32))
201+
}
202+
203+
/// How often the probe must fail before being considered failed.
204+
pub fn with_failure_threshold(mut self, failure_threshold: i32) -> Self {
205+
self.failure_threshold = failure_threshold;
206+
self
207+
}
208+
209+
pub fn with_timeout(mut self, timeout: Duration) -> Self {
210+
self.timeout = timeout;
211+
self
212+
}
213+
214+
pub fn with_initial_delay(mut self, initial_delay: Duration) -> Self {
215+
self.initial_delay = initial_delay;
216+
self
217+
}
218+
219+
pub fn with_termination_grace_period(mut self, termination_grace_period: Duration) -> Self {
220+
self.termination_grace_period = termination_grace_period;
221+
self
222+
}
223+
224+
/// The duration the probe needs to fail before being considered failed.
225+
///
226+
/// This internally calculates the needed failure threshold based on the period and passes that
227+
/// to [`Self::with_failure_threshold`].
228+
///
229+
/// This function returns an [`Error::PeriodIsZero`] error in case the period is zero, as it
230+
/// can not divide by zero.
231+
pub fn with_failure_threshold_duration(
232+
self,
233+
failure_threshold_duration: Duration,
234+
) -> Result<Self, Error> {
235+
ensure!(self.period.as_nanos() != 0, PeriodIsZeroSnafu);
236+
237+
// SAFETY: Period is checked above to be non-zero
238+
let failure_threshold = failure_threshold_duration.div_duration_f32(*self.period);
239+
Ok(self.with_failure_threshold(failure_threshold.ceil() as i32))
240+
}
241+
242+
pub fn build(self) -> Result<Probe, Error> {
243+
let mut probe = Probe {
244+
exec: None,
245+
failure_threshold: Some(self.failure_threshold),
246+
grpc: None,
247+
http_get: None,
248+
initial_delay_seconds: Some(self.initial_delay.as_secs().try_into().context(
249+
DurationTooLongSnafu {
250+
field: "initialDelay",
251+
duration: self.initial_delay,
252+
},
253+
)?),
254+
period_seconds: Some(self.period.as_secs().try_into().context(
255+
DurationTooLongSnafu {
256+
field: "period",
257+
duration: self.period,
258+
},
259+
)?),
260+
success_threshold: Some(self.success_threshold),
261+
tcp_socket: None,
262+
termination_grace_period_seconds: Some(
263+
self.termination_grace_period.as_secs().try_into().context(
264+
DurationTooLongSnafu {
265+
field: "terminationGracePeriod",
266+
duration: self.termination_grace_period,
267+
},
268+
)?,
269+
),
270+
timeout_seconds: Some(self.timeout.as_secs().try_into().context(
271+
DurationTooLongSnafu {
272+
field: "timeout",
273+
duration: self.timeout,
274+
},
275+
)?),
276+
};
277+
278+
match self.action {
279+
ProbeAction::Exec(exec_action) => probe.exec = Some(exec_action),
280+
ProbeAction::Grpc(grpc_action) => probe.grpc = Some(grpc_action),
281+
ProbeAction::HttpGet(http_get_action) => probe.http_get = Some(http_get_action),
282+
ProbeAction::TcpSocket(tcp_socket_action) => probe.tcp_socket = Some(tcp_socket_action),
283+
}
284+
285+
Ok(probe)
286+
}
287+
}
288+
289+
#[cfg(test)]
290+
mod tests {
291+
use super::*;
292+
293+
#[test]
294+
fn test_probe_builder_minimal() {
295+
let probe = ProbeBuilder::http_get_port_scheme_path(8080, None, None)
296+
.with_period(Duration::from_secs(10))
297+
.build()
298+
.expect("Valid inputs must produce a Probe");
299+
300+
assert_eq!(
301+
probe.http_get,
302+
Some(HTTPGetAction {
303+
port: IntOrString::Int(8080),
304+
..Default::default()
305+
})
306+
);
307+
assert_eq!(probe.period_seconds, Some(10));
308+
}
309+
310+
#[test]
311+
fn test_probe_builder_complex() {
312+
let probe = ProbeBuilder::exec_command(["sleep", "1"])
313+
.with_period(Duration::from_secs(5))
314+
.with_success_threshold(2)
315+
.with_failure_threshold_duration(Duration::from_secs(33))
316+
.expect("The period is always non-zero")
317+
.with_timeout(Duration::from_secs(3))
318+
.with_initial_delay(Duration::from_secs(7))
319+
.with_termination_grace_period(Duration::from_secs(4))
320+
.build()
321+
.expect("Valid inputs must produce a Probe");
322+
323+
assert_eq!(
324+
probe,
325+
Probe {
326+
exec: Some(ExecAction {
327+
command: Some(vec!["sleep".to_owned(), "1".to_owned()])
328+
}),
329+
failure_threshold: Some(7),
330+
grpc: None,
331+
http_get: None,
332+
initial_delay_seconds: Some(7),
333+
period_seconds: Some(5),
334+
success_threshold: Some(2),
335+
tcp_socket: None,
336+
termination_grace_period_seconds: Some(4),
337+
timeout_seconds: Some(3),
338+
}
339+
);
340+
}
341+
}

0 commit comments

Comments
 (0)