Skip to content

Commit cf81f4f

Browse files
authored
feat(docs): Write etl crate Rust docs (#262)
1 parent 46370fc commit cf81f4f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1542
-245
lines changed

etl/src/concurrency/shutdown.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,36 +2,69 @@ use tokio::sync::watch;
22

33
use crate::concurrency::signal::{SignalRx, SignalTx, create_signal};
44

5+
/// Transmitter side of the shutdown coordination channel.
6+
///
7+
/// [`ShutdownTx`] enables sending shutdown signals to multiple workers simultaneously.
8+
/// It wraps a signal transmitter with shutdown-specific semantics and provides methods
9+
/// for triggering shutdown and creating receiver subscriptions.
510
#[derive(Debug, Clone)]
611
pub struct ShutdownTx(SignalTx);
712

813
impl ShutdownTx {
14+
/// Wraps a signal transmitter with shutdown semantics.
915
pub fn wrap(tx: SignalTx) -> Self {
1016
Self(tx)
1117
}
1218

19+
/// Triggers shutdown for all subscribed workers.
20+
///
21+
/// This method broadcasts a shutdown signal to all workers that have subscribed
22+
/// to this shutdown channel. Workers should respond by completing their current
23+
/// operations gracefully and terminating.
1324
pub fn shutdown(&self) -> Result<(), watch::error::SendError<()>> {
1425
self.0.send(())
1526
}
1627

28+
/// Creates a new shutdown receiver for worker subscription.
29+
///
30+
/// Each worker should call this method to get its own receiver that can be used
31+
/// to detect when shutdown has been requested. Multiple receivers can be created
32+
/// from the same transmitter.
1733
pub fn subscribe(&self) -> ShutdownRx {
1834
self.0.subscribe()
1935
}
2036
}
2137

38+
/// Receiver side of the shutdown coordination channel.
39+
///
40+
/// [`ShutdownRx`] is used by workers to detect when shutdown has been requested.
41+
/// It's a type alias for [`SignalRx`] with shutdown-specific semantics.
2242
pub type ShutdownRx = SignalRx;
2343

44+
/// Result type that distinguishes between normal operation and shutdown scenarios.
45+
///
46+
/// [`ShutdownResult`] is used by operations that can be interrupted by shutdown signals.
47+
/// It preserves both successful results and any partial data that was being processed
48+
/// when shutdown was requested.
2449
pub enum ShutdownResult<T, I> {
50+
/// Normal successful completion with result data.
2551
Ok(T),
52+
/// Operation was interrupted by shutdown, with any partial data preserved.
2653
Shutdown(I),
2754
}
2855

2956
impl<T, I> ShutdownResult<T, I> {
57+
/// Returns `true` if this result is the [`ShutdownResult::Shutdown`] variant.
3058
pub fn should_shutdown(&self) -> bool {
3159
matches!(self, ShutdownResult::Shutdown(_))
3260
}
3361
}
3462

63+
/// Creates a new shutdown coordination channel.
64+
///
65+
/// This function creates a watch-based signal channel for coordinating shutdown across multiple
66+
/// workers. The transmitter can be used to trigger shutdown, while receivers can be
67+
/// distributed to workers that need to respond to shutdown signals.
3568
pub fn create_shutdown_channel() -> (ShutdownTx, ShutdownRx) {
3669
let (tx, rx) = create_signal();
3770
(ShutdownTx::wrap(tx), rx)

etl/src/concurrency/signal.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
11
use tokio::sync::watch;
22

3-
/// Type alias to abstract a watch channel of `()`.
3+
/// Transmitter side of a coordination signal channel.
4+
///
5+
/// [`SignalTx`] abstracts a watch channel transmitter for sending coordination signals
6+
/// between workers. The signal carries no data payload - it's purely for notification
7+
/// that some event or state change has occurred.
48
pub type SignalTx = watch::Sender<()>;
59

6-
/// Type alias to abstract a watch channel of `()`.
10+
/// Receiver side of a coordination signal channel.
11+
///
12+
/// [`SignalRx`] abstracts a watch channel receiver for detecting coordination signals.
13+
/// Workers can use this to wait for events from other parts of the system without
14+
/// polling or complex synchronization.
715
pub type SignalRx = watch::Receiver<()>;
816

9-
/// Creates a new pair of [`SignalTx`] and [`SignalRx`].
17+
/// Creates a new coordination signal channel.
18+
///
19+
/// This function creates a watch-based signaling channel optimized for coordination
20+
/// scenarios where multiple receivers need to be notified of the same event. Unlike
21+
/// mpsc channels, all receivers see the same signal simultaneously.
1022
pub fn create_signal() -> (SignalTx, SignalRx) {
1123
let (tx, rx) = watch::channel(());
1224
(tx, rx)

etl/src/concurrency/stream.rs

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -53,48 +53,45 @@ impl<B, S: Stream<Item = B>> BatchStream<B, S> {
5353
impl<B, S: Stream<Item = B>> Stream for BatchStream<B, S> {
5454
type Item = ShutdownResult<Vec<S::Item>, Vec<S::Item>>;
5555

56-
/// Polls the stream for the next batch of items.
56+
/// Polls the stream for the next batch of items using a complex state machine.
5757
///
58-
/// Returns:
59-
/// - `Poll::Ready(Some(batch))` when a complete batch is available
60-
/// - `Poll::Ready(None)` when the stream has ended
61-
/// - `Poll::Pending` when more items are needed to form a batch
62-
///
63-
/// The stream will emit a batch when:
64-
/// - The batch reaches maximum size
65-
/// - A timeout occurs
66-
/// - The stream is forcefully stopped
58+
/// This method implements a batching algorithm that balances throughput
59+
/// and latency by collecting items into batches based on both size and time constraints.
60+
/// The polling state machine handles multiple concurrent conditions and ensures proper
61+
/// resource cleanup during shutdown scenarios.
6762
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
6863
let mut this = self.as_mut().project();
6964

65+
// Fast path: if the inner stream has already ended, we're done
7066
if *this.inner_stream_ended {
7167
return Poll::Ready(None);
7268
}
7369

7470
loop {
71+
// Fast path: if we've been marked as stopped, terminate immediately
7572
if *this.stream_stopped {
7673
return Poll::Ready(None);
7774
}
7875

79-
// If the stream has been asked to stop, we mark the stream as stopped and return the
80-
// remaining elements, irrespectively of boundaries.
76+
// PRIORITY 1: Check for shutdown signal
77+
// Shutdown handling takes priority over all other operations to ensure
78+
// graceful termination. We return any accumulated items with shutdown indication.
8179
if this.shutdown_rx.has_changed().unwrap_or(false) {
8280
info!("the stream has been forcefully stopped");
8381

84-
// We mark the stream as stopped, in this way any further call with return
85-
// `Poll::Ready(None)`.
82+
// Mark the stream as permanently stopped so that any further poll returns `Poll::Ready(None)`
8683
*this.stream_stopped = true;
8784

88-
// We mark the current value as unchanged, effectively acknowledging that we have
89-
// seen it. This does not affect the correctness, but it makes the implementation
90-
// semantically more correct.
85+
// Mark the current watch value as seen, acknowledging that the shutdown signal was observed
9186
this.shutdown_rx.mark_unchanged();
9287

93-
// Even if we have no items, we return this result, since we signal that a shutdown
94-
// signal was received and the consumer side of the stream, can decide what to do.
88+
// Return accumulated items (if any) with shutdown indication
89+
// An empty batch is still returned so the consumer learns that shutdown occurred
9590
return Poll::Ready(Some(ShutdownResult::Shutdown(std::mem::take(this.items))));
9691
}
9792

93+
// PRIORITY 2: Timer management
94+
// Reset the timeout timer when starting a new batch or after emitting a batch
9895
if *this.reset_timer {
9996
this.deadline
10097
.set(Some(tokio::time::sleep(Duration::from_millis(
@@ -103,48 +100,63 @@ impl<B, S: Stream<Item = B>> Stream for BatchStream<B, S> {
103100
*this.reset_timer = false;
104101
}
105102

103+
// PRIORITY 3: Memory optimization
104+
// Pre-allocate batch capacity when starting to collect items
105+
// This avoids reallocations during batch collection
106106
if this.items.is_empty() {
107107
this.items.reserve_exact(this.batch_config.max_size);
108108
}
109109

110+
// PRIORITY 4: Poll underlying stream for new items
110111
match this.stream.as_mut().poll_next(cx) {
111-
Poll::Pending => break,
112+
Poll::Pending => {
113+
// No items are available right now; exit the loop and check whether the timeout requires emitting the batch
114+
break;
115+
}
112116
Poll::Ready(Some(item)) => {
117+
// New item available - add to current batch
113118
this.items.push(item);
114119

115-
// If we reached the `max_batch_size` we want to return the batch and reset the
116-
// timer.
120+
// SIZE-BASED EMISSION: If batch is full, emit immediately
121+
// This provides throughput optimization for high-volume streams
117122
if this.items.len() >= this.batch_config.max_size {
118-
*this.reset_timer = true;
123+
*this.reset_timer = true; // Schedule timer reset for next batch
119124
return Poll::Ready(Some(ShutdownResult::Ok(std::mem::take(this.items))));
120125
}
126+
// Continue loop to collect more items or check other conditions
121127
}
122128
Poll::Ready(None) => {
129+
// STREAM END: Underlying stream finished
130+
// Return final batch if we have items, otherwise signal completion
123131
let last = if this.items.is_empty() {
124-
None
132+
None // No final batch needed
125133
} else {
126-
*this.reset_timer = true;
134+
*this.reset_timer = true; // Clean up timer state
127135
Some(ShutdownResult::Ok(std::mem::take(this.items)))
128136
};
129137

130-
*this.inner_stream_ended = true;
138+
*this.inner_stream_ended = true; // Mark stream as permanently ended
131139

132140
return Poll::Ready(last);
133141
}
134142
}
135143
}
136144

137-
// If there are items, we want to check the deadline, if it's met, we return the batch
138-
// we currently have in memory, otherwise, we return.
145+
// PRIORITY 5: Time-based emission check
146+
// If we have items and the timeout has expired, emit the current batch
147+
// This provides latency bounds to prevent indefinite delays in low-volume scenarios
139148
if !this.items.is_empty()
140149
&& let Some(deadline) = this.deadline.as_pin_mut()
141150
{
151+
// Check if timeout has elapsed (this will register waker if not ready)
142152
ready!(deadline.poll(cx));
143-
*this.reset_timer = true;
153+
154+
*this.reset_timer = true; // Schedule timer reset for next batch
144155

145156
return Poll::Ready(Some(ShutdownResult::Ok(std::mem::take(this.items))));
146157
}
147158

159+
// No conditions met for batch emission - wait for more items or timeout
148160
Poll::Pending
149161
}
150162
}

etl/src/config/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
1+
//! Configuration objects for ETL pipelines.
2+
//!
3+
//! Re-exports configuration types and utilities required for pipeline setup and operation.
4+
15
// Re-exports.
26
pub use etl_config::shared::*;

etl/src/conversions/bool.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@ use crate::bail;
22
use crate::error::EtlResult;
33
use crate::error::{ErrorKind, EtlError};
44

5+
/// Parses a PostgreSQL boolean value from its text format representation.
6+
///
7+
/// PostgreSQL represents boolean values in text format as single characters:
8+
/// - `"t"` → `true` (exactly one lowercase 't')
9+
/// - `"f"` → `false` (exactly one lowercase 'f')
510
pub fn parse_bool(s: &str) -> EtlResult<bool> {
611
if s == "t" {
712
Ok(true)

0 commit comments

Comments
 (0)