Skip to content

Commit b98bcdb

Browse files
committed
feat(stats): No more double-counting for packets
Signed-off-by: Sergey Matov <[email protected]>
1 parent dd40877 commit b98bcdb

File tree

2 files changed

+108
-41
lines changed

2 files changed

+108
-41
lines changed

stats/src/dpstats.rs

Lines changed: 85 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ impl VpcMapName {
3838
}
3939
}
4040

41+
/// Compute overlap in nanoseconds between [a_start, a_end] and [b_start, b_end].
42+
#[inline]
43+
fn overlap_nanos(a_start: Instant, a_end: Instant, b_start: Instant, b_end: Instant) -> u128 {
44+
let start = if a_start > b_start { a_start } else { b_start };
45+
let end = if a_end < b_end { a_end } else { b_end };
46+
if end > start {
47+
end.duration_since(start).as_nanos()
48+
} else {
49+
0
50+
}
51+
}
52+
4153
/// A `StatsCollector` is responsible for collecting and aggregating packet statistics for a
4254
/// collection of workers running packet processing pipelines on various threads.
4355
#[derive(Debug)]
@@ -50,6 +62,7 @@ pub struct StatsCollector {
5062
/// Filter for batches which have been submitted to the `submitted` filter. This filter is
5163
/// used to calculate rates.
5264
submitted: SavitzkyGolayFilter<hashbrown::HashMap<VpcDiscriminant, TransmitSummary<u64>>>,
65+
/// Running cumulative totals used to produce monotonic series into the SG derivative filter.
5366
cumulative_totals: hashbrown::HashMap<VpcDiscriminant, TransmitSummary<u64>>,
5467
/// Reader for the VPC map. This reader is used to determine the VPCs that are currently
5568
/// known to the system.
@@ -173,38 +186,91 @@ impl StatsCollector {
173186
}
174187
})
175188
.collect();
189+
190+
// Proportionally distribute each (src,dst) update across overlapping batches.
176191
update.summary.vpc.iter().for_each(|(src, summary)| {
177192
summary.dst.iter().for_each(|(dst, stats)| {
178-
slices.iter_mut().for_each(|batch| {
179-
// TODO: this can be much more efficient
180-
let SplitCount {
181-
inside: packets, ..
182-
} = batch.split_count(&update, stats.packets);
183-
let SplitCount { inside: bytes, .. } =
184-
batch.split_count(&update, stats.bytes);
185-
let stats = PacketAndByte { packets, bytes };
186-
if packets == 0 && bytes == 0 {
187-
return;
193+
if stats.packets == 0 && stats.bytes == 0 {
194+
return;
195+
}
196+
197+
let upd_start = update.summary.start;
198+
let upd_end = update.start() + update.duration;
199+
200+
// Pre-compute overlaps with all candidate batch slices
201+
let overlaps: Vec<u128> = slices
202+
.iter()
203+
.map(|b| overlap_nanos(b.start, b.planned_end, upd_start, upd_end))
204+
.collect();
205+
let total_ov: u128 = overlaps.iter().copied().sum();
206+
if total_ov == 0 {
207+
return;
208+
}
209+
210+
// Integer-safe split: give the remainder to the last overlapping bucket
211+
let mut rem_pkts = stats.packets;
212+
let mut rem_bytes = stats.bytes;
213+
214+
let last_idx = overlaps
215+
.iter()
216+
.enumerate()
217+
.rfind(|&(_, &ov)| ov > 0)
218+
.map(|(i, _)| i);
219+
220+
for (i, batch) in slices.iter_mut().enumerate() {
221+
let ov = overlaps[i];
222+
if ov == 0 {
223+
continue;
224+
}
225+
226+
let is_last = Some(i) == last_idx;
227+
228+
let pkts_in = if is_last {
229+
rem_pkts
230+
} else {
231+
let v = ((stats.packets as u128) * ov / total_ov) as u64;
232+
// track remainder
233+
rem_pkts = rem_pkts.saturating_sub(v);
234+
v
235+
};
236+
237+
let bytes_in = if is_last {
238+
rem_bytes
239+
} else {
240+
let v = ((stats.bytes as u128) * ov / total_ov) as u64;
241+
rem_bytes = rem_bytes.saturating_sub(v);
242+
v
243+
};
244+
245+
if pkts_in == 0 && bytes_in == 0 {
246+
continue;
188247
}
248+
249+
let apportioned = PacketAndByte {
250+
packets: pkts_in,
251+
bytes: bytes_in,
252+
};
253+
189254
match batch.vpc.get_mut(src) {
190255
None => {
191256
let mut tx_summary = TransmitSummary::new();
192-
tx_summary.dst.insert(*dst, stats);
257+
tx_summary.dst.insert(*dst, apportioned);
193258
batch.vpc.insert(*src, tx_summary);
194259
}
195260
Some(tx_summary) => match tx_summary.dst.get_mut(dst) {
196261
None => {
197-
tx_summary.dst.insert(*dst, stats);
262+
tx_summary.dst.insert(*dst, apportioned);
198263
}
199264
Some(s) => {
200-
*s += stats;
265+
*s += apportioned;
201266
}
202267
},
203268
}
204-
})
269+
}
205270
});
206271
});
207272
}
273+
208274
let current_time = Instant::now();
209275
let mut expired = self
210276
.outstanding
@@ -258,26 +324,27 @@ impl StatsCollector {
258324
}
259325
});
260326
});
261-
//self.submitted.push(concluded.vpc);
327+
328+
// Update cumulative totals from the *concluded* batch (apportioned already)
262329
for (&src, tx_summary) in concluded.vpc.iter() {
263330
let totals = self
264331
.cumulative_totals
265332
.entry(src)
266333
.or_insert_with(TransmitSummary::new);
267-
334+
268335
for (&dst, &stats) in tx_summary.dst.iter() {
269336
match totals.dst.get_mut(&dst) {
270337
Some(entry) => {
271338
entry.packets = entry.packets.saturating_add(stats.packets);
272-
entry.bytes = entry.bytes.saturating_add(stats.bytes);
339+
entry.bytes = entry.bytes.saturating_add(stats.bytes);
273340
}
274341
None => {
275342
totals.dst.insert(dst, stats);
276343
}
277344
}
278345
}
279346
}
280-
347+
281348
// Push the cumulative snapshot into the SG derivative filter
282349
debug!("sg snapshot: {:?}", self.cumulative_totals);
283350
self.submitted.push(self.cumulative_totals.clone());

stats/src/rate.rs

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -205,29 +205,29 @@ impl TryFrom<&SavitzkyGolayFilter<TransmitSummary<u64>>>
205205
.collect();
206206
let mut out = TransmitSummary::<SavitzkyGolayFilter<u64>>::new();
207207
for (idx, summary) in values.iter().enumerate() {
208-
all_keys
209-
.iter()
210-
.for_each(|&k| match (summary.dst.get(&k), out.dst.get_mut(&k)) {
211-
(Some(count), Some(out)) => {
212-
out.packets.push(count.packets);
213-
out.bytes.push(count.bytes);
214-
}
215-
(Some(count), None) => {
216-
let mut packets = SavitzkyGolayFilter::new(value.step);
217-
let mut bytes = SavitzkyGolayFilter::new(value.step);
218-
packets.push(count.packets);
219-
bytes.push(count.bytes);
220-
out.dst.insert(k, PacketAndByte { packets, bytes });
221-
}
222-
(None, Some(out)) => {
223-
debug_assert!(idx != 0);
224-
out.packets.push(out.packets.data[out.packets.idx - 1]);
225-
out.bytes.push(out.bytes.data[out.bytes.idx - 1]);
226-
}
227-
(None, None) => {
228-
// no data yet
229-
}
230-
});
208+
all_keys
209+
.iter()
210+
.for_each(|&k| match (summary.dst.get(&k), out.dst.get_mut(&k)) {
211+
(Some(count), Some(out)) => {
212+
out.packets.push(count.packets);
213+
out.bytes.push(count.bytes);
214+
}
215+
(Some(count), None) => {
216+
let mut packets = SavitzkyGolayFilter::new(value.step);
217+
let mut bytes = SavitzkyGolayFilter::new(value.step);
218+
packets.push(count.packets);
219+
bytes.push(count.bytes);
220+
out.dst.insert(k, PacketAndByte { packets, bytes });
221+
}
222+
(None, Some(out)) => {
223+
debug_assert!(idx != 0);
224+
out.packets.push(out.packets.data[out.packets.idx - 1]);
225+
out.bytes.push(out.bytes.data[out.bytes.idx - 1]);
226+
}
227+
(None, None) => {
228+
// no data yet
229+
}
230+
});
231231
}
232232
Ok(out)
233233
}

0 commit comments

Comments
 (0)