Commit df8e559
scx_chaos: use peek operation to optimise for empty delay dsq
Use the new `scx_bpf_dsq_peek` in scx_chaos to optimise for the fast path. This avoids locking the DSQs and should be beneficial in the common case where the DSQ is empty or nothing in the DSQ is ready yet. Also add a few stats to track how successful the peek is.

This works really well on my local machine for skipping work on the hot path. It mostly avoids contention with the crawling timer thread, as the insertion in chaos_enqueue and removal in chaos_dispatch are local to one CPU and the locking overhead would be minimal.

Test plan:

- CI

```
jake@merlin:/data/users/jake/repos/scx/ > cargo build --release -p scx_chaos && sudo target/release/scx_chaos --random-delay-frequency 0.01 --random-delay-min-us 100000 --random-delay-max-us 200000 --stats 10
...
    Finished `release` profile [optimized] target(s) in 1m 01s
11:28:59 [INFO] Running scx_chaos (build ID: 1.0.20-ga6134e95-dirty x86_64-unknown-linux-gnu)
11:28:59 [INFO] Builder { traits: [RandomDelays { frequency: 0.01, min_us: 100000, max_us: 200000 }], verbose: 0, kprobe_random_delays: None, p2dq_opts: SchedulerOpts { disable_kthreads_local: false, autoslice: false, interactive_ratio: 10, deadline: false, eager_load_balance: false, freq_control: false, greedy_idle_disable: true, interactive_sticky: false, interactive_fifo: false, dispatch_pick2_disable: false, dispatch_lb_busy: 75, dispatch_lb_interactive: true, keep_running: false, atq_enabled: false, cpu_priority: false, interactive_dsq: true, wakeup_lb_busy: 0, wakeup_llc_migrations: false, select_idle_in_enqueue: false, queued_wakeup: false, idle_resume_us: None, max_dsq_pick2: false, task_slice: false, min_slice_us: 100, lb_mode: Load, sched_mode: Default, lb_slack_factor: 5, min_llc_runs_pick2: 1, saturated_percent: 5, dsq_time_slices: [], dsq_shift: 4, llc_shards: 5, min_nr_queued_pick2: 0, dumb_queues: 3, init_dsq_index: 0, virt_llc_enabled: false, topo: TopologyArgs { virt_llc: None } }, requires_ppid: None }
11:28:59 [INFO] DSQ[0] slice_ns 100000
11:28:59 [INFO] DSQ[1] slice_ns 3200000
11:28:59 [INFO] DSQ[2] slice_ns 6400000
11:28:59 [WARN] libbpf: map 'chaos': BPF map skeleton link is uninitialized
chaos traits: random_delays/cpu_freq/degradation 0/0/0 chaos excluded/skipped 0/0 kprobe_random_delays 0 timer kicks: 0
peek: empty/not_ready/needs_proc 1057/0/0
chaos traits: random_delays/cpu_freq/degradation 3/0/0 chaos excluded/skipped 0/0 kprobe_random_delays 0 timer kicks: 3
peek: empty/not_ready/needs_proc 107168/309/9716
chaos traits: random_delays/cpu_freq/degradation 0/0/0 chaos excluded/skipped 0/0 kprobe_random_delays 0 timer kicks: 0
peek: empty/not_ready/needs_proc 91787/0/15417
^C11:29:23 [INFO] Unregister scx_chaos scheduler
```
1 parent 36048b9 commit df8e559
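As a rough illustration of the fast-path decision described in the message above, here is a minimal userspace C sketch of the same idea: peek at the head of the delay queue and only walk it when the head's vtime falls within the slack window. The function name and the values are hypothetical; the real logic lives in `delay_dsq_next_time()` and `check_dsq_times()` in the main.bpf.c diff below.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define U64_MAX ((uint64_t)~0ULL)

/*
 * Illustrative model of the peek-based fast path: head_vtime is the
 * vtime of the first task in a delay DSQ, or U64_MAX when a peek found
 * the queue empty. Return true only when the queue actually has to be
 * walked, i.e. the earliest task falls within now + slack.
 */
static bool needs_processing(uint64_t head_vtime, uint64_t now, uint64_t slack_ns)
{
	/* Empty queue, or earliest task well beyond the slack window:
	 * skip the walk entirely (the common case in the stats above). */
	if (head_vtime > now + slack_ns)
		return false;
	return true;
}

int main(void)
{
	uint64_t now = 1000000000ULL; /* hypothetical "current time" in ns */
	uint64_t slack = 100000ULL;   /* hypothetical slack window in ns */

	printf("empty queue        -> %d\n", needs_processing(U64_MAX, now, slack));       /* 0 */
	printf("task far in future -> %d\n", needs_processing(now + 5000000, now, slack)); /* 0 */
	printf("task within slack  -> %d\n", needs_processing(now + 50000, now, slack));   /* 1 */
	return 0;
}
```

In the BPF code the same comparison also covers kernels without the peek kfunc: `delay_dsq_next_time()` returns 0 in that case, which is never greater than `now + slack`, so those kernels simply fall back to iterating.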

File tree

6 files changed, +87 -4 lines changed


scheds/include/scx/common.bpf.h

Lines changed: 1 addition & 0 deletions
```diff
@@ -75,6 +75,7 @@ u32 scx_bpf_reenqueue_local(void) __ksym;
 void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
 s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
 void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
+struct task_struct *scx_bpf_dsq_peek(u64 dsq_id) __ksym __weak;
 int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, u64 flags) __ksym __weak;
 struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
 void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;
```

scheds/include/scx/compat.bpf.h

Lines changed: 7 additions & 0 deletions
```diff
@@ -230,6 +230,13 @@ static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
 		scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) : \
 		scx_bpf_pick_any_cpu(cpus_allowed, flags))

+#define __COMPAT_scx_bpf_dsq_peek(dsq_id) \
+	(bpf_ksym_exists(scx_bpf_dsq_peek) ? scx_bpf_dsq_peek(dsq_id) : ({ \
+		struct task_struct *p = NULL; \
+		bpf_for_each(scx_dsq, p, dsq_id, 0) { break; } \
+		p; \
+	}))
+
 /*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
```

scheds/rust/scx_chaos/src/bpf/intf.h

Lines changed: 3 additions & 0 deletions
```diff
@@ -54,6 +54,9 @@ enum chaos_stat_idx {
 	CHAOS_STAT_CHAOS_SKIPPED,
 	CHAOS_STAT_KPROBE_RANDOM_DELAYS,
 	CHAOS_STAT_TIMER_KICKS,
+	CHAOS_STAT_PEEK_EMPTY_DSQ,
+	CHAOS_STAT_PEEK_NOT_READY,
+	CHAOS_STAT_PEEK_NEEDS_PROCESSING,
 	CHAOS_NR_STATS,
 };

```

scheds/rust/scx_chaos/src/bpf/main.bpf.c

Lines changed: 58 additions & 4 deletions
```diff
@@ -63,6 +63,8 @@ const volatile u64 kprobe_delays_max_ns = 2;
 #define MIN(x, y) ((x) < (y) ? (x) : (y))
 #define MAX(x, y) ((x) > (y) ? (x) : (y))

+#define U64_MAX ((u64)~0ULL)
+
 enum chaos_timer_callbacks {
 	CHAOS_TIMER_CHECK_QUEUES,
 	CHAOS_MAX_TIMERS,
@@ -143,6 +145,33 @@ static __always_inline void chaos_stat_inc(enum chaos_stat_idx stat)
 	(*cnt_p)++;
 }

+/*
+ * Get the next time a delay DSQ needs processing.
+ *
+ * Safe for delay DSQs which use monotonic time (vtimes won't wrap to U64_MAX).
+ * Must be called with RCU read lock held.
+ */
+static __always_inline u64 delay_dsq_next_time(u64 dsq_id)
+{
+	struct task_struct *first_p;
+	u64 vtime;
+
+	// If we don't have native peek, fall back to always iterating
+	if (!bpf_ksym_exists(scx_bpf_dsq_peek)) {
+		chaos_stat_inc(CHAOS_STAT_PEEK_NEEDS_PROCESSING);
+		return 0;
+	}
+
+	first_p = scx_bpf_dsq_peek(dsq_id);
+	if (!first_p) {
+		chaos_stat_inc(CHAOS_STAT_PEEK_EMPTY_DSQ);
+		return U64_MAX;
+	}
+
+	vtime = first_p->scx.dsq_vtime;
+	return vtime;
+}
+
 static __always_inline enum chaos_trait_kind
 choose_chaos(struct chaos_task_ctx *taskc)
 {
@@ -362,9 +391,25 @@ __weak u64 check_dsq_times(int cpu_idx)
 	u64 next_trigger_time = 0;
 	u64 now = bpf_ktime_get_ns();
 	bool has_kicked = false;
+	u64 dsq_id = get_cpu_delay_dsq(cpu_idx);

 	bpf_rcu_read_lock();
-	bpf_for_each(scx_dsq, p, get_cpu_delay_dsq(cpu_idx), 0) {
+
+	next_trigger_time = delay_dsq_next_time(dsq_id);
+	if (next_trigger_time > now + chaos_timer_check_queues_slack_ns) {
+		chaos_stat_inc(CHAOS_STAT_PEEK_NOT_READY);
+		// DSQ empty (U64_MAX) or first task beyond slack window
+		bpf_rcu_read_unlock();
+		return next_trigger_time == U64_MAX ? 0 : next_trigger_time;
+	}
+
+	chaos_stat_inc(CHAOS_STAT_PEEK_NEEDS_PROCESSING);
+
+	// Need to iterate: no peek support (0), task ready, or task within slack window
+	next_trigger_time = 0;
+
+	// Need to iterate to handle ready tasks
+	bpf_for_each(scx_dsq, p, dsq_id, 0) {
 		p = bpf_task_from_pid(p->pid);
 		if (!p)
 			break;
@@ -387,8 +432,8 @@
 		if (next_trigger_time > now + chaos_timer_check_queues_slack_ns)
 			break;
 	}
-	bpf_rcu_read_unlock();

+	bpf_rcu_read_unlock();
 	return next_trigger_time;
 }

@@ -531,9 +576,17 @@ void BPF_STRUCT_OPS(chaos_dispatch, s32 cpu, struct task_struct *prev)
 	struct enqueue_promise promise;
 	struct chaos_task_ctx *taskc;
 	struct task_struct *p;
-	u64 now = bpf_ktime_get_ns();
+	u64 now = bpf_ktime_get_ns();
+	u64 dsq_id = get_cpu_delay_dsq(-1);
+
+	// Check if we need to process the delay DSQ
+	if (delay_dsq_next_time(dsq_id) > now) {
+		chaos_stat_inc(CHAOS_STAT_PEEK_NOT_READY);
+		goto p2dq;
+	}
+	chaos_stat_inc(CHAOS_STAT_PEEK_NEEDS_PROCESSING);

-	bpf_for_each(scx_dsq, p, get_cpu_delay_dsq(-1), 0) {
+	bpf_for_each(scx_dsq, p, dsq_id, 0) {
 		p = bpf_task_from_pid(p->pid);
 		if (!p)
 			continue;
@@ -557,6 +610,7 @@ void BPF_STRUCT_OPS(chaos_dispatch, s32 cpu, struct task_struct *prev)
 		bpf_task_release(p);
 	}

+p2dq:
 	return p2dq_dispatch_impl(cpu, prev);
 }

```
scheds/rust/scx_chaos/src/lib.rs

Lines changed: 4 additions & 0 deletions
```diff
@@ -200,6 +200,10 @@ impl Scheduler {
             kprobe_random_delays: stats
                 [bpf_intf::chaos_stat_idx_CHAOS_STAT_KPROBE_RANDOM_DELAYS as usize],
             timer_kicks: stats[bpf_intf::chaos_stat_idx_CHAOS_STAT_TIMER_KICKS as usize],
+            peek_empty_dsq: stats[bpf_intf::chaos_stat_idx_CHAOS_STAT_PEEK_EMPTY_DSQ as usize],
+            peek_not_ready: stats[bpf_intf::chaos_stat_idx_CHAOS_STAT_PEEK_NOT_READY as usize],
+            peek_needs_processing: stats
+                [bpf_intf::chaos_stat_idx_CHAOS_STAT_PEEK_NEEDS_PROCESSING as usize],
         }
     }

```
scheds/rust/scx_chaos/src/stats.rs

Lines changed: 14 additions & 0 deletions
```diff
@@ -29,6 +29,12 @@ pub struct Metrics {
     pub timer_kicks: u64,
     #[stat(desc = "Number of times a kprobe caused a random delay to be applied")]
     pub kprobe_random_delays: u64,
+    #[stat(desc = "Peek found empty DSQ")]
+    pub peek_empty_dsq: u64,
+    #[stat(desc = "Peek found task not ready")]
+    pub peek_not_ready: u64,
+    #[stat(desc = "Peek determined DSQ needs processing")]
+    pub peek_needs_processing: u64,
 }

 impl Metrics {
@@ -44,6 +50,11 @@ impl Metrics {
             self.kprobe_random_delays,
             self.timer_kicks,
         )?;
+        writeln!(
+            w,
+            "peek: empty/not_ready/needs_proc {}/{}/{}",
+            self.peek_empty_dsq, self.peek_not_ready, self.peek_needs_processing,
+        )?;
         Ok(())
     }

@@ -56,6 +67,9 @@ impl Metrics {
             chaos_skipped: self.chaos_skipped - rhs.chaos_skipped,
             kprobe_random_delays: self.kprobe_random_delays - rhs.kprobe_random_delays,
             timer_kicks: self.timer_kicks - rhs.timer_kicks,
+            peek_empty_dsq: self.peek_empty_dsq - rhs.peek_empty_dsq,
+            peek_not_ready: self.peek_not_ready - rhs.peek_not_ready,
+            peek_needs_processing: self.peek_needs_processing - rhs.peek_needs_processing,
         }
     }
 }
```
