Commit 2253df3
mitosis: dynamic cpus for affinitized tasks
Tasks that are affinitized (e.g. to a subset of their cell) are currently bound to a single CPU out of their affinity mask. This mostly works, since most affinitized tasks are affinitized to exactly one CPU. However, we've observed instances where tasks are affinitized to multiple CPUs, and the initialization-time binding can lead to significant CPU imbalance.

This commit adds a new flag, --dynamic-affinity-cpu-selection, which dynamically re-assigns such tasks (affinitized tasks only) to random CPUs within their affinity mask in order to reduce load imbalance. This doesn't completely eliminate CPU load imbalance for these kinds of tasks, but it avoids load balancing or other complexities, and because a fresh random decision is made at each scheduling point, it prevents long-term imbalance.

Additionally, I've added a test script to reproduce this behavior (it may take a few runs due to the random assignment).

Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
1 parent 3c8138b commit 2253df3
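The core argument in the commit message, that a fresh random choice at each scheduling point prevents long-term imbalance, is easy to sanity-check outside the kernel. Below is a standalone userspace sketch (not part of this commit; the file name and the CPU, task, and wakeup counts are arbitrary assumptions for illustration) showing that per-wake random selection spreads wakeups across a shared affinity mask roughly uniformly:

/*
 * sim_random_affinity.c (hypothetical): simulate NR_TASKS affinitized
 * tasks that pick a random CPU from a NR_CPUS-wide mask at every
 * scheduling point, and report the resulting per-CPU share of wakeups.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS  4      /* CPUs in the shared affinity mask */
#define NR_TASKS 16     /* affinitized tasks competing for them */
#define NR_WAKES 100000 /* scheduling points per task */

int main(void)
{
        long wakes[NR_CPUS] = { 0 };
        long total = (long)NR_TASKS * NR_WAKES;

        srand(42);

        for (int t = 0; t < NR_TASKS; t++)
                for (int w = 0; w < NR_WAKES; w++)
                        wakes[rand() % NR_CPUS]++; /* the per-wake random re-pick */

        for (int c = 0; c < NR_CPUS; c++)
                printf("cpu%d: %ld wakes (%.2f%%)\n", c, wakes[c],
                       100.0 * wakes[c] / total);
        return 0;
}

Each CPU ends up near 25% of the wakeups; a static init-time binding, by contrast, stays as lopsided as the initial assignment happened to be.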

File tree

3 files changed: +426, −2 lines

scheds/rust/scx_mitosis/src/bpf/mitosis.bpf.c

Lines changed: 75 additions & 2 deletions
@@ -36,6 +36,7 @@ const volatile bool debug_events_enabled = false;
 const volatile bool exiting_task_workaround_enabled = true;
 const volatile bool split_vtime_updates = false;
 const volatile bool cpu_controller_disabled = false;
+const volatile bool dynamic_affinity_cpu_selection = false;

 /*
  * CPU assignment changes aren't fully in effect until a subsequent tick()
@@ -616,7 +617,50 @@ s32 BPF_STRUCT_OPS(mitosis_select_cpu, struct task_struct *p, s32 prev_cpu,

        if (!tctx->all_cell_cpus_allowed) {
                cstat_inc(CSTAT_AFFN_VIOL, tctx->cell, cctx);
-               cpu = dsq_to_cpu(tctx->dsq);
+
+               if (dynamic_affinity_cpu_selection) {
+                       const struct cpumask *idle_smtmask;
+
+                       /*
+                        * Dynamic affinity balancing: use pick_idle_cpu_from
+                        * for SMT-aware idle CPU selection. If no idle CPU,
+                        * randomly pick to balance load over time.
+                        */
+                       idle_smtmask = scx_bpf_get_idle_smtmask();
+                       if (!idle_smtmask) {
+                               cpu = prev_cpu;
+                               goto affn_done;
+                       }
+
+                       cpu = pick_idle_cpu_from(p, p->cpus_ptr, prev_cpu,
+                                                idle_smtmask);
+                       scx_bpf_put_idle_cpumask(idle_smtmask);
+
+                       if (cpu < 0)
+                               cpu = bpf_cpumask_any_distribute(p->cpus_ptr);
+
+                       /*
+                        * If switching to a different CPU's DSQ, update the
+                        * DSQ assignment and reset vtime to the new CPU's
+                        * vtime_now to ensure fair scheduling in the new DSQ.
+                        */
+                       if (cpu_dsq(cpu) != tctx->dsq) {
+                               struct cpu_ctx *new_cctx = lookup_cpu_ctx(cpu);
+                               if (new_cctx) {
+                                       tctx->dsq = cpu_dsq(cpu);
+                                       p->scx.dsq_vtime =
+                                               READ_ONCE(new_cctx->vtime_now);
+                               } else {
+                                       /* Invalid CPU, fall back to prev_cpu */
+                                       cpu = prev_cpu;
+                               }
+                       }
+               } else {
+                       cpu = dsq_to_cpu(tctx->dsq);
+               }
+
+affn_done:
+
                if (scx_bpf_test_and_clear_cpu_idle(cpu))
                        scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, slice_ns, 0);
                return cpu;
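pick_idle_cpu_from() is defined elsewhere in mitosis.bpf.c and is not shown in this hunk. As a rough mental model only, and assuming it follows the common sched_ext idle-selection pattern (this sketch is a guess, not the commit's code), it would look something like:

/*
 * Hypothetical sketch of pick_idle_cpu_from(): prefer prev_cpu if its
 * whole SMT core is idle (cache warmth), then any fully idle core, then
 * any idle SMT sibling. Returns a negative value if nothing is idle.
 */
static s32 pick_idle_cpu_from(struct task_struct *p,
                              const struct cpumask *allowed, s32 prev_cpu,
                              const struct cpumask *idle_smtmask)
{
        s32 cpu;

        if (bpf_cpumask_test_cpu(prev_cpu, allowed) &&
            bpf_cpumask_test_cpu(prev_cpu, idle_smtmask) &&
            scx_bpf_test_and_clear_cpu_idle(prev_cpu))
                return prev_cpu;

        cpu = scx_bpf_pick_idle_cpu(allowed, SCX_PICK_IDLE_CORE);
        if (cpu >= 0)
                return cpu;

        return scx_bpf_pick_idle_cpu(allowed, 0);
}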
@@ -665,7 +709,36 @@ void BPF_STRUCT_OPS(mitosis_enqueue, struct task_struct *p, u64 enq_flags)
        vtime = p->scx.dsq_vtime;

        if (!tctx->all_cell_cpus_allowed) {
-               cpu = dsq_to_cpu(tctx->dsq);
+               if (dynamic_affinity_cpu_selection) {
+                       /*
+                        * Dynamic affinity balancing: if current assigned CPU
+                        * has tasks queued, randomly pick from allowed CPUs to
+                        * balance load across compatible CPUs over time.
+                        *
+                        * This runs even if select_cpu() was called, because
+                        * select_cpu() may have picked a random CPU when no
+                        * idle CPUs were available, without checking queue depth.
+                        */
+                       cpu = dsq_to_cpu(tctx->dsq);
+                       if (scx_bpf_dsq_nr_queued(tctx->dsq) > 0) {
+                               s32 new_cpu;
+
+                               new_cpu = bpf_cpumask_any_distribute(p->cpus_ptr);
+                               if (new_cpu < nr_possible_cpus &&
+                                   cpu_dsq(new_cpu) != tctx->dsq) {
+                                       struct cpu_ctx *new_cctx;
+
+                                       new_cctx = lookup_cpu_ctx(new_cpu);
+                                       if (new_cctx) {
+                                               tctx->dsq = cpu_dsq(new_cpu);
+                                               vtime = READ_ONCE(new_cctx->vtime_now);
+                                               cpu = new_cpu;
+                                       }
+                               }
+                       }
+               } else {
+                       cpu = dsq_to_cpu(tctx->dsq);
+               }
        } else if (!__COMPAT_is_enq_cpu_selected(enq_flags)) {
                /*
                 * If we haven't selected a cpu, then we haven't looked for and kicked an
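Note the division of labor between the two hooks: select_cpu() prefers an idle CPU (SMT-aware) and only falls back to a random pick, while enqueue() additionally re-checks queue depth via scx_bpf_dsq_nr_queued(), since select_cpu() may have made its random pick without any idle CPU available and without looking at queue depth. Both paths keep tctx->dsq and the task's vtime consistent with the destination per-CPU DSQ so the move doesn't distort fairness.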

scheds/rust/scx_mitosis/src/main.rs

Lines changed: 12 additions & 0 deletions
@@ -118,6 +118,13 @@ struct Opts {
        #[clap(long, action = clap::ArgAction::SetTrue)]
        cpu_controller_disabled: bool,

+       /// Enable dynamic CPU selection for affinitized tasks.
+       /// When enabled, affinitized tasks are randomly distributed across compatible
+       /// CPUs on each wake rather than being statically assigned to a single CPU.
+       /// This helps balance load when many affinitized tasks share overlapping CPU masks.
+       #[clap(long, action = clap::ArgAction::SetTrue)]
+       dynamic_affinity_cpu_selection: bool,
+
        #[clap(flatten, next_help_heading = "Libbpf Options")]
        pub libbpf: LibbpfOpts,
 }
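With clap's derive defaults, the dynamic_affinity_cpu_selection field surfaces as the --dynamic-affinity-cpu-selection flag named in the commit message, and ArgAction::SetTrue makes it a plain boolean switch that defaults to false, matching the BPF-side default.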
@@ -222,6 +229,11 @@ impl<'a> Scheduler<'a> {
                        .as_mut()
                        .unwrap()
                        .cpu_controller_disabled = opts.cpu_controller_disabled;
+               skel.maps
+                       .rodata_data
+                       .as_mut()
+                       .unwrap()
+                       .dynamic_affinity_cpu_selection = opts.dynamic_affinity_cpu_selection;

                skel.maps.rodata_data.as_mut().unwrap().nr_possible_cpus = *NR_CPUS_POSSIBLE as u32;
                for cpu in topology.all_cpus.keys() {
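With the plumbing above in place, the behavior is opt-in at launch; a typical invocation (other options omitted) would be:

        scx_mitosis --dynamic-affinity-cpu-selection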
