Skip to content

Commit 667b6e4

Browse files
committed
scx_p2dq: Add DHQ support and fix migration-disabled task errors
Integrate Double Helix Queue (DHQ) as an alternative to ATQ for LLC-aware task migration, and fix a critical race condition causing migration-disabled task errors.

DHQ Integration:
- Add --dhq-enabled flag to enable DHQ mode for LLC migration
- Add --dhq-max-imbalance parameter (default: 3) to control strand balance
- Create one DHQ per pair of LLCs in same NUMA node
- Map each LLC to a specific strand (A or B) for cache affinity
- Each CPU inherits strand from its LLC for proper load distribution
- DHQ provides cache-aware migration with controlled cross-LLC movement

Strand-Specific DHQ Operations:
Use scx_dhq_peek_strand() and scx_dhq_pop_strand() instead of generic operations to ensure CPUs only consume from their designated strand. This preserves cache locality and prevents load imbalance.

Data Structure Changes:
- Add mig_dhq and dhq_strand to cpu_ctx and llc_ctx
- Add llc_pair_dhqs[] for shared DHQs between LLC pairs
- Add llcs_per_node[] to track LLCs per NUMA node
- Add P2DQ_ENQUEUE_PROMISE_DHQ_VTIME enqueue promise type
- Add enqueue_promise_dhq struct for DHQ-specific metadata

Configuration:
- p2dq_config.dhq_enabled: Enable DHQ mode
- p2dq_config.dhq_max_imbalance: Control strand pairing (0 = unlimited)
- Priority mode: lowest vtime wins across strands

Build System:
- Add lib/dhq.bpf.c to scx_p2dq and scx_chaos builds
- Include lib/dhq.h in types.h

scx_chaos Compatibility:
- Update enqueue promise handling to recognize DHQ type
- Error message updated to mention both ATQs and DHQs not supported

Benefits:
- Cache affinity: Tasks stay on origin LLC (strand)
- Controlled migration: max_imbalance prevents migration storms
- Race-free: Atomic affinity handling eliminates migration-disabled errors
- Work conservation: Cross-strand stealing when priority demands
- Scalable: Lock contention distributed across DHQ strands

Signed-off-by: Daniel Hodges <[email protected]>
1 parent baf286c commit 667b6e4

File tree

8 files changed

+513
-57
lines changed

8 files changed

+513
-57
lines changed

lib/dhq.bpf.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#include "scxtest/scx_test.h"
21
#include <scx/common.bpf.h>
32
#include <lib/sdt_task.h>
43

scheds/include/lib/dhq.h

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#pragma once
2+
3+
#ifdef __BPF__
4+
#include <scx/common.bpf.h>
5+
#include <scx/bpf_arena_common.bpf.h>
6+
#include <scx/bpf_arena_spin_lock.h>
7+
#endif /* __BPF__ */
8+
9+
#include <lib/minheap.h>
10+
11+
#define SCX_DHQ_INF_CAPACITY ((u64)-1)
12+
13+
/* Dequeue modes */
14+
#define SCX_DHQ_MODE_ALTERNATING 0
15+
#define SCX_DHQ_MODE_PRIORITY 1
16+
#define SCX_DHQ_MODE_BALANCED 2
17+
18+
/* Strand identifiers */
19+
#define SCX_DHQ_STRAND_A 0
20+
#define SCX_DHQ_STRAND_B 1
21+
#define SCX_DHQ_STRAND_AUTO 2 /* Auto-select based on balancing */
22+
23+
/**
24+
* scx_dhq - Double Helix Queue
25+
*
26+
* A queue structure inspired by DNA's double helix with two intertwined
27+
* strands. Tasks can be enqueued to either strand and dequeued according
28+
* to different strategies (alternating, priority-based, or balanced).
29+
*
30+
* For cross-LLC task migration, strands typically represent the two LLCs
31+
* sharing the queue. The max_imbalance constraint applies only to enqueue
32+
* (based on size) to prevent one LLC from flooding the DHQ. On dequeue,
33+
* asymmetric consumption is allowed so idle LLCs can freely steal work
34+
* from busy LLCs without being blocked by imbalance constraints.
35+
*
36+
* @strand_a: Min heap for strand A
37+
* @strand_b: Min heap for strand B
38+
* @lock: Arena spinlock for thread-safety
39+
* @capacity: Total capacity across both strands
40+
* @size_a: Number of tasks in strand A
41+
* @size_b: Number of tasks in strand B
42+
* @seq_a: Sequence number for strand A (FIFO mode)
43+
* @seq_b: Sequence number for strand B (FIFO mode)
44+
* @dequeue_count_a: Number of dequeues from strand A (tracking only)
45+
* @dequeue_count_b: Number of dequeues from strand B (tracking only)
46+
* @max_imbalance: Maximum size difference on enqueue (0 = no limit)
47+
* @fifo: FIFO mode flag (1 = FIFO, 0 = priority/vtime)
48+
* @last_strand: Last strand dequeued from (for alternating mode)
49+
* @mode: Dequeue mode (ALTERNATING, PRIORITY, or BALANCED)
50+
*/
51+
struct scx_dhq {
52+
scx_minheap_t *strand_a;
53+
scx_minheap_t *strand_b;
54+
arena_spinlock_t lock;
55+
u64 capacity;
56+
u64 size_a;
57+
u64 size_b;
58+
u64 seq_a;
59+
u64 seq_b;
60+
u64 dequeue_count_a;
61+
u64 dequeue_count_b;
62+
u64 max_imbalance;
63+
u8 fifo;
64+
u8 last_strand;
65+
u8 mode;
66+
};
67+
68+
typedef struct scx_dhq __arena scx_dhq_t;
69+
70+
#ifdef __BPF__
71+
/**
72+
* scx_dhq_create_internal - Create a double helix queue
73+
* @fifo: true for FIFO mode, false for vtime/priority mode
74+
* @capacity: Total capacity (SCX_DHQ_INF_CAPACITY for unlimited)
75+
* @mode: Dequeue mode (ALTERNATING, PRIORITY, or BALANCED)
76+
* @max_imbalance: Maximum size difference allowed on enqueue (0 for unlimited)
77+
* NOTE: Only applies to enqueue, not dequeue. This prevents
78+
* one strand from flooding the queue while allowing asymmetric
79+
* consumption on dequeue for efficient cross-LLC work stealing.
80+
*
81+
* Returns: Pointer to the new scx_dhq_t, passed back as a u64, or NULL on failure
82+
*/
83+
u64 scx_dhq_create_internal(bool fifo, size_t capacity, u64 mode, u64 max_imbalance);
84+
85+
#define scx_dhq_create(fifo, mode) \
86+
scx_dhq_create_internal((fifo), SCX_DHQ_INF_CAPACITY, (mode), 0)
87+
88+
#define scx_dhq_create_size(fifo, capacity, mode) \
89+
scx_dhq_create_internal((fifo), (capacity), (mode), 0)
90+
91+
#define scx_dhq_create_balanced(fifo, capacity, mode, max_imbalance) \
92+
scx_dhq_create_internal((fifo), (capacity), (mode), (max_imbalance))
93+
94+
/**
95+
* scx_dhq_insert - Insert task into DHQ in FIFO mode
96+
* @dhq_ptr: Pointer to double helix queue
97+
* @taskc_ptr: Pointer to task context
98+
* @strand: Target strand (STRAND_A, STRAND_B, or STRAND_AUTO)
99+
*
100+
* Returns: 0 on success, negative error code on failure
101+
*/
102+
int scx_dhq_insert(scx_dhq_t *dhq_ptr, u64 taskc_ptr, u64 strand);
103+
104+
/**
105+
* scx_dhq_insert_vtime - Insert task into DHQ with vtime/priority
106+
* @dhq: Pointer to double helix queue
107+
* @taskc_ptr: Pointer to task context
108+
* @vtime: Virtual time / priority value
109+
* @strand: Target strand (STRAND_A, STRAND_B, or STRAND_AUTO)
110+
*
111+
* Returns: 0 on success, negative error code on failure
112+
*/
113+
int scx_dhq_insert_vtime(scx_dhq_t *dhq, u64 taskc_ptr, u64 vtime, u64 strand);
114+
115+
/**
116+
* scx_dhq_nr_queued - Get total number of queued tasks
117+
* @dhq: Pointer to double helix queue
118+
*
119+
* Returns: Total number of tasks in both strands
120+
*/
121+
int scx_dhq_nr_queued(scx_dhq_t *dhq);
122+
123+
/**
124+
* scx_dhq_nr_queued_strand - Get number of queued tasks in specific strand
125+
* @dhq: Pointer to double helix queue
126+
* @strand: Target strand (STRAND_A or STRAND_B)
127+
*
128+
* Returns: Number of tasks in specified strand
129+
*/
130+
int scx_dhq_nr_queued_strand(scx_dhq_t *dhq, u64 strand);
131+
132+
/**
133+
* scx_dhq_pop - Dequeue task according to queue mode
134+
* @dhq: Pointer to double helix queue
135+
*
136+
* Returns: Task context pointer or NULL if empty
137+
*/
138+
u64 scx_dhq_pop(scx_dhq_t *dhq);
139+
140+
/**
141+
* scx_dhq_pop_strand - Dequeue task from specific strand
142+
* @dhq: Pointer to double helix queue
143+
* @strand: Target strand (STRAND_A or STRAND_B)
144+
*
145+
* Returns: Task context pointer or NULL if strand is empty
146+
*/
147+
u64 scx_dhq_pop_strand(scx_dhq_t *dhq, u64 strand);
148+
149+
/**
150+
* scx_dhq_peek - Peek at next task without removing it
151+
* @dhq: Pointer to double helix queue
152+
*
153+
* Returns: Task context pointer or NULL if empty
154+
*/
155+
u64 scx_dhq_peek(scx_dhq_t *dhq);
156+
157+
/**
158+
* scx_dhq_peek_strand - Peek at next task in specific strand
159+
* @dhq: Pointer to double helix queue
160+
* @strand: Target strand (STRAND_A or STRAND_B)
161+
*
162+
* Returns: Task context pointer or NULL if strand is empty
163+
*/
164+
u64 scx_dhq_peek_strand(scx_dhq_t *dhq, u64 strand);
165+
#endif /* __BPF__ */

scheds/rust/scx_chaos/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ fn main() {
1010
.add_source("src/bpf/lib/arena.bpf.c")
1111
.add_source("src/bpf/lib/atq.bpf.c")
1212
.add_source("src/bpf/lib/bitmap.bpf.c")
13+
.add_source("src/bpf/lib/dhq.bpf.c")
1314
.add_source("src/bpf/lib/minheap.bpf.c")
1415
.add_source("src/bpf/lib/rbtree.bpf.c")
1516
.add_source("src/bpf/lib/sdt_alloc.bpf.c")

scheds/rust/scx_chaos/src/bpf/main.bpf.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,8 @@ complete_p2dq_enqueue_move(struct enqueue_promise *pro,
505505
break;
506506
case P2DQ_ENQUEUE_PROMISE_ATQ_FIFO:
507507
case P2DQ_ENQUEUE_PROMISE_ATQ_VTIME:
508-
scx_bpf_error("chaos: ATQs not supported");
508+
case P2DQ_ENQUEUE_PROMISE_DHQ_VTIME:
509+
scx_bpf_error("chaos: ATQs/DHQs not supported");
509510
break;
510511
case P2DQ_ENQUEUE_PROMISE_FAILED:
511512
scx_bpf_error("chaos: delayed async_p2dq_enqueue failed");

scheds/rust/scx_p2dq/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ fn main() {
1010
.enable_skel("src/bpf/main.bpf.c", "bpf")
1111
.add_source("src/bpf/lib/arena.bpf.c")
1212
.add_source("src/bpf/lib/atq.bpf.c")
13+
.add_source("src/bpf/lib/dhq.bpf.c")
1314
.add_source("src/bpf/lib/bitmap.bpf.c")
1415
.add_source("src/bpf/lib/cpumask.bpf.c")
1516
.add_source("src/bpf/lib/minheap.bpf.c")

0 commit comments

Comments
 (0)