Skip to content

Commit a16a427

Browse files
committed
CA-424055: NUMA: avoid using up the entire memory on node0
Versions of Xen without per-node claim support won't allow using up all the memory from node0 (Xen will move some of the VM's memory to other nodes). On versions of Xen with per-node claim support Xen will respect the claim, but it is undesirable to completely run out of memory on node0, since some devices may require the low 4GiB of RAM for DMA. Xen would reserve 2^32 bytes on node0, and only use them as a last resort. However, we don't know how much of that has been used up already. For now reserve just 2^31 bytes by default, and make this configurable in xenopsd.conf. Signed-off-by: Edwin Török <edwin.torok@citrix.com>
1 parent 6acd011 commit a16a427

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

ocaml/xenopsd/lib/xenopsd.ml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ let numa_placement_compat = ref true
7272

7373
let numa_best_effort_prio_mem_only = ref false
7474

75+
(* Exponent N: the NUMA planner reserves 2^N bytes on node0. Set through the
   "numa-reserve-node0-dmaheap-bits" option; defaults to 31. *)
let numa_reserve_node0_dmaheap_bits = ref 31
76+
7577
(* O(N^2) operations, until we get a xenstore cache, so use a small number here *)
7678
let vm_guest_agent_xenstore_quota = ref 128
7779

@@ -282,6 +284,12 @@ let options =
282284
filtered NUMA nodes based on available memory. Only use if there are \
283285
issues with the new best effort policy"
284286
)
287+
; ( "numa-reserve-node0-dmaheap-bits"
288+
, Arg.Int (fun x -> numa_reserve_node0_dmaheap_bits := x)
289+
, (fun () -> string_of_int !numa_reserve_node0_dmaheap_bits)
290+
, "Reserve 2^N bytes on node0 in the NUMA planner, similar to how Xen would\n\
291+
\ protect the low 4GiB of the memory for the DMA heap"
292+
)
285293
; ( "pci-quarantine"
286294
, Arg.Bool (fun b -> pci_quarantine := b)
287295
, (fun () -> string_of_bool !pci_quarantine)

ocaml/xenopsd/xc/domain.ml

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,20 @@ let numa_mutex = Mutex.create ()
10991099
(* protected by numa_mutex *)
11001100
let numa_resources = ref NUMAResources.empty
11011101

1102+
(** [numa_reserve_node0 mem] returns [mem] (free memory on node0, in bytes)
    reduced by the configured reservation of
    2^[!Xenopsd.numa_reserve_node0_dmaheap_bits] bytes, never going below
    [0L].

    The exponent comes from configuration and is not validated elsewhere, so
    it is clamped here: [Int64.shift_left] is unspecified for shift counts
    outside [0, 63], and a count of 63 produces a negative value. A negative
    exponent reserves nothing; an exponent above 62 reserves everything. *)
let numa_reserve_node0 mem =
  let bits = !Xenopsd.numa_reserve_node0_dmaheap_bits in
  let reserved =
    if bits < 0 then
      0L
    else if bits > 62 then
      Int64.max_int
    else
      Int64.shift_left 1L bits
  in
  (* Compare before subtracting so the result can neither wrap around nor go
     negative. *)
  let mem' =
    if Int64.compare mem reserved <= 0 then
      0L
    else
      Int64.sub mem reserved
  in
  (* Prefer nodes other than node0 when node0 is low on memory.
     This is reserved for the DMA heap in Xen, and even if we try to make a VM
     use it up, Xen will move some of its memory to other nodes to avoid
     running out.
     Without the claim API we have no way of forcing or querying this
     (and even with the claim API it'd be desirable to avoid running out on
     node0).
  *)
  D.debug "numa_reserve_node0, reducing free memory: %Lu -> %Lu" mem mem' ;
  mem'
1115+
11021116
let numa_init () =
11031117
let xcext = Xenctrlext.get_handle () in
11041118
let host = Lazy.force numa_hierarchy in
@@ -1129,7 +1143,19 @@ let numa_placement domid ~vcpus ~cores ~memory ~required_free affinity =
11291143
let numa_meminfo = (numainfo xcext).memory |> Array.to_seq in
11301144
let nodes =
11311145
Seq.map2
1132-
(fun node m -> (node, NUMA.resource host node ~memory:m.memfree))
1146+
(fun node m ->
1147+
( node
1148+
, let (NUMA.Node nodeid) = node in
1149+
let memory = m.memfree in
1150+
let memory =
1151+
if nodeid = 0 then
1152+
numa_reserve_node0 memory
1153+
else
1154+
memory
1155+
in
1156+
NUMA.resource host node ~memory
1157+
)
1158+
)
11331159
(NUMA.nodes host) numa_meminfo
11341160
in
11351161
let vm = NUMARequest.make ~memory ~vcpus ~cores in

0 commit comments

Comments
 (0)