diff --git a/build_sdk.py b/build_sdk.py index 1956f135e..94a5bc4ba 100644 --- a/build_sdk.py +++ b/build_sdk.py @@ -31,7 +31,7 @@ KERNEL_CONFIG_TYPE = Union[bool, str] KERNEL_OPTIONS = Dict[str, KERNEL_CONFIG_TYPE] -X86_64_TOOLCHAIN = "" +X86_64_TOOLCHAIN = "x86_64-linux-gnu-" AARCH64_TOOLCHAIN = "aarch64-none-elf-" # We can use the same toolchain for both 32-bit and 64-bit RISC-V builds. RISCV_TOOLCHAIN = "riscv64-unknown-elf-" @@ -456,18 +456,39 @@ class ConfigInfo: "hello": Path("example/polarfire/hello") } ), - # BoardInfo( - # name="x86_64", - # arch=BoardArch.X86_64, - # gcc_flags = "", - # loader_link_address=0x80200000, - # kernel_options = { - # "KernelIsMCS": True, - # "KernelPlatform": "pc99", - # "KernelSel4Arch": "x86_64", - # }, - # examples = {} - # ), + BoardInfo( + name="x86_64_virt", + arch=BoardArch.X86_64, + gcc_flags = "GCC_MARCH=nehalem", + loader_link_address=0x10000000, # 256MB + kernel_options = { + "KernelIsMCS": True, + "KernelPlatform": "pc99", + "KernelSel4Arch": "x86_64", + "KernelVTX": True, + "KernelX86MicroArch": "nehalem", + }, + examples = { + "hello": Path("example/x86_64_virt/hello") + } + ), + BoardInfo( + name="x86_64_supermicro", + arch=BoardArch.X86_64, + gcc_flags = "GCC_MARCH=skylake", + loader_link_address=0x10000000, # 256MB + kernel_options = { + "KernelIsMCS": True, + "KernelPlatform": "pc99", + "KernelSel4Arch": "x86_64", + "KernelVTX": True, + "KernelLAPICMode": "X2APIC", + "KernelX86MicroArch": "skylake", + }, + examples = { + "hello": Path("example/x86_64_supermicro/hello") + } + ), ) SUPPORTED_CONFIGS = ( @@ -615,7 +636,7 @@ def build_sel4( elif board.arch == BoardArch.AARCH64: toolchain_config = f"-DCROSS_COMPILER_PREFIX={AARCH64_TOOLCHAIN}" elif board.arch == BoardArch.X86_64: - if host_arch != "x86_64": + if host_platform.machine() != "x86_64": assert False, "@ivanv: Figure out cross-compiling to x86-64" else: toolchain_config = "" @@ -734,6 +755,8 @@ def build_lib_component( arch_args = f"ARCH=riscv64 TOOLCHAIN={RISCV_TOOLCHAIN}" elif board.arch == BoardArch.RISCV32: arch_args = f"ARCH=riscv32 TOOLCHAIN={RISCV_TOOLCHAIN}" + elif board.arch == BoardArch.X86_64: + arch_args = f"ARCH=x86_64 TOOLCHAIN={X86_64_TOOLCHAIN}" else: raise Exception(f"Unexpected arch given: {board.arch}", board.arch) diff --git a/example/x86_64_supermicro/hello/Makefile b/example/x86_64_supermicro/hello/Makefile new file mode 100644 index 000000000..46dbb6d1c --- /dev/null +++ b/example/x86_64_supermicro/hello/Makefile @@ -0,0 +1,55 @@ +# +# Copyright 2021, Breakaway Consulting Pty. Ltd. +# +# SPDX-License-Identifier: BSD-2-Clause +# +ifeq ($(strip $(BUILD_DIR)),) +$(error BUILD_DIR must be specified) +endif + +ifeq ($(strip $(MICROKIT_SDK)),) +$(error MICROKIT_SDK must be specified) +endif + +ifeq ($(strip $(MICROKIT_BOARD)),) +$(error MICROKIT_BOARD must be specified) +endif + +ifeq ($(strip $(MICROKIT_CONFIG)),) +$(error MICROKIT_CONFIG must be specified) +endif + +TOOLCHAIN := x86_64-linux-gnu + +ARCH := skylake + +CC := $(TOOLCHAIN)-gcc +LD := $(TOOLCHAIN)-ld +AS := $(TOOLCHAIN)-as +MICROKIT_TOOL ?= $(MICROKIT_SDK)/bin/microkit + +HELLO_OBJS := hello.o + +BOARD_DIR := $(MICROKIT_SDK)/board/$(MICROKIT_BOARD)/$(MICROKIT_CONFIG) + +IMAGES := hello.elf +CFLAGS := -march=$(ARCH) -DARCH_x86_64 -nostdlib -ffreestanding -g3 -O3 -Wall -Wno-unused-function -Werror -I$(BOARD_DIR)/include +LDFLAGS := -L$(BOARD_DIR)/lib +LIBS := -lmicrokit -Tmicrokit.ld + +IMAGE_FILE = $(BUILD_DIR)/loader.img +REPORT_FILE = $(BUILD_DIR)/report.txt + +all: $(IMAGE_FILE) + +$(BUILD_DIR)/%.o: %.c Makefile + $(CC) -c $(CFLAGS) $< -o $@ + +$(BUILD_DIR)/%.o: %.s Makefile + $(AS) -g3 -march=$(ARCH) $< -o $@ + +$(BUILD_DIR)/hello.elf: $(addprefix $(BUILD_DIR)/, $(HELLO_OBJS)) + $(LD) $(LDFLAGS) $^ $(LIBS) -o $@ + +$(IMAGE_FILE) $(REPORT_FILE): $(addprefix $(BUILD_DIR)/, $(IMAGES)) hello.system + $(MICROKIT_TOOL) hello.system --search-path $(BUILD_DIR) --board $(MICROKIT_BOARD) --config $(MICROKIT_CONFIG) --x86-machine machine.json -o $(IMAGE_FILE) -r $(REPORT_FILE) diff --git a/example/x86_64_supermicro/hello/hello.c b/example/x86_64_supermicro/hello/hello.c new file mode 100644 index 000000000..9519a6cb1 --- /dev/null +++ b/example/x86_64_supermicro/hello/hello.c @@ -0,0 +1,18 @@ +/* + * Copyright 2021, Breakaway Consulting Pty. Ltd. + * + * SPDX-License-Identifier: BSD-2-Clause + */ +#include +#include + +void +init(void) +{ + microkit_dbg_puts("hello, world\n"); +} + +void +notified(microkit_channel ch) +{ +} \ No newline at end of file diff --git a/example/x86_64_supermicro/hello/hello.system b/example/x86_64_supermicro/hello/hello.system new file mode 100644 index 000000000..afd6f12b4 --- /dev/null +++ b/example/x86_64_supermicro/hello/hello.system @@ -0,0 +1,11 @@ + + + + + + + diff --git a/example/x86_64_supermicro/hello/machine.json b/example/x86_64_supermicro/hello/machine.json new file mode 100644 index 000000000..15457c409 --- /dev/null +++ b/example/x86_64_supermicro/hello/machine.json @@ -0,0 +1,62 @@ +{ + "memory": [ + { + "base": 1048576, + "size": 1768484864 + }, + { + "base": 1812037632, + "size": 1839104 + }, + { + "base": 1859203072, + "size": 19845120 + }, + { + "base": 4294967296, + "size": 32212254720 + } + ], + "kdevs": [ + { + "name": "apic", + "base": 4276092928, + "size": 4096 + }, + { + "name": "ioapic.0", + "base": 4273995776, + "size": 4096 + }, + { + "name": "drhu.0", + "base": 3321872384, + "size": 4096 + }, + { + "name": "drhu.1", + "base": 3774857216, + "size": 4096 + }, + { + "name": "drhu.2", + "base": 4227842048, + "size": 4096 + }, + { + "name": "drhu.3", + "base": 2868887552, + "size": 4096 + } + ], + "rmrrs": [ + { + "device": 160, + "base": 1853911040, + "limit": 1853980671 + } + ], + "bootinfo": { + "numIOPTLevels": 4 + } +} diff --git a/example/x86_64_virt/hello/Makefile b/example/x86_64_virt/hello/Makefile new file mode 100644 index 000000000..42cdcc289 --- /dev/null +++ b/example/x86_64_virt/hello/Makefile @@ -0,0 +1,55 @@ +# +# Copyright 2021, Breakaway Consulting Pty. Ltd. +# +# SPDX-License-Identifier: BSD-2-Clause +# +ifeq ($(strip $(BUILD_DIR)),) +$(error BUILD_DIR must be specified) +endif + +ifeq ($(strip $(MICROKIT_SDK)),) +$(error MICROKIT_SDK must be specified) +endif + +ifeq ($(strip $(MICROKIT_BOARD)),) +$(error MICROKIT_BOARD must be specified) +endif + +ifeq ($(strip $(MICROKIT_CONFIG)),) +$(error MICROKIT_CONFIG must be specified) +endif + +TOOLCHAIN := x86_64-linux-gnu + +ARCH := nehalem + +CC := $(TOOLCHAIN)-gcc +LD := $(TOOLCHAIN)-ld +AS := $(TOOLCHAIN)-as +MICROKIT_TOOL ?= $(MICROKIT_SDK)/bin/microkit + +HELLO_OBJS := hello.o + +BOARD_DIR := $(MICROKIT_SDK)/board/$(MICROKIT_BOARD)/$(MICROKIT_CONFIG) + +IMAGES := hello.elf +CFLAGS := -march=$(ARCH) -DARCH_x86_64 -nostdlib -ffreestanding -g3 -O3 -Wall -Wno-unused-function -Werror -I$(BOARD_DIR)/include +LDFLAGS := -L$(BOARD_DIR)/lib +LIBS := -lmicrokit -Tmicrokit.ld + +IMAGE_FILE = $(BUILD_DIR)/loader.img +REPORT_FILE = $(BUILD_DIR)/report.txt + +all: $(IMAGE_FILE) + +$(BUILD_DIR)/%.o: %.c Makefile + $(CC) -c $(CFLAGS) $< -o $@ + +$(BUILD_DIR)/%.o: %.s Makefile + $(AS) -g3 -march=$(ARCH) $< -o $@ + +$(BUILD_DIR)/hello.elf: $(addprefix $(BUILD_DIR)/, $(HELLO_OBJS)) + $(LD) $(LDFLAGS) $^ $(LIBS) -o $@ + +$(IMAGE_FILE) $(REPORT_FILE): $(addprefix $(BUILD_DIR)/, $(IMAGES)) hello.system + $(MICROKIT_TOOL) hello.system --search-path $(BUILD_DIR) --board $(MICROKIT_BOARD) --config $(MICROKIT_CONFIG) --x86-machine machine.json -o $(IMAGE_FILE) -r $(REPORT_FILE) diff --git a/example/x86_64_virt/hello/hello.c b/example/x86_64_virt/hello/hello.c new file mode 100644 index 000000000..9519a6cb1 --- /dev/null +++ b/example/x86_64_virt/hello/hello.c @@ -0,0 +1,18 @@ +/* + * Copyright 2021, Breakaway Consulting Pty. Ltd. + * + * SPDX-License-Identifier: BSD-2-Clause + */ +#include +#include + +void +init(void) +{ + microkit_dbg_puts("hello, world\n"); +} + +void +notified(microkit_channel ch) +{ +} \ No newline at end of file diff --git a/example/x86_64_virt/hello/hello.system b/example/x86_64_virt/hello/hello.system new file mode 100644 index 000000000..afd6f12b4 --- /dev/null +++ b/example/x86_64_virt/hello/hello.system @@ -0,0 +1,11 @@ + + + + + + + diff --git a/example/x86_64_virt/hello/machine.json b/example/x86_64_virt/hello/machine.json new file mode 100644 index 000000000..6204e7b9c --- /dev/null +++ b/example/x86_64_virt/hello/machine.json @@ -0,0 +1,33 @@ +{ + "memory": [ + { + "base": 1048576, + "size": 2146299904 + }, + { + "base": 4294967296, + "size": 2147483648 + } + ], + "kdevs": [ + { + "name": "apic", + "base": 4276092928, + "size": 4096 + }, + { + "name": "ioapic.0", + "base": 4273995776, + "size": 4096 + }, + { + "name": "drhu.0", + "base": 4275634176, + "size": 4096 + } + ], + "rmrrs": [], + "bootinfo": { + "numIOPTLevels": 3 + } +} diff --git a/example/x86_64_virt/sim b/example/x86_64_virt/sim new file mode 100755 index 000000000..e05334f87 --- /dev/null +++ b/example/x86_64_virt/sim @@ -0,0 +1,60 @@ +#!/bin/sh +# +# This is a wrapper around QEMU that can be used to test the images built for +# the x86_64_virt microkit board. Pass the "loader.img" file as first argument. +# +set -eu + +# QEMU machine setup. +MACHINE=q35,accel=kvm,kernel-irqchip=split +CPU=Nehalem,+fsgsbase,+pdpe1gb,+pcid,+invpcid,+xsave,+xsaves,+xsaveopt,+vmx,+vme +RAM=4G + +# Parse the command line. +if [ $# -lt 1 ]; then + echo "Usage: $(basename "$0") [qemu args]" >&2 + exit 1 +fi +image="$1" +shift + +# Create a temporary directory. +TMPDIR="$(mktemp -d)" +trap "rm -rf -- '$TMPDIR'" EXIT + +# A subdirectory for the ISO files. +ISODIR="$TMPDIR"/iso +mkdir -p "$ISODIR" + +# Copy the microkit image. +cp "$image" "$ISODIR"/image + +# Create the grub configuration file. +mkdir -p "$ISODIR"/boot/grub +cat > "$ISODIR"/boot/grub/grub.cfg <<-EOF +set default=0 +set timeout=0 + +serial --unit=0 --speed=115200 +terminal_input serial +terminal_output serial + +menuentry 'microkit' { + multiboot2 /image +} +EOF + +# Build a bootable grub ISO image. +grub-mkrescue -o "$TMPDIR"/grub.iso "$ISODIR" + +# Run qemu. +set -x +exec qemu-system-x86_64 \ + -machine "$MACHINE" \ + -cpu "$CPU" \ + -m "$RAM" \ + -display none \ + -serial mon:stdio \ + -device intel-iommu \ + -cdrom "$TMPDIR"/grub.iso \ + "$@" diff --git a/libmicrokit/Makefile b/libmicrokit/Makefile index 62cf5e5da..a6b295a42 100644 --- a/libmicrokit/Makefile +++ b/libmicrokit/Makefile @@ -7,17 +7,21 @@ ifeq ($(strip $(BUILD_DIR)),) $(error BUILD_DIR must be specified) endif -# ifeq ($(strip $(GCC_CPU)),) -# $(error GCC_CPU must be specified) -# endif - ifeq ($(strip $(ARCH)),) $(error ARCH must be specified) endif -# ifeq ($(strip $(TOOLCHAIN)),) -# $(error TOOLCHAIN must be specified) -# endif +ifeq ($(ARCH),aarch64) +ifeq ($(strip $(GCC_CPU)),) +$(error GCC_CPU must be specified for ARCH=$(ARCH)) +endif +endif + +ifeq ($(ARCH),x86_64) +ifeq ($(strip $(GCC_MARCH)),) +$(error GCC_MARCH must be specified for ARCH=$(ARCH)) +endif +endif ifeq ($(ARCH),aarch64) C_FLAGS_ARCH := -mcpu=$(GCC_CPU) -mgeneral-regs-only @@ -38,21 +42,21 @@ else ifeq ($(ARCH),riscv32) ASM_CPP_FLAGS_ARCH := -march=$(MARCH) -mabi=$(MABI) ASM_FLAGS_ARCH := -g -march=$(MARCH) -mabi=$(MABI) ARCH_DIR := riscv +else ifeq ($(ARCH),x86_64) + C_FLAGS_ARCH := -march=$(GCC_MARCH) -DARCH_x86_64 + ASM_CPP_FLAGS_ARCH := -march=generic64 + ASM_FLAGS_ARCH := -march=generic64 + ARCH_DIR := x86_64 else - $(error ARCH must be aarch64 or riscv64 or riscv32) + $(error ARCH must be aarch64 or riscv64 or riscv32 or x86_64) endif C_FLAGS := -std=gnu11 -g -O3 -nostdlib -ffreestanding -Wall -Wno-maybe-uninitialized -Wno-unused-function -Werror -Iinclude -I$(SEL4_SDK)/include $(C_FLAGS_ARCH) ASM_CPP_FLAGS := -x assembler-with-cpp -c -g $(ASM_CPP_FLAGS_ARCH) ASM_FLAGS := -g $(ASM_FLAGS_ARCH) -ifeq ($(ARCH),x86_64) - GCC := gcc - AS := as -else - GCC := $(TOOLCHAIN)gcc - AS := $(TOOLCHAIN)as -endif +GCC := $(TOOLCHAIN)gcc +AS := $(TOOLCHAIN)as LIBS := libmicrokit.a OBJS := main.o crt0.o dbg.o diff --git a/libmicrokit/include/microkit.h b/libmicrokit/include/microkit.h index b4ec83ce4..7d7afecc9 100644 --- a/libmicrokit/include/microkit.h +++ b/libmicrokit/include/microkit.h @@ -124,7 +124,11 @@ microkit_pd_restart(microkit_id pd, uintptr_t entry_point) seL4_Error err; seL4_UserContext ctxt; memzero(&ctxt, sizeof(seL4_UserContext)); +#ifdef ARCH_x86_64 + ctxt.rip = entry_point; +#else ctxt.pc = entry_point; +#endif err = seL4_TCB_WriteRegisters( BASE_TCB_CAP + pd, true, diff --git a/libmicrokit/src/x86_64/crt0.s b/libmicrokit/src/x86_64/crt0.s new file mode 100644 index 000000000..002cf2c83 --- /dev/null +++ b/libmicrokit/src/x86_64/crt0.s @@ -0,0 +1,5 @@ + .section .text.start + .globl _start +_start: + leaq 0xff0 + _stack(%rip), %rsp + call main diff --git a/loader/Makefile b/loader/Makefile index 845215dd9..fbc857b4e 100644 --- a/loader/Makefile +++ b/loader/Makefile @@ -7,10 +7,6 @@ ifeq ($(strip $(BUILD_DIR)),) $(error BUILD_DIR must be specified) endif -# ifeq ($(strip $(GCC_CPU)),) -# $(error GCC_CPU must be specified) -# endif - ifeq ($(strip $(BOARD)),) $(error BOARD must be specified) endif @@ -23,25 +19,24 @@ ifeq ($(strip $(ARCH)),) $(error ARCH must be specified) endif -# ifeq ($(strip $(TOOLCHAIN)),) -# $(error TOOLCHAIN must be specified) -# endif +ifeq ($(ARCH),aarch64) +ifeq ($(strip $(GCC_CPU)),) +$(error GCC_CPU must be specified for ARCH=$(ARCH)) +endif +endif + +ifeq ($(strip $(TOOLCHAIN)),) +$(error TOOLCHAIN must be specified) +endif ifeq ($(strip $(NUM_CPUS)),) $(error NUM_CPUS must be specified) endif -ifeq ($(ARCH),x86_64) - GCC := gcc - AS := as - CPP := cpp - ld := ld -else - GCC := $(TOOLCHAIN)gcc - AS := $(TOOLCHAIN)as - CPP := $(TOOLCHAIN)cpp - LD := $(TOOLCHAIN)ld -endif +GCC := $(TOOLCHAIN)gcc +AS := $(TOOLCHAIN)as +CPP := $(TOOLCHAIN)cpp +LD := $(TOOLCHAIN)ld ifeq ($(ARCH),aarch64) # FIXME @ivanv: investigate why having -O3 causes QEMU ARM virt to not be able to boot. @@ -62,11 +57,16 @@ else ifeq ($(ARCH),riscv32) ASM_FLAGS_ARCH := -march=$(MARCH) -mabi=$(MABI) LD_FLAGS_ARCH := -m elf32lriscv else ifeq ($(ARCH),x86_64) - C_FLAGS := -std=gnu11 -g -O3 -nostdlib -ffreestanding -DBOARD_$(BOARD) -DARCH_x86_64 -Wall -Werror - ASM_CPP_FLAGS := -x assembler-with-cpp -c -g -DBOARD_$(BOARD) - ASM_FLAGS := -g + CONFIG_CFLAGS = \ + -DCONFIG_MULTIBOOT_GRAPHICS_MODE_NONE \ + -DCONFIG_MULTIBOOT1_HEADER \ + -DCONFIG_MULTIBOOT2_HEADER + C_FLAGS_ARCH := -O3 -m32 $(CONFIG_CFLAGS) + ASM_CPP_FLAGS_ARCH := -m32 $(CONFIG_CFLAGS) -D__ASM__HEADER__ + ASM_FLAGS_ARCH := --32 + LD_FLAGS_ARCH := -m elf_i386 else - $(error ARCH must be aarch64 or riscv64 or riscv32) + $(error ARCH must be aarch64 or riscv64 or riscv32 or x86_64) endif C_FLAGS := -std=gnu11 -g -nostdlib -ffreestanding -DBOARD_$(BOARD) -DNUM_CPUS=$(NUM_CPUS) -DARCH_$(ARCH) -Wall -Werror $(C_FLAGS_ARCH) @@ -87,8 +87,11 @@ else ifeq ($(ARCH),riscv64) else ifeq ($(ARCH),riscv32) OBJECTS := loader.o crt0.o ARCH_DIR := riscv +else ifeq ($(ARCH),x86_64) + OBJECTS := loader.o crt0.o multiboot.o + ARCH_DIR := x86_64 else - $(error ARCH must be aarch64 or riscv64 or riscv32) + $(error ARCH must be aarch64 or riscv64 or riscv32 or x86_64) endif $(BUILD_DIR)/%.o : src/$(ARCH_DIR)/%.S diff --git a/loader/src/x86_64/crt0.s b/loader/src/x86_64/crt0.s index e69de29bb..a23a48875 100644 --- a/loader/src/x86_64/crt0.s +++ b/loader/src/x86_64/crt0.s @@ -0,0 +1,51 @@ +/* + * Copyright 2023, Neutrality. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +.section .text.start +.code32 + +/* + * Loader entry point. + */ + .globl start +start: + /* Load a stack. */ + leal stack_top, %esp + + /* Reset the EFLAGS register. */ + pushl $0 + popf + + /* Save the %eax and %ebx registers. */ + pushl %ebx /* multiboot_info_ptr */ + pushl %eax /* multiboot_magic */ + + /* Call the loader C function to tweak the multiboot info structure. + * The multiboot args are already on the stack. */ + call loader + testl %eax, %eax + js halt + + /* Jump into the seL4 kernel. */ + popl %eax + popl %ebx + leal kernel_entry, %ecx + jmp *(%ecx) + + /* Here be dragons. */ +halt: + cli +1: + hlt + jmp 1b + +/* + * Allocate a small stack for the few things we need to push there. + */ + .align 16 +stack: + .fill 0x100 +stack_top: diff --git a/loader/src/x86_64/loader.c b/loader/src/x86_64/loader.c index d0007d057..d51ffadb9 100644 --- a/loader/src/x86_64/loader.c +++ b/loader/src/x86_64/loader.c @@ -1,249 +1,160 @@ /* - * Copyright 2021, Breakaway Consulting Pty. Ltd. + * Copyright 2023, Neutrality. * * SPDX-License-Identifier: BSD-2-Clause */ + #include -#include - -// @ivanv: merge this with loader.c aarch64 - -_Static_assert(sizeof(uintptr_t) == 8 || sizeof(uintptr_t) == 4, "Expect uintptr_t to be 32-bit or 64-bit"); - -#if UINTPTR_MAX == 0xffffffffUL -#define WORD_SIZE 32 -#else -#define WORD_SIZE 64 -#endif - -#if WORD_SIZE == 32 -#define MAGIC 0x5e14dead -#else -#define MAGIC 0x5e14dead14de5ead -#endif - -#define ALIGN(n) __attribute__((__aligned__(n))) - -#define MASK(x) ((1U << x) - 1) - -#define STACK_SIZE 4096 - -#define FLAG_SEL4_HYP (1UL << 0) - -struct region { - uintptr_t load_addr; - uintptr_t size; - uintptr_t offset; - uintptr_t type; -}; - -struct loader_data { - uintptr_t magic; - uintptr_t flags; - uintptr_t kernel_entry; - uintptr_t ui_p_reg_start; - uintptr_t ui_p_reg_end; - uintptr_t pv_offset; - uintptr_t v_entry; - uintptr_t extra_device_addr_p; - uintptr_t extra_device_size; - - uintptr_t num_regions; - struct region regions[]; -}; - -typedef void (*sel4_entry)( - uintptr_t ui_p_reg_start, - uintptr_t ui_p_reg_end, - intptr_t pv_offset, - uintptr_t v_entry, - uintptr_t dtb_addr_p, - uintptr_t dtb_size, - // uintptr_t hart_id, - // uintptr_t core_id, - uintptr_t extra_device_addr_p, - uintptr_t extra_device_size -); - -char _stack[STACK_SIZE] ALIGN(16); - -/* Paging structures for kernel mapping */ -uint64_t boot_lvl1_pt[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl2_pt[1 << 9] ALIGN(1 << 12); -/* Paging structures for identity mapping */ -uint64_t boot_lvl2_pt_elf[1 << 9] ALIGN(1 << 12); - -extern char _text; -extern char _text_end; -extern char _bss_end; -const struct loader_data *loader_data = (void *)&_bss_end; - -static void -memcpy(void *dst, const void *src, size_t sz) -{ - char *dst_ = dst; - const char *src_ = src; - while (sz-- > 0) { - *dst_++ = *src_++; - } -} +#include "utils.h" +#include "multiboot.h" -#define SBI_CONSOLE_PUTCHAR 1 +/* + * These global variables are overwritten by the microkit tool when building the + * image. + */ +uint32_t kernel_entry; +uint32_t monitor_addr; +uint32_t monitor_size; +uint64_t extra_device_addr_p; +uint64_t extra_device_size; -#define SBI_CALL(which, arg0, arg1, arg2) ({ \ - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); \ - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); \ - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); \ - register uintptr_t a7 asm ("a7") = (uintptr_t)(which); \ - asm volatile ("ecall" \ - : "+r" (a0) \ - : "r" (a1), "r" (a2), "r" (a7) \ - : "memory"); \ - a0; \ -}) +/* Name the initial task. This adds nothing but flare to the boot logs. */ +static char *monitor_cmdline = "microkit"; -#define SBI_CALL_1(which, arg0) SBI_CALL(which, arg0, 0, 0) +/* Hardcode the serial port address. + * @mat: one day this should be configurable. */ +static const uint16_t serial_port = 0x3f8; -static void -putc(uint8_t ch) +/* Round a number up to the next 64-bit boundary. */ +static uint32_t roundup64(uint32_t n) { - SBI_CALL_1(SBI_CONSOLE_PUTCHAR, ch); + if (n & 7) + n = (n & ~7) + 8; + return n; } -static void -puts(const char *s) +/* Serial code taken from seL4/src/plat/pc99/machine/io.c */ +static void serial_init(void) { - while (*s) { - putc(*s); - s++; - } + while (!(in8(serial_port + 5) & 0x60)) /* wait until not busy */ + ; + + out8(serial_port + 1, 0x00); /* disable generating interrupts */ + out8(serial_port + 3, 0x80); /* line control register: command: set divisor */ + out8(serial_port, 0x01); /* set low byte of divisor to 0x01 = 115200 baud */ + out8(serial_port + 1, 0x00); /* set high byte of divisor to 0x00 */ + out8(serial_port + 3, 0x03); /* line control register: set 8 bit, no parity, 1 stop bit */ + out8(serial_port + 4, 0x0b); /* modem control register: set DTR/RTS/OUT2 */ + + in8(serial_port); /* clear receiver serial_port */ + in8(serial_port + 5); /* clear line status serial_port */ + in8(serial_port + 6); /* clear modem status serial_port */ } -static char -hexchar(unsigned int v) +static inline void putc(uint8_t ch) { - return v < 10 ? '0' + v : ('a' - 10) + v; + while ((in8(serial_port + 5) & 0x20) == 0) + ; + out8(serial_port, ch); } -static void -puthex32(uint32_t val) +static inline void puts(const char *s) { - char buffer[8 + 3]; - buffer[0] = '0'; - buffer[1] = 'x'; - buffer[8 + 3 - 1] = 0; - for (unsigned i = 8 + 1; i > 1; i--) { - buffer[i] = hexchar(val & 0xf); - val >>= 4; - } - puts(buffer); + while (*s) + putc(*s++); } -static void -puthex64(uint64_t val) +static int loader_multiboot2(uint32_t multiboot_info_ptr) { - char buffer[16 + 3]; - buffer[0] = '0'; - buffer[1] = 'x'; - buffer[16 + 3 - 1] = 0; - for (unsigned i = 16 + 1; i > 1; i--) { - buffer[i] = hexchar(val & 0xf); - val >>= 4; - } - puts(buffer); + uint32_t *total_size = (uint32_t *) multiboot_info_ptr; + uint32_t last_tag_offset = 0; + + /* Walk the list of multiboot info tags. */ + for (uint32_t i = 2 * sizeof (uint32_t); i < *total_size; ) { + struct multiboot2_tag *tag = (void *) (multiboot_info_ptr + i); + + /* Fail if we were given any multiboot module. */ + if (tag->type == MULTIBOOT2_INFO_TAG_MODULE) { + puts("LDR|ERROR: multiboot modules not supported\r\n"); + return -1; + } + + /* Break on the closing tag. */ + if (tag->type == MULTIBOOT2_INFO_TAG_END && tag->size == 8) { + last_tag_offset = i; + break; + } + + /* Skip this tag and round up to the next 64-bit boundary. */ + i = roundup64(i + tag->size); + } + + /* That shouldn't happen but who knows. */ + if (!last_tag_offset) { + puts("LDR|ERROR: invalid boot information tag list\r\n"); + return -1; + } + + /* + * From here onwards we are carelessly extending the list of multiboot2 + * tags without checking that we do not overwrite anything important. + * So far there seem to be quite a lot of space between this tag list + * and the next memory region in use so that's good enough for a + * proof-of-concept implementation, but one this this should really be + * cleaned up. + */ + + /* Add a module tag for the monitor inittask ELF file. */ + struct multiboot2_tag_module *module = (void *) (multiboot_info_ptr + last_tag_offset); + module->head.type = MULTIBOOT2_INFO_TAG_MODULE; + module->head.size = sizeof (*module) + strlen(monitor_cmdline) + 1; + module->mod_start = monitor_addr; + module->mod_end = monitor_addr + monitor_size; + memcpy(&module->cmdline[0], monitor_cmdline, strlen(monitor_cmdline) + 1); + + /* Account for the new tag. */ + *total_size += roundup64(module->head.size); + last_tag_offset += roundup64(module->head.size); + + /* Add a custom tag to register device memory: memory regions that will + * be marked as device untyped by the kernel. This is an unofficial + * addition to the multiboot2 specs. */ + struct multiboot2_tag_device_memory *devmem = (void *) (multiboot_info_ptr + last_tag_offset); + devmem->head.type = MULTIBOOT2_INFO_TAG_DEVICE_MEMORY; + devmem->head.size = sizeof (*devmem); + devmem->dmem_addr = extra_device_addr_p; + devmem->dmem_size = extra_device_size; + + /* Account for the new tag. */ + *total_size += roundup64(devmem->head.size); + last_tag_offset += roundup64(devmem->head.size); + + /* Add a new end tag to close the list. Note that we do not need to + * account for this end tag since we have overwritten the previous one + * which was already accounted for. */ + struct multiboot2_tag *end = (void *) (multiboot_info_ptr + last_tag_offset); + end->type = MULTIBOOT2_INFO_TAG_END; + end->size = sizeof (*end); + + return 0; } -/* - * Print out the loader data structure. - * - * This doesn't *do anything*. It helps when - * debugging to verify that the data structures are - * being interpretted correctly by the loader. - */ -static void -print_flags(void) -{ - if (loader_data->flags & FLAG_SEL4_HYP) { - puts(" seL4 configured as hypervisor\n"); - } -} - -static void -print_loader_data(void) +int loader(uint32_t multiboot_magic, uint32_t multiboot_info_ptr) { - puts("LDR|INFO: Flags: "); - puthex64(loader_data->flags); - puts("\n"); - print_flags(); - puts("LDR|INFO: Kernel: entry: "); - puthex64(loader_data->kernel_entry); - puts("\n"); - - puts("LDR|INFO: Root server: physmem: "); - puthex64(loader_data->ui_p_reg_start); - puts(" -- "); - puthex64(loader_data->ui_p_reg_end); - puts("\nLDR|INFO: virtmem: "); - puthex64(loader_data->ui_p_reg_start - loader_data->pv_offset); - puts(" -- "); - puthex64(loader_data->ui_p_reg_end - loader_data->pv_offset); - puts("\nLDR|INFO: entry : "); - puthex64(loader_data->v_entry); - puts("\n"); - - for (uint32_t i = 0; i < loader_data->num_regions; i++) { - const struct region *r = &loader_data->regions[i]; - puts("LDR|INFO: region: "); - puthex32(i); - puts(" addr: "); - puthex64(r->load_addr); - puts(" size: "); - puthex64(r->size); - puts(" offset: "); - puthex64(r->offset); - puts(" type: "); - puthex64(r->type); - puts("\n"); - } -} - -static void -copy_data(void) -{ - const void *base = &loader_data->regions[loader_data->num_regions]; - for (uint32_t i = 0; i < loader_data->num_regions; i++) { - const struct region *r = &loader_data->regions[i]; - puts("LDR|INFO: copying region "); - puthex32(i); - puts("\n"); - memcpy((void *)(uintptr_t)r->load_addr, base + r->offset, r->size); - } -} - -static void -start_kernel(void) -{ -} - -int -main(void) -{ - puts("LDR|INFO: altloader for seL4 starting\n"); - /* Check that the loader magic number is set correctly */ - if (loader_data->magic != MAGIC) { - puts("LDR|ERROR: mismatch on loader data structure magic number\n"); - return 1; - } - - print_loader_data(); - - copy_data(); - - puts("LDR|INFO: jumping to kernel\n"); - start_kernel(); - - puts("LDR|ERROR: seL4 Loader: Error - KERNEL RETURNED\n"); - goto fail; - -fail: + serial_init(); + + switch (multiboot_magic) { + case MULTIBOOT1_BOOT_MAGIC: + puts("LDR|INFO: booted as Multiboot v1\r\n"); + puts("LDR|ERROR: multiboot v1 not supported\r\n"); + return -1; + + case MULTIBOOT2_BOOT_MAGIC: + puts("LDR|INFO: booted as Multiboot v2\r\n"); + return loader_multiboot2(multiboot_info_ptr); + + default: + puts("LDR|ERROR: invalid multiboot magic\r\n"); + return -1; + } } diff --git a/loader/src/x86_64/multiboot.S b/loader/src/x86_64/multiboot.S new file mode 100644 index 000000000..c522c6d76 --- /dev/null +++ b/loader/src/x86_64/multiboot.S @@ -0,0 +1,69 @@ +/* + * Copyright 2023, Neutrality. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +/* + * Note that a lot of this is taken from seL4. Some fields from seL4's + * multiboot structure are configurable and we should make sure that we use the + * same values here because we'll hand over the multiboot information data + * (mostly) untouched to seL4. + */ + +#include "multiboot.h" + +#ifdef CONFIG_MULTIBOOT_GRAPHICS_MODE_NONE +# define MBT1_FLAGS (MULTIBOOT1_HEADER_FLAG_PAGE_ALIGN | MULTIBOOT1_HEADER_FLAG_REQ_MEMORY) +# define MBT1_GFXMODE MULTIBOOT1_HEADER_GFX_MODE_GFX +#else +# define MBT1_FLAGS (MULTIBOOT1_HEADER_FLAG_PAGE_ALIGN | MULTIBOOT1_HEADER_FLAG_REQ_MEMORY | MULTIBOOT1_HEADER_FLAG_REQ_VIDEO) +# ifdef CONFIG_MULTIBOOT_GRAPHICS_MODE_TEXT +# define MBT1_GFXMODE MULTIBOOT1_HEADER_GFX_MODE_TEXT +# else +# define MBT1_GFXMODE MULTIBOOT1_HEADER_GFX_MODE_GFX +# endif +#endif + +#ifndef CONFIG_MULTIBOOT_GRAPHICS_MODE_HEIGHT +#define CONFIG_MULTIBOOT_GRAPHICS_MODE_HEIGHT 0 +#endif +#ifndef CONFIG_MULTIBOOT_GRAPHICS_MODE_WIDTH +#define CONFIG_MULTIBOOT_GRAPHICS_MODE_WIDTH 0 +#endif +#ifndef CONFIG_MULTIBOOT_GRAPHICS_MODE_DEPTH +#define CONFIG_MULTIBOOT_GRAPHICS_MODE_DEPTH 0 +#endif + +.section .text.start +.code32 + +#ifdef CONFIG_MULTIBOOT1_HEADER + .align 4 + .long MULTIBOOT1_HEADER_MAGIC /* magic */ + .long MBT1_FLAGS /* flags */ + .long -(MBT1_FLAGS + MULTIBOOT1_HEADER_MAGIC) /* checksum */ + .long 0 /* header_addr */ + .long 0 /* load_addr */ + .long 0 /* load_end_addr */ + .long 0 /* bss_end_addr */ + .long 0 /* entry_addr */ + .long MBT1_GFXMODE /* mode_type */ + .long CONFIG_MULTIBOOT_GRAPHICS_MODE_WIDTH /* width */ + .long CONFIG_MULTIBOOT_GRAPHICS_MODE_HEIGHT /* height */ + .long CONFIG_MULTIBOOT_GRAPHICS_MODE_DEPTH /* depth */ +#endif + +#ifdef CONFIG_MULTIBOOT2_HEADER +#define MBT2_SIZE (__mbi2_end - __mbi2_start) + .align 8 +__mbi2_start: + .long MULTIBOOT2_HEADER_MAGIC /* magic */ + .long 0 /* architecture (i386) */ + .long MBT2_SIZE /* header size */ + .long -(MULTIBOOT2_HEADER_MAGIC + MBT2_SIZE) /* checksum */ + .word 0x0 /* end tag: type */ + .word 0x0 /* end tag: flags */ + .long 0x8 /* end tag: size */ +__mbi2_end: +#endif diff --git a/loader/src/x86_64/multiboot.h b/loader/src/x86_64/multiboot.h new file mode 100644 index 000000000..584b98334 --- /dev/null +++ b/loader/src/x86_64/multiboot.h @@ -0,0 +1,74 @@ +/* + * Copyright 2023, Neutrality. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +/* + * Multiboot 1 + */ + +#define MULTIBOOT1_HEADER_MAGIC 0x1BADB002 + +#define MULTIBOOT1_HEADER_FLAG_PAGE_ALIGN (1 << 0) +#define MULTIBOOT1_HEADER_FLAG_REQ_MEMORY (1 << 1) +#define MULTIBOOT1_HEADER_FLAG_REQ_VIDEO (1 << 2) + +#define MULTIBOOT1_HEADER_GFX_MODE_GFX 0 +#define MULTIBOOT1_HEADER_GFX_MODE_TEXT 1 + +#define MULTIBOOT1_BOOT_MAGIC 0x2BADB002 + +/* + * Multiboot 2 + */ + +#define MULTIBOOT2_HEADER_MAGIC 0xE85250D6 + +#define MULTIBOOT2_BOOT_MAGIC 0x36d76289 + +#define MULTIBOOT2_INFO_TAG_END 0 +#define MULTIBOOT2_INFO_TAG_COMMAND_LINE 1 +#define MULTIBOOT2_INFO_TAG_BOOTLOADER_NAME 2 +#define MULTIBOOT2_INFO_TAG_MODULE 3 +#define MULTIBOOT2_INFO_TAG_MEMORY 4 +#define MULTIBOOT2_INFO_TAG_BIOS_BOOT_DEVICE 5 +#define MULTIBOOT2_INFO_TAG_MEMORY_MAP 6 +#define MULTIBOOT2_INFO_TAG_VBE_INFO 7 +#define MULTIBOOT2_INFO_TAG_FRAMEBUFFER_INFO 8 +#define MULTIBOOT2_INFO_TAG_ELF_SYMBOLS 9 +#define MULTIBOOT2_INFO_TAG_APM_TABLE 10 +#define MULTIBOOT2_INFO_TAG_EFI32_SYSTEM_TABLE 11 +#define MULTIBOOT2_INFO_TAG_EFI64_SYSTEM_TABLE 12 +#define MULTIBOOT2_INFO_TAG_SMBIOS_TABLES 13 +#define MULTIBOOT2_INFO_TAG_ACPI_OLD_RSDP 14 +#define MULTIBOOT2_INFO_TAG_ACPI_NEW_RSDP 15 +#define MULTIBOOT2_INFO_TAG_NETWORK_INFO 16 +#define MULTIBOOT2_INFO_TAG_EFI_MEMORY_MAP 17 +#define MULTIBOOT2_INFO_TAG_EFI_BOOTSVC_RUNNING 18 +#define MULTIBOOT2_INFO_TAG_EFI32_IMAGE_HANDLE 19 +#define MULTIBOOT2_INFO_TAG_EFI64_IMAGE_HANDLE 20 +#define MULTIBOOT2_INFO_TAG_LOAD_BASE_PADDR 21 +#define MULTIBOOT2_INFO_TAG_DEVICE_MEMORY 42 /* Custom extension. */ + +#ifndef __ASM__HEADER__ + +struct multiboot2_tag { + uint32_t type; + uint32_t size; +} __attribute__((packed)); + +struct multiboot2_tag_module { + struct multiboot2_tag head; + uint32_t mod_start; + uint32_t mod_end; + char cmdline[0]; +} __attribute__((packed)); + +struct multiboot2_tag_device_memory { + struct multiboot2_tag head; + uint64_t dmem_addr; + uint64_t dmem_size; +} __attribute__((packed)); + +#endif diff --git a/loader/src/x86_64/utils.h b/loader/src/x86_64/utils.h new file mode 100644 index 000000000..afa246952 --- /dev/null +++ b/loader/src/x86_64/utils.h @@ -0,0 +1,33 @@ +/* + * Copyright 2023, Neutrality. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +static inline void memcpy(char *dst, char *src, uint32_t len) +{ + while (len--) + *dst++ = *src++; +} + +static inline uint32_t strlen(char *str) +{ + uint32_t len = 0; + while (*str++) + len++; + return len; +} + +static inline uint8_t in8 (uint16_t port) +{ + uint8_t value; + __asm__ __volatile__ ("inb %w1,%0":"=a" (value):"Nd" (port)); + return value; +} + +static inline void out8 (uint16_t port, uint8_t value) +{ + __asm__ __volatile__ ("outb %b0,%w1": :"a" (value), "Nd" (port)); +} diff --git a/monitor/Makefile b/monitor/Makefile index 163ff8c82..c7e1bed49 100644 --- a/monitor/Makefile +++ b/monitor/Makefile @@ -7,17 +7,21 @@ ifeq ($(strip $(BUILD_DIR)),) $(error BUILD_DIR must be specified) endif -# ifeq ($(strip $(GCC_CPU)),) -# $(error GCC_CPU must be specified) -# endif - ifeq ($(strip $(ARCH)),) $(error ARCH must be specified) endif -# ifeq ($(strip $(TOOLCHAIN)),) -# $(error TOOLCHAIN must be specified) -# endif +ifeq ($(ARCH),aarch64) +ifeq ($(strip $(GCC_CPU)),) +$(error GCC_CPU must be specified for ARCH=$(ARCH)) +endif +endif + +ifeq ($(ARCH),x86_64) +ifeq ($(strip $(GCC_MARCH)),) +$(error GCC_MARCH must be specified for ARCH=$(ARCH)) +endif +endif ifeq ($(ARCH),aarch64) C_FLAGS := -std=gnu11 -g -O3 -nostdlib -ffreestanding -mcpu=$(GCC_CPU) -DARCH_aarch64 -Wall -Wno-maybe-uninitialized -Werror -I$(SEL4_SDK)/include @@ -31,17 +35,16 @@ else ifeq ($(ARCH),riscv32) C_FLAGS := -mcmodel=medany -std=gnu11 -g -O3 -nostdlib -ffreestanding -march=rv32imac -mabi=ilp32 -DARCH_riscv32 -Wall -Wno-maybe-uninitialized -Werror -I$(SEL4_SDK)/include ASM_CPP_FLAGS := -x assembler-with-cpp -c -g -march=rv32imac -mabi=ilp32 ASM_FLAGS := -g -march=rv32imac -mabi=ilp32 +else ifeq ($(ARCH),x86_64) + C_FLAGS := -std=gnu11 -g -O3 -nostdlib -ffreestanding -march=$(GCC_MARCH) -mabi=sysv -DARCH_x86_64 -Wall -Wno-maybe-uninitialized -Werror -I$(SEL4_SDK)/include + ASM_CPP_FLAGS := -x assembler-with-cpp -c -g -march=rv32imac -mabi=ilp32 + ASM_FLAGS := -g -march=generic64 else - $(error ARCH must be aarch64 or riscv64 or riscv32) + $(error ARCH must be aarch64 or riscv64 or riscv32 or x86_64) endif -ifeq ($(ARCH),x86_64) - GCC := gcc - AS := as -else - GCC := $(TOOLCHAIN)gcc - AS := $(TOOLCHAIN)as -endif +GCC := $(TOOLCHAIN)gcc +AS := $(TOOLCHAIN)as PROGS := monitor.elf OBJECTS := main.o crt0.o debug.o util.o diff --git a/monitor/src/x86_64/crt0.s b/monitor/src/x86_64/crt0.s new file mode 100644 index 000000000..002cf2c83 --- /dev/null +++ b/monitor/src/x86_64/crt0.s @@ -0,0 +1,5 @@ + .section .text.start + .globl _start +_start: + leaq 0xff0 + _stack(%rip), %rsp + call main diff --git a/tool/microkit/__main__.py b/tool/microkit/__main__.py index 38a773a73..17eeda84d 100644 --- a/tool/microkit/__main__.py +++ b/tool/microkit/__main__.py @@ -57,6 +57,9 @@ Sel4Invocation, Sel4ARMPageTableMap, Sel4RISCVPageTableMap, + Sel4X86PDPTMap, + Sel4X86PageDirectoryMap, + Sel4X86PageTableMap, Sel4TcbSetSchedParams, Sel4TcbSetSpace, Sel4TcbSetIpcBuffer, @@ -101,11 +104,13 @@ SEL4_ARM_PAGE_CACHEABLE, SEL4_RISCV_DEFAULT_VMATTRIBUTES, SEL4_RISCV_EXECUTE_NEVER, + SEL4_X86_DEFAULT_VMATTRIBUTES, SEL4_OBJECT_TYPE_NAMES, ) from microkit.sysxml import ProtectionDomain, xml2system, SystemDescription, PlatformDescription from microkit.sysxml import SysMap, SysMemoryRegion # This shouldn't be needed here as such from microkit.loader import Loader, _check_non_overlapping +from microkit.x86loader import X86Loader # This is a workaround for: https://github.com/indygreg/PyOxidizer/issues/307 # Basically, pyoxidizer generates code that results in argv[0] being set to None. @@ -551,17 +556,20 @@ def allocate_fixed_objects(self, kernel_config: KernelConfig, phys_address: int, def allocate_objects(self, kernel_config: KernelConfig, object_type: int, names: List[str], size: Optional[int] = None) -> List[KernelObject]: count = len(names) - if object_type in FIXED_OBJECT_SIZES: - assert size is None - alloc_size = Sel4Object(object_type).get_size(kernel_config) - api_size = 0 - elif object_type in (Sel4Object.CNode, Sel4Object.SchedContext): + + if object_type in (Sel4Object.CNode, Sel4Object.SchedContext): + # Objects of variable size. assert size is not None assert is_power_of_two(size) api_size = int(log2(size)) alloc_size = size * SEL4_SLOT_SIZE else: - raise Exception(f"Invalid object type: {object_type}") + # Objects of fixed size. + assert size is None + api_size = 0 + alloc_size = Sel4Object(object_type).get_size(kernel_config) + if alloc_size is None: + raise Exception(f"Invalid object type: {object_type}") allocation = self._kao.alloc(alloc_size, count) base_cap_slot = self._cap_slot self._cap_slot += count @@ -643,6 +651,7 @@ def build_system( invocation_table_size: int, system_cnode_size: int, search_paths: List[Path], + x86_machine, ) -> BuiltSystem: """Build system as description by the inputs, with a 'BuiltSystem' object as the output.""" assert is_power_of_two(system_cnode_size) @@ -693,18 +702,31 @@ def build_system( available_memory, kernel_boot_region = emulate_kernel_boot_partial( kernel_config, kernel_elf, + x86_machine, ) - # The kernel relies on the reserved region being allocated above the kernel - # boot/ELF region, so we have the end of the kernel boot region as the lower - # bound for allocating the reserved region. - reserved_base = available_memory.allocate_from(reserved_size, kernel_boot_region.end) - assert kernel_boot_region.base < reserved_base - # The kernel relies on the initial task being allocated above the reserved - # region, so we have the address of the end of the reserved region as the - # lower bound for allocating the initial task. - initial_task_phys_base = available_memory.allocate_from(initial_task_size, reserved_base + reserved_size) - assert reserved_base < initial_task_phys_base + if kernel_config.arch == KernelArch.X86_64: + # On x86 the kernel loads the inittask at the end of the boot/ELF + # region, so we have the end of the kernel boot region as the lower + # bound for allocating the initial task. + initial_task_phys_base = available_memory.allocate_from(initial_task_size, kernel_boot_region.end) + assert kernel_boot_region.base < initial_task_phys_base + # On x86 the kernel relies on the reserved region being allocated above + # the inittask region, so we have the address of the end of the inittask + # region as the lower bound for allocating the reserved region. + reserved_base = available_memory.allocate_from(reserved_size, initial_task_phys_base + initial_task_size) + assert initial_task_phys_base < reserved_base + else: + # The kernel relies on the reserved region being allocated above the + # kernel boot/ELF region, so we have the end of the kernel boot region + # as the lower bound for allocating the reserved region. + reserved_base = available_memory.allocate_from(reserved_size, kernel_boot_region.end) + assert kernel_boot_region.base < reserved_base + # The kernel relies on the initial task being allocated above the + # reserved region, so we have the address of the end of the reserved + # region as the lower bound for allocating the initial task. + initial_task_phys_base = available_memory.allocate_from(initial_task_size, reserved_base + reserved_size) + assert reserved_base < initial_task_phys_base initial_task_phys_region = MemoryRegion(initial_task_phys_base, initial_task_phys_base + initial_task_size) initial_task_virt_region = virt_mem_region_from_elf(monitor_elf, kernel_config.minimum_page_size) @@ -754,7 +776,8 @@ def build_system( kernel_elf, initial_task_phys_region, initial_task_virt_region, - reserved_region + reserved_region, + x86_machine ) for ut in kernel_boot_info.untyped_objects: @@ -1252,6 +1275,11 @@ def build_system( # Allocating for 3-level page table d_names = [f"PageTable: PD/VM={names[idx]} VADDR=0x{vaddr:x}" for idx, vaddr in ds] d_objects = init_system.allocate_objects(kernel_config, Sel4Object.PageTable, d_names) + elif kernel_config.arch == KernelArch.X86_64: + ud_names = [f"PageDirectoryPointerTable: PD/VM={names[idx]} VADDR=0x{vaddr:x}" for idx, vaddr in uds] + ud_objects = init_system.allocate_objects(kernel_config, Sel4Object.PdPt, ud_names) + d_names = [f"PageDirectory: PD/VM={names[idx]} VADDR=0x{vaddr:x}" for idx, vaddr in ds] + d_objects = init_system.allocate_objects(kernel_config, Sel4Object.PageDirectory, d_names) else: raise Exception(f"Unexpected kernel architecture: {kernel_config.arch}") @@ -1676,6 +1704,13 @@ def build_system( (Sel4ARMPageTableMap, ds, d_objects), (Sel4ARMPageTableMap, pts, pt_objects), ] + elif kernel_config.arch == KernelArch.X86_64: + default_vm_attributes = SEL4_X86_DEFAULT_VMATTRIBUTES + vspace_invocations = [ + (Sel4X86PDPTMap, uds, ud_objects), + (Sel4X86PageDirectoryMap, ds, d_objects), + (Sel4X86PageTableMap, pts, pt_objects), + ] else: raise Exception(f"Unexpected kernel architecture: {kernel_config.arch}") @@ -1877,6 +1912,7 @@ def main() -> int: parser.add_argument("--board", required=True, choices=available_boards) parser.add_argument("--config", required=True) parser.add_argument("--search-path", nargs='*', type=Path) + parser.add_argument("--x86-machine") args = parser.parse_args() board_path = boards_path / args.board @@ -1948,6 +1984,17 @@ def main() -> int: aarch64_smc_calls = sel4_config.get("ALLOW_SMC_CALLS", False) + # Load the x86 machine description file. + if sel4_arch == "x86_64": + if not args.x86_machine: + raise UserError("Tool argument --x86-machine is mandatory on x86") + with open(args.x86_machine, "r") as f: + x86_machine = json_load(f) + else: + if args.x86_machine: + raise UserError("Tool argument --x86-machine is only valid for x86") + x86_machine = None + kernel_config = KernelConfig( arch = arch, word_size = sel4_config["WORD_SIZE"], @@ -1991,6 +2038,7 @@ def main() -> int: invocation_table_size, system_cnode_size, search_paths, + x86_machine, ) print(f"BUILT: {system_cnode_size=} {built_system.number_of_system_caps=} {invocation_table_size=} {built_system.invocation_data_size=}") if (built_system.number_of_system_caps <= system_cnode_size and @@ -2118,8 +2166,14 @@ def main() -> int: for idx, invocation in enumerate(built_system.system_invocations): f.write(f" 0x{idx:04x} {invocation_to_str(kernel_config, invocation, cap_lookup)}\n") + # Use a different loader on x86. + if arch == KernelArch.X86_64: + LoaderClass = X86Loader + else: + LoaderClass = Loader + # FIXME: Verify that the regions do not overlap! - loader = Loader( + loader = LoaderClass( kernel_config, loader_elf_path, kernel_elf, diff --git a/tool/microkit/elf.py b/tool/microkit/elf.py index 8f7b2872f..94b7a8369 100644 --- a/tool/microkit/elf.py +++ b/tool/microkit/elf.py @@ -7,9 +7,11 @@ from struct import Struct, pack from enum import IntEnum, IntFlag from dataclasses import dataclass +from io import BytesIO from typing import List, Literal, Optional, Tuple +from .util import round_up class ObjectFileType(IntEnum): ET_NONE = 0 @@ -55,6 +57,8 @@ class MachineType(IntEnum): # This is all we support for now, and I don't # feel like typing them all out! # These values are from Linux source in include/uapi/linux/elf-em.h + EM_386 = 3 + EM_X86_64 = 62 EM_AARCH64 = 183 EM_RISCV = 243 @@ -111,6 +115,16 @@ class ElfVersion(IntEnum): "entsize", ) +ELF_SYMBOL32 = Struct(" "ElfFile": ph_fields = ELF_PROGRAM_HEADER32_FIELDS sh_fmt = ELF_SECTION_HEADER32 sh_fields = ELF_SECTION_HEADER32_FIELDS + sym_fmt = ELF_SYMBOL32 + sym_fields = ELF_SYMBOL32_FIELDS elf = cls(word_size=32) elif class_ == 2: hdr_fmt = ELF_HEADER64 @@ -343,58 +359,83 @@ def from_path(cls, path: Path) -> "ElfFile": return elf - def write(self, path: Path, machine: MachineType) -> None: + def iowrite(self, f: BytesIO, machine: MachineType) -> None: """Note: This only supports writing out of program headers and segments. It does *not* support writing out sections at this point in time. """ - with path.open("wb") as f: + if self.word_size == 64: ehsize = ELF_HEADER64.size + 5 phentsize = ELF_PROGRAM_HEADER64.size - header = ElfHeader( - ident_data=DataEncoding.ELFDATA2LSB, - ident_version=ElfVersion.EV_CURRENT, - ident_osabi=OperatingSystemAbi.ELFOSABI_STANDALINE, - ident_abiversion=0, - type_ = ObjectFileType.ET_EXEC, - machine=machine, - version=ElfVersion.EV_CURRENT, - entry=self.entry, - phoff=ehsize, - shoff=0, - flags=0, - ehsize=ehsize, - phentsize=phentsize, - phnum=len(self.segments), - shentsize=0, - shnum=0, - shstrndx=0, - ) + shentsize = ELF_SECTION_HEADER64.size + else: + ehsize = ELF_HEADER32.size + 5 + phentsize = ELF_PROGRAM_HEADER32.size + shentsize = ELF_SECTION_HEADER32.size + phoff = round_up(ehsize, 8) + shoff = phoff + len(self.segments) * phentsize + sum(len(s.data) for s in self.segments) + header = ElfHeader( + ident_data=DataEncoding.ELFDATA2LSB, + ident_version=ElfVersion.EV_CURRENT, + ident_osabi=OperatingSystemAbi.ELFOSABI_STANDALINE, + ident_abiversion=0, + type_ = ObjectFileType.ET_EXEC, + machine=machine, + version=ElfVersion.EV_CURRENT, + entry=self.entry, + phoff=phoff, + shoff=shoff, + flags=0, + ehsize=ehsize, + phentsize=phentsize, + phnum=len(self.segments), + shentsize=shentsize, + shnum=1, + shstrndx=0, + ) + if self.word_size == 64: header_bytes = ELF_HEADER64.pack(*(getattr(header, field) for field in ELF_HEADER64_FIELDS)) f.write(ELF_MAGIC) f.write(pack(" None: + with path.open("wb") as f: + self.iowrite(f, machine) def add_segment(self, segment: ElfSegment) -> None: # TODO: Check that the segment doesn't overlap any existing diff --git a/tool/microkit/sel4.py b/tool/microkit/sel4.py index 204082d58..63c326348 100644 --- a/tool/microkit/sel4.py +++ b/tool/microkit/sel4.py @@ -86,6 +86,17 @@ class Sel4Object(IntEnum): Vspace = 11 Vcpu = 12 + # These names are specific to x86, the numbers are meaningless and + # overriden below via X86_64_OBJECTS. + PageDirectory = 13 + PdPt = 14 + Pml4 = 15 + IOPageTable = 16 + EptPml4 = 17 + EptPdPt = 18 + EptPageDirectory = 19 + EptPageTable = 20 + def get_id(self, kernel_config: KernelConfig) -> int: if kernel_config.arch == KernelArch.AARCH64: if kernel_config.hyp_mode and kernel_config.arm_pa_size_bits == 40 and self in AARCH64_HYP_OBJECTS: @@ -115,6 +126,8 @@ def get_size(self, kernel_config: KernelConfig) -> Optional[int]: return AARCH64_HYP_OBJECT_SIZES[self] elif kernel_config.arch == KernelArch.RISCV64 and kernel_config.hyp_mode and self in RISCV64_HYP_OBJECT_SIZES: return RISCV64_HYP_OBJECT_SIZES[self] + elif kernel_config.arch == KernelArch.X86_64 and self in X86_64_OBJECT_SIZES: + return X86_64_OBJECT_SIZES[self] elif self in FIXED_OBJECT_SIZES: return FIXED_OBJECT_SIZES[self] else: @@ -155,16 +168,23 @@ def get_size(self, kernel_config: KernelConfig) -> Optional[int]: } # @ivanv: Double check these, not sure about the first two and the last one. -# X86_64_OBJECTS = { -# Sel4Object.PdPt: 7, # ?? seL4_X64_PML4Object -# Sel4Object.Pml4: 8, # seL4_X64_PML4Object -# Sel4Object.HugePage: 9, -# Sel4Object.SmallPage: 10, -# Sel4Object.LargePage: 11, -# Sel4Object.PageTable: 12, -# Sel4Object.PageDirectory: 13, -# Sel4Object.IOPageTable: 14 # seL4_X86_IOPageTableObject, not sure if necessary -# } +X86_64_OBJECTS = { + Sel4Object.PdPt: 7, # seL4_X86_PDPTObject + Sel4Object.Pml4: 8, # seL4_X64_PML4Object + Sel4Object.HugePage: 9, # seL4_X64_HugePageObject + Sel4Object.SmallPage: 10, # seL4_X86_4K + Sel4Object.LargePage: 11, # seL4_X86_LargePageObject + Sel4Object.PageTable: 12, # seL4_X86_PageTableObject + Sel4Object.PageDirectory: 13, # seL4_X86_PageDirectoryObject + Sel4Object.IOPageTable: 14, # seL4_X86_IOPageTableObject + Sel4Object.Vcpu: 15, # seL4_X86_VCPUObject + Sel4Object.EptPml4: 16, # seL4_X86_EPTPML4Object + Sel4Object.EptPdPt: 17, # seL4_X86_EPTPDPTObject + Sel4Object.EptPageDirectory: 18, # seL4_X86_EPTPDObject + Sel4Object.EptPageTable: 19, # seL4_X86_EPTPTObject + # A Vspace on X86-64 is represented by a PML4 + Sel4Object.Vspace: 8, +} SEL4_OBJECT_TYPE_NAMES = { Sel4Object.Untyped: "SEL4_UNTYPED_OBJECT", @@ -178,6 +198,8 @@ def get_size(self, kernel_config: KernelConfig) -> Optional[int]: Sel4Object.LargePage: "SEL4_LARGE_PAGE_OBJECT", Sel4Object.HugePage: "SEL4_HUGE_PAGE_SIZE", Sel4Object.PageTable: "SEL4_PAGE_TABLE_OBJECT", + Sel4Object.PageDirectory: "SEL4_PAGE_DIRECTORY_OBJECT", + Sel4Object.PdPt: "SEL4_PAGE_DIRECTORY_POINTER_TABLE_OBJECT", Sel4Object.Vspace: "SEL4_VSPACE_OBJECT", Sel4Object.Vcpu: "SEL4_VCPU_OBJECT", } @@ -208,6 +230,18 @@ def get_size(self, kernel_config: KernelConfig) -> Optional[int]: Sel4Object.Vspace: 1 << 14, } +X86_64_OBJECT_SIZES = { + Sel4Object.PageDirectory: 1 << 12, + Sel4Object.PdPt: 1 << 12, + Sel4Object.Pml4: 1 << 12, + Sel4Object.IOPageTable: 1 << 12, + Sel4Object.Vcpu: 1 << 14, + Sel4Object.EptPml4: 1 << 12, + Sel4Object.EptPdPt: 1 << 12, + Sel4Object.EptPageDirectory: 1 << 12, + Sel4Object.EptPageTable: 1 << 12, +} + VARIABLE_SIZE_OBJECTS = { Sel4Object.CNode, Sel4Object.Untyped, @@ -257,11 +291,34 @@ def get_size(self, kernel_config: KernelConfig) -> Optional[int]: INIT_THREAD_SC_CAP_ADDRESS = 14 SMC_CAP_ADDRESS = 15 +N_VTD_CONTEXTS = 256 +VTD_PT_INDEX_BITS = 9 + def _get_n_paging(region: MemoryRegion, bits: int) -> int: start = round_down(region.base, 1 << bits) end = round_up(region.end, 1 << bits) return (end - start) // (1 << bits) +def _vtd_get_n_paging(x86_machine) -> int: + size = 0 + size += 1 # one for the root table + size += N_VTD_CONTEXTS # one for each context + size += len(x86_machine["rmrrs"]) # one for each device + + if len(x86_machine["rmrrs"]) == 0: + return size + + # @mat: there's more stuff in seL4's vtd_get_n_paging() to be duplicated here. + + for i in range(x86_machine["bootinfo"]["numIOPTLevels"] - 1, 0, -1): + nbits = VTD_PT_INDEX_BITS * i + 12 + if nbits >= 32: + size += 1 + else: + for rmrr in x86_machine["rmrrs"]: + region = MemoryRegion(base=rmrr["base"], end=rmrr["limit"]) + size += _get_n_paging(region, 32 - nbits) + return size def _get_arch_n_paging(kernel_config: KernelConfig, region: MemoryRegion) -> int: if kernel_config.arch == KernelArch.AARCH64: @@ -297,6 +354,17 @@ def _get_arch_n_paging(kernel_config: KernelConfig, region: MemoryRegion) -> int _get_n_paging(region, PUD_INDEX_OFFSET) + _get_n_paging(region, PD_INDEX_OFFSET) ) + elif kernel_config.arch == KernelArch.X86_64: + PT_INDEX_OFFSET = 12 + PD_INDEX_OFFSET = (PT_INDEX_OFFSET + 9) + PDPT_INDEX_OFFSET = (PD_INDEX_OFFSET + 9) + PML4_INDEX_OFFSET = (PDPT_INDEX_OFFSET + 9) + + return ( + _get_n_paging(region, PML4_INDEX_OFFSET) + + _get_n_paging(region, PDPT_INDEX_OFFSET) + + _get_n_paging(region, PD_INDEX_OFFSET) + ) else: raise Exception(f"Unknown kernel architecture {kernel_config.arch}") @@ -770,6 +838,59 @@ class Sel4Label(IntEnum): RISCVVCPUSetTCB = 66 RISCVVCPUReadReg = 67 RISCVVCPUWriteReg = 68 + # X86 PDPT + X86PDPTMap = 69 + X86PDPTUnmap = 70 + # X86 Page Directory + X86PageDirectoryMap = 71 + X86PageDirectoryUnmap = 72 + X86PageDirectoryGetStatusBits = 73 + # X86 Page Table + X86PageTableMap = 74 + X86PageTableUnmap = 75 + # X86 IO Page + X86IOPageTableMap = 76 + X86IOPageTableUnmap = 77 + # X86 Page + X86PageMap = 78 + X86PageUnmap = 79 + X86PageMapIO = 80 + X86PageGetAddress = 81 + X86PageMapEPT = 82 + # X86 ASID + X86ASIDControlMakePool = 83 + # X86 ASID Pool + X86ASIDPoolAssign = 84 + # X86 IO Port Control + X86IOPortControlIssue = 85 + # X86 IO PORT + X86IOPortIn8 = 86 + X86IOPortIn16 = 87 + X86IOPortIn32 = 88 + X86IOPortOut8 = 89 + X86IOPortOut16 = 90 + X86IOPortOut32 = 91 + # X86 IRQ + X86IRQIssueIRQHandlerIOAPIC = 92 + X86IRQIssueIRQHandlerMSI = 93 + # X86 TCB + TCBSetEPTRoot = 94 + # X86 VCPU + X86VCPUSetTCB = 95 + X86VCPUReadVMCS = 96 + X86VCPUWriteVMCS = 97 + X86VCPUEnableIOPort = 98 + X86VCPUDisableIOPort = 99 + X86VCPUWriteRegisters = 100 + # X86 EPTPDPT + X86EPTPDPTMap = 101 + X86EPTPDPTUnmap = 102 + # X86 EPTPD + X86EPTPDMap = 103 + X86EPTPDUnmap = 104 + # X86 EPTPT + X86EPTPTMap = 105 + X86EPTPTUnmap = 106 def get_id(self, kernel_config: KernelConfig) -> int: if kernel_config.arch == KernelArch.AARCH64: @@ -784,6 +905,11 @@ def get_id(self, kernel_config: KernelConfig) -> int: return RISCV_LABELS[self] else: return self + elif kernel_config.arch == KernelArch.X86_64: + if self in X86_64_LABELS: + return X86_64_LABELS[self] + else: + return self else: raise Exception(f"Unknown kernel architecture: {kernel_config.arch}") @@ -826,35 +952,60 @@ def get_id(self, kernel_config: KernelConfig) -> int: Sel4Label.RISCVVCPUWriteReg: 46, } -""" -# @ivanv: Looks like x86 messes everything up since it has -# other TCB invocations... -class Sel4LabelX86(IntEnum): - X86PDPTMap - X86PDPTUnmap - X86PageDirectoryMap - X86PageDirectoryUnmap - X86PageTableMap - X86PageTableUnmap - X86IOPageTableMap - X86IOPageTableUnmap - X86PageMap - X86PageUnmap - X86PageMapIO - X86PageGetAddress - X86ASIDControlMakePool - X86ASIDPoolAssign - X86IOPortControlIssue - X86IOPortIn8 - X86IOPortIn16 - X86IOPortIn32 - X86IOPortOut8 - X86IOPortOut16 - X86IOPortOut32 - X86IRQIssueIRQHandlerIOAPIC - X86IRQIssueIRQHandlerMSI -""" - +X86_64_LABELS = { + # X86 PDPT + Sel4Label.X86PDPTMap: 36, + Sel4Label.X86PDPTUnmap: 37, + # X86 Page Directory + Sel4Label.X86PageDirectoryMap: 38, + Sel4Label.X86PageDirectoryUnmap: 39, + # X86 Page Table + Sel4Label.X86PageTableMap: 40, + Sel4Label.X86PageTableUnmap: 41, + # X86 IO Page + Sel4Label.X86IOPageTableMap: 42, + Sel4Label.X86IOPageTableUnmap: 43, + # X86 Page + Sel4Label.X86PageMap: 44, + Sel4Label.X86PageUnmap: 45, + Sel4Label.X86PageMapIO: 46, + Sel4Label.X86PageGetAddress: 47, + Sel4Label.X86PageMapEPT: 48, + # X86 ASID + Sel4Label.X86ASIDControlMakePool: 49, + # X86 ASID Pool + Sel4Label.X86ASIDPoolAssign: 50, + # X86 IO Port Control + Sel4Label.X86IOPortControlIssue: 51, + # X86 IO PORT + Sel4Label.X86IOPortIn8: 52, + Sel4Label.X86IOPortIn16: 53, + Sel4Label.X86IOPortIn32: 54, + Sel4Label.X86IOPortOut8: 55, + Sel4Label.X86IOPortOut16: 56, + Sel4Label.X86IOPortOut32: 57, + # X86 IRQ + Sel4Label.X86IRQIssueIRQHandlerIOAPIC: 58, + Sel4Label.X86IRQIssueIRQHandlerMSI: 59, + # X86 TCB + Sel4Label.TCBSetEPTRoot: 60, + # X86 VCPU + Sel4Label.X86VCPUSetTCB: 61, + Sel4Label.X86VCPUReadVMCS: 62, + Sel4Label.X86VCPUWriteVMCS: 63, + Sel4Label.X86VCPUEnableIOPort: 64, + Sel4Label.X86VCPUDisableIOPort: 65, + Sel4Label.X86VCPUWriteRegisters: 66, + # X86 EPTPDPT + Sel4Label.X86EPTPDPTMap: 67, + Sel4Label.X86EPTPDPTUnmap: 68, + # X86 EPTPD + Sel4Label.X86EPTPDMap: 69, + Sel4Label.X86EPTPDUnmap: 70, + # X86 EPTPT + Sel4Label.X86EPTPTMap: 71, + Sel4Label.X86EPTPTUnmap: 72, +} ### Invocations @@ -1078,7 +1229,7 @@ def __init__(self, arch: KernelArch, asid_pool: int, vspace: int): elif arch == KernelArch.RISCV64: self.label = Sel4Label.RISCVASIDPoolAssign elif arch == KernelArch.X86_64: - self.label = Sel4LabelX86.X86ASIDPoolAssign + self.label = Sel4Label.X86ASIDPoolAssign else: raise Exception(f"Unexpected kernel architecture: {arch}") @@ -1147,6 +1298,38 @@ class Sel4RISCVPageTableMap(Sel4Invocation): vaddr: int attr: int +@dataclass +class Sel4X86PDPTMap(Sel4Invocation): + _object_type = "Page Directory Pointer Table" + _method_name = "Map" + _extra_caps = ("vspace", ) + label = Sel4Label.X86PDPTMap + pdpt: int + vspace: int + vaddr: int + attr: int + +@dataclass +class Sel4X86PageDirectoryMap(Sel4Invocation): + _object_type = "Page Directory" + _method_name = "Map" + _extra_caps = ("vspace", ) + label = Sel4Label.X86PageDirectoryMap + page_directory: int + vspace: int + vaddr: int + attr: int + +@dataclass +class Sel4X86PageTableMap(Sel4Invocation): + _object_type = "Page Table" + _method_name = "Map" + _extra_caps = ("vspace", ) + label = Sel4Label.X86PageTableMap + page_table: int + vspace: int + vaddr: int + attr: int @dataclass class Sel4PageMap(Sel4Invocation): @@ -1308,14 +1491,15 @@ def _kernel_self_mem(kernel_elf: ElfFile) -> Tuple[int, int]: """Return the physical memory range used by the kernel itself.""" base = kernel_elf.segments[0].phys_addr ki_end_v, _= kernel_elf.find_symbol("ki_end") - ki_end_p = ki_end_v - kernel_elf.segments[0].virt_addr + base + v_p_offset = kernel_elf.segments[-1].virt_addr - kernel_elf.segments[-1].phys_addr + ki_end_p = ki_end_v - v_p_offset return (base, ki_end_p) - def _kernel_boot_mem(kernel_elf: ElfFile) -> MemoryRegion: base = kernel_elf.segments[0].phys_addr ki_boot_end_v, _ = kernel_elf.find_symbol("ki_boot_end") - ki_boot_end_p = ki_boot_end_v - kernel_elf.segments[0].virt_addr + base + v_p_offset = kernel_elf.segments[-1].virt_addr - kernel_elf.segments[-1].phys_addr + ki_boot_end_p = ki_boot_end_v - v_p_offset return MemoryRegion(base, ki_boot_end_p) @@ -1350,13 +1534,16 @@ def calculate_kernel_virtual_base(kernel_config: KernelConfig) -> int: return 2 ** 64 - 2 ** 39 else: raise Exception("Unsupported number of RISC-V page table levels") + elif kernel_config.arch == KernelArch.X86_64: + return 2 ** 64 - 2 ** 39 else: raise Exception(f"Unexpected kernel architecture: {kernel_config.arch}") def _kernel_partial_boot( kernel_config: KernelConfig, - kernel_elf: ElfFile) -> _KernelPartialBootInfo: + kernel_elf: ElfFile, + x86_machine) -> _KernelPartialBootInfo: """Emulate what happens during a kernel boot, up to the point where the reserved region is allocated. @@ -1380,21 +1567,41 @@ def _kernel_partial_boot( device_size = 1 << 21 elif kernel_config.arch == KernelArch.AARCH64: device_size = 1 << 12 + elif kernel_config.arch == KernelArch.X86_64: + device_size = 1 << 12 else: raise Exception(f"Unexpected kernel architecture {config.arch}") - for paddr in _kernel_device_addrs(kernel_config.arch, kernel_elf): - device_memory.remove_region(paddr, paddr + device_size) + if kernel_config.arch == KernelArch.X86_64: + # On x86 the kernel devices are detected at runtime. The user + # is expected to collect the data from a live machine and pass + # it to us via --x86-machine. + for kdev in x86_machine["kdevs"]: + device_memory.remove_region(kdev["base"], kdev["base"] + kdev["size"]) + else: + for paddr in _kernel_device_addrs(kernel_config.arch, kernel_elf): + device_memory.remove_region(paddr, paddr + device_size) # Remove all the actual physical memory from the device regions - # but add it all to the actual normal memory regions - for start, end in _kernel_phys_mem(kernel_elf): - device_memory.remove_region(start, end) - normal_memory.insert_region(start, end) + # but add it all to the actual normal memory regions. On x86 the + # physical memory is detected at runtime and passed via the + # machine configuration file. + if kernel_config.arch == KernelArch.X86_64: + for mr in x86_machine["memory"]: + device_memory.remove_region(mr["base"], mr["base"] + mr["size"]) + normal_memory.insert_region(mr["base"], mr["base"] + mr["size"]) + else: + for start, end in _kernel_phys_mem(kernel_elf): + device_memory.remove_region(start, end) + normal_memory.insert_region(start, end) # Remove the kernel image itself normal_memory.remove_region(*_kernel_self_mem(kernel_elf)) + # Remove the MSI region on x86_64 + if kernel_config.arch == KernelArch.X86_64: + device_memory.remove_region(0xfffffff8, 0x100000000) + # but get the boot region, we'll add that back later # FIXME: Why calcaultae it now if we add it back later? boot_region = _kernel_boot_mem(kernel_elf) @@ -1405,13 +1612,14 @@ def _kernel_partial_boot( def emulate_kernel_boot_partial( kernel_config: KernelConfig, kernel_elf: ElfFile, + x86_machine, ) -> Tuple[DisjointMemoryRegion, MemoryRegion]: """Return the memory available after a 'partial' boot emulation. This allows the caller to allocate a reserved memory region at an appropriate location. """ - partial_info = _kernel_partial_boot(kernel_config, kernel_elf) + partial_info = _kernel_partial_boot(kernel_config, kernel_elf, x86_machine) return partial_info.normal_memory, partial_info.boot_region @@ -1420,12 +1628,14 @@ def emulate_kernel_boot( kernel_elf: ElfFile, initial_task_phys_region: MemoryRegion, initial_task_virt_region: MemoryRegion, - reserved_region: MemoryRegion) -> KernelBootInfo: + reserved_region: MemoryRegion, + x86_machine, + ) -> KernelBootInfo: """Emulate what happens during a kernel boot, generating a representation of the BootInfo struct.""" # And the the reserved region assert initial_task_phys_region.size == initial_task_virt_region.size - partial_info = _kernel_partial_boot(kernel_config, kernel_elf) + partial_info = _kernel_partial_boot(kernel_config, kernel_elf, x86_machine) normal_memory = partial_info.normal_memory device_memory = partial_info.device_memory boot_region = partial_info.boot_region @@ -1434,7 +1644,7 @@ def emulate_kernel_boot( normal_memory.remove_region(reserved_region.base, reserved_region.end) # Now, the tricky part! determine which memory is used for the initial task objects - initial_objects_size = calculate_rootserver_size(kernel_config, initial_task_virt_region) + initial_objects_size = calculate_rootserver_size(kernel_config, initial_task_virt_region, x86_machine) initial_objects_align = _rootserver_max_size_bits(kernel_config) # Find an appropriate region of normal memory to allocate the objects @@ -1452,8 +1662,17 @@ def emulate_kernel_boot( sched_control_cap_count = kernel_config.num_cpus paging_cap_count = _get_arch_n_paging(kernel_config, initial_task_virt_region) page_cap_count = initial_task_virt_region.size // kernel_config.minimum_page_size + # On x84_64 we have one more frame for the extra bootinfo region. + # @mat: there could in theory be more than one frame. + if kernel_config.arch == KernelArch.X86_64: + page_cap_count += 1 first_untyped_cap = fixed_cap_count + paging_cap_count + sched_control_cap_count + page_cap_count - schedcontrol_cap = fixed_cap_count + paging_cap_count + + # On X86 the schedcontrol caps are created before the paging caps. + if kernel_config.arch == KernelArch.X86_64: + schedcontrol_cap = fixed_cap_count + else: + schedcontrol_cap = fixed_cap_count + paging_cap_count # Determining seL4_MaxUntypedBits if kernel_config.arch == KernelArch.AARCH64 or kernel_config.arch == KernelArch.X86_64: @@ -1485,7 +1704,7 @@ def emulate_kernel_boot( ) -def calculate_rootserver_size(kernel_config: KernelConfig, initial_task_region: MemoryRegion) -> int: +def calculate_rootserver_size(kernel_config: KernelConfig, initial_task_region: MemoryRegion, x86_machine) -> int: # FIXME: These constants should ideally come from the config / kernel # binary not be hard coded here. # But they are constant so it isn't too bad. @@ -1525,10 +1744,16 @@ def calculate_rootserver_size(kernel_config: KernelConfig, initial_task_region: size += 1 << (tcb_bits) size += 2 * (1 << page_bits) size += 1 << asid_pool_bits + if kernel_config.arch == KernelArch.X86_64: + # One more page for the extra boot info. + size += 1 << page_bits size += 1 << vspace_bits - size += _get_arch_n_paging(kernel_config, initial_task_region) * (1 << page_table_bits) size += 1 << min_sched_context_bits + size += _get_arch_n_paging(kernel_config, initial_task_region) * (1 << page_table_bits) + if kernel_config.arch == KernelArch.X86_64: + # Add VT-d pages. + size += _vtd_get_n_paging(x86_machine) * (1 << page_table_bits) return size diff --git a/tool/microkit/util.py b/tool/microkit/util.py index 0ec5a30f7..7698bef23 100644 --- a/tool/microkit/util.py +++ b/tool/microkit/util.py @@ -108,7 +108,9 @@ def aligned_power_of_two_regions(self, kernel_virtual_base: int, max_bits: int) base_paddr = kernel_vaddr_to_paddr(kernel_virtual_base, base) end_paddr = kernel_vaddr_to_paddr(kernel_virtual_base, base + sz) base = machine_add(base, sz) - r.append(MemoryRegion(base_paddr, end_paddr)) + # Note that seL4_MinUntypedBits is 4 on all supported targets. + if size_bits >= 4: + r.append(MemoryRegion(base_paddr, end_paddr)) return r diff --git a/tool/microkit/x86loader.py b/tool/microkit/x86loader.py new file mode 100644 index 000000000..2cb4484ff --- /dev/null +++ b/tool/microkit/x86loader.py @@ -0,0 +1,80 @@ +from pathlib import Path +from struct import pack +from typing import Optional, List, Tuple +from io import BytesIO + +from microkit.elf import ElfFile, ElfSegment, SegmentAttributes, MachineType +from microkit.util import round_up, MemoryRegion +from microkit.sel4 import KernelConfig + +class X86Loader: + """Special loader for X86. + + On X86 we take advantage of the multiboot bootloader that can load + all ELF sections at their physical addresses. We inject extra + sections to the loader ELF file for the PDs, the kernel, and the + initial task. The output image file is a 32-bit ELF file because + grub does not load 64-bit ELF files in legacy (BIOS) boot. + + """ + + def __init__(self, + kernel_config: KernelConfig, + loader_elf_path: Path, + kernel_elf: ElfFile, + initial_task_elf: ElfFile, + initial_task_phys_base: Optional[int], + reserved_region: MemoryRegion, + regions: List[Tuple[int, bytes]], + ) -> None: + # Load the loader ELF file. + self._elf = ElfFile.from_path(loader_elf_path) + + # Add the PD memory regions as segments. + for r in regions: + segment = ElfSegment(phys_addr=r[0], + virt_addr=0, + data=r[1], + loadable=True, + attrs=SegmentAttributes.PF_R) + self._elf.add_segment(segment) + + # Add the kernel memory regions as segments. + for segment in kernel_elf.segments: + # Wipe the virtual address fields that are unnecessary and + # cause issues since they are 64-bit wide. + segment.virt_addr = 0 + self._elf.add_segment(segment) + + # Save the kernel's entry point address so we can jump into it + # once we're done with our boot dance. + self._elf.write_symbol("kernel_entry", pack(' None: + self._elf.write(path, MachineType.EM_386)