From 9dbaa04128ee24f598c69bbcbae931650b2ed7d4 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Wed, 18 Jun 2025 16:19:21 +0200 Subject: [PATCH 01/57] ZTS: Use FreeBSD cloudinit images FreeBSD provides CI-IMAGES since some time. These images are based on nuageinit, which does not support fqdn and sudo for example. So we need currently some workarounds to get it working. The FreeBSD images will be more compatible with cloud-init in some near future. Then we can remove the workaround things. These versions are used for testing: - freebsd13-4r (RELEASE) - freebsd14-3s (STABLE) - freebsd15-0c (CURRENT) Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Tino Reichardt Closes #17462 --- .github/workflows/scripts/qemu-2-start.sh | 143 ++++++++++++++-------- .github/workflows/zfs-qemu.yml | 10 +- 2 files changed, 99 insertions(+), 54 deletions(-) diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 28da6700e541..7e20a98c2faf 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -12,10 +12,10 @@ OS="$1" # OS variant (virt-install --os-variant list) OSv=$OS -# compressed with .zst extension -REPO="https://github.com/mcmilk/openzfs-freebsd-images" -FREEBSD="$REPO/releases/download/v2025-04-13" -URLzs="" +# FreeBSD urls's +FREEBSD_REL="https://download.freebsd.org/releases/CI-IMAGES" +FREEBSD_SNAP="https://download.freebsd.org/snapshots/CI-IMAGES" +URLxz="" # Ubuntu mirrors UBMIRROR="https://cloud-images.ubuntu.com" @@ -72,49 +72,56 @@ case "$OS" in URL="https://download.fedoraproject.org/pub/fedora/linux/releases/42/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-42-1.1.x86_64.qcow2" ;; freebsd13-4r) - OSNAME="FreeBSD 13.4-RELEASE" + FreeBSD="13.4-RELEASE" + OSNAME="FreeBSD $FreeBSD" OSv="freebsd13.0" - URLzs="$FREEBSD/amd64-freebsd-13.4-RELEASE.qcow2.zst" - BASH="/usr/local/bin/bash" + 
URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz" + KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz" NIC="rtl8139" ;; freebsd13-5r) - OSNAME="FreeBSD 13.5-RELEASE" + FreeBSD="13.5-RELEASE" + OSNAME="FreeBSD $FreeBSD" OSv="freebsd13.0" - URLzs="$FREEBSD/amd64-freebsd-13.5-RELEASE.qcow2.zst" - BASH="/usr/local/bin/bash" + URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz" + KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz" NIC="rtl8139" ;; - freebsd14-1r) - OSNAME="FreeBSD 14.1-RELEASE" + freebsd14-2r) + FreeBSD="14.2-RELEASE" + OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" - URLzs="$FREEBSD/amd64-freebsd-14.1-RELEASE.qcow2.zst" - BASH="/usr/local/bin/bash" + KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz" + URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz" ;; - freebsd14-2r) - OSNAME="FreeBSD 14.2-RELEASE" + freebsd14-3r) + FreeBSD="14.3-RELEASE" + OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" - URLzs="$FREEBSD/amd64-freebsd-14.2-RELEASE.qcow2.zst" - BASH="/usr/local/bin/bash" + URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz" + KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz" ;; freebsd13-5s) - OSNAME="FreeBSD 13.5-STABLE" + FreeBSD="13.5-STABLE" + OSNAME="FreeBSD $FreeBSD" OSv="freebsd13.0" - URLzs="$FREEBSD/amd64-freebsd-13.5-STABLE.qcow2.zst" - BASH="/usr/local/bin/bash" + URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz" + KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" NIC="rtl8139" ;; - freebsd14-2s) - OSNAME="FreeBSD 14.2-STABLE" + freebsd14-3s) + FreeBSD="14.3-STABLE" + OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" - URLzs="$FREEBSD/amd64-freebsd-14.2-STABLE.qcow2.zst" - BASH="/usr/local/bin/bash" + URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" + KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - OSNAME="FreeBSD 15.0-CURRENT" + FreeBSD="15.0-CURRENT" + 
OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" - URLzs="$FREEBSD/amd64-freebsd-15.0-CURRENT.qcow2.zst" - BASH="/usr/local/bin/bash" + URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" + KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; tumbleweed) OSNAME="openSUSE Tumbleweed" @@ -168,31 +175,37 @@ echo "CPU=\"$CPU\"" >> $ENV sudo mkdir -p "/mnt/tests" sudo chown -R $(whoami) /mnt/tests +DISK="/dev/zvol/zpool/openzfs" +sudo zfs create -ps -b 64k -V 80g zpool/openzfs +while true; do test -b $DISK && break; sleep 1; done + # we are downloading via axel, curl and wget are mostly slower and # require more return value checking -IMG="/mnt/tests/cloudimg.qcow2" -if [ ! -z "$URLzs" ]; then - echo "Loading image $URLzs ..." - time axel -q -o "$IMG.zst" "$URLzs" - zstd -q -d --rm "$IMG.zst" +IMG="/mnt/tests/cloud-image" +if [ ! -z "$URLxz" ]; then + echo "Loading $URLxz ..." + time axel -q -o "$IMG" "$URLxz" + echo "Loading $KSRC ..." + time axel -q -o ~/src.txz $KSRC else - echo "Loading image $URL ..." + echo "Loading $URL ..." time axel -q -o "$IMG" "$URL" fi -DISK="/dev/zvol/zpool/openzfs" -FORMAT="raw" -sudo zfs create -ps -b 64k -V 80g zpool/openzfs -while true; do test -b $DISK && break; sleep 1; done echo "Importing VM image to zvol..." -sudo qemu-img dd -f qcow2 -O raw if=$IMG of=$DISK bs=4M +if [ ! 
-z "$URLxz" ]; then + xzcat -T0 $IMG | sudo dd of=$DISK bs=4M +else + sudo qemu-img dd -f qcow2 -O raw if=$IMG of=$DISK bs=4M +fi rm -f $IMG PUBKEY=$(cat ~/.ssh/id_ed25519.pub) -cat < /tmp/user-data +if [ ${OS:0:7} != "freebsd" ]; then + cat < /tmp/user-data #cloud-config -fqdn: $OS +hostname: $OS users: - name: root @@ -208,6 +221,19 @@ growpart: devices: ['/'] ignore_growroot_disabled: false EOF +else + cat < /tmp/user-data +#cloud-config + +hostname: $OS + +# minimized config without sudo for nuageinit of FreeBSD +growpart: + mode: auto + devices: ['/'] + ignore_growroot_disabled: false +EOF +fi sudo virsh net-update default add ip-dhcp-host \ "" --live --config @@ -223,16 +249,9 @@ sudo virt-install \ --graphics none \ --network bridge=virbr0,model=$NIC,mac='52:54:00:83:79:00' \ --cloud-init user-data=/tmp/user-data \ - --disk $DISK,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ + --disk $DISK,bus=virtio,cache=none,format=raw,driver.discard=unmap \ --import --noautoconsole >/dev/null -# enable KSM on Linux -if [ ${OS:0:7} != "freebsd" ]; then - sudo virsh dommemstat --domain "openzfs" --period 5 - sudo virsh node-memory-tune 100 50 1 - echo 1 | sudo tee /sys/kernel/mm/ksm/run > /dev/null -fi - # Give the VMs hostnames so we don't have to refer to them with # hardcoded IP addresses. 
# @@ -252,3 +271,29 @@ StrictHostKeyChecking no # small timeout, used in while loops later ConnectTimeout 1 EOF + +if [ ${OS:0:7} != "freebsd" ]; then + # enable KSM on Linux + sudo virsh dommemstat --domain "openzfs" --period 5 + sudo virsh node-memory-tune 100 50 1 + echo 1 | sudo tee /sys/kernel/mm/ksm/run > /dev/null +else + # on FreeBSD we need some more init stuff, because of nuageinit + BASH="/usr/local/bin/bash" + while pidof /usr/bin/qemu-system-x86_64 >/dev/null; do + ssh 2>/dev/null root@vm0 "uname -a" && break + done + ssh root@vm0 "pkg install -y bash ca_root_nss git qemu-guest-agent python3 py311-cloud-init" + ssh root@vm0 "chsh -s $BASH root" + ssh root@vm0 'sysrc qemu_guest_agent_enable="YES"' + ssh root@vm0 'sysrc cloudinit_enable="YES"' + ssh root@vm0 "pw add user zfs -w no -s $BASH" + ssh root@vm0 'mkdir -p ~zfs/.ssh' + ssh root@vm0 'echo "zfs ALL=(ALL:ALL) NOPASSWD: ALL" >> /usr/local/etc/sudoers' + ssh root@vm0 'echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config' + scp ~/.ssh/id_ed25519.pub "root@vm0:~zfs/.ssh/authorized_keys" + ssh root@vm0 'chown -R zfs ~zfs' + ssh root@vm0 'service sshd restart' + scp ~/src.txz "root@vm0:/tmp/src.txz" + ssh root@vm0 'tar -C / -zxf /tmp/src.txz' +fi diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index 1d9899ae895f..035d8be7e227 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -39,8 +39,8 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "almalinux10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-4r", "freebsd14-2s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' - QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-2r", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "almalinux10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-4r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' + QUICK_OS='["almalinux8", "almalinux9", 
"almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]' # determine CI type when running on PR ci_type="full" if ${{ github.event_name == 'pull_request' }}; then @@ -84,9 +84,9 @@ jobs: # rhl: almalinux8, almalinux9, centos-stream9, fedora41 # debian: debian11, debian12, ubuntu22, ubuntu24 # misc: archlinux, tumbleweed - # FreeBSD variants of 2024-12: - # FreeBSD Release: freebsd13-4r, freebsd14-2r - # FreeBSD Stable: freebsd13-4s, freebsd14-2s + # FreeBSD variants of 2025-06: + # FreeBSD Release: freebsd13-4r, freebsd13-5r, freebsd14-1r, freebsd14-2r, freebsd14-3r + # FreeBSD Stable: freebsd13-5s, freebsd14-3s # FreeBSD Current: freebsd15-0c os: ${{ fromJson(needs.test-config.outputs.test_os) }} runs-on: ubuntu-24.04 From 9abb79767792e6e26e4e469f0523fc0e1f31aa2f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 3 Jul 2025 10:27:05 -0700 Subject: [PATCH 02/57] CI: run ztest on compressed zpool When running ztest under the CI a common failure mode is for the underlying filesystem to run out of available free space. Since the storage associated with a GitHub-hosted running is fixed, we instead create a pool and use a compressed ZFS dataset to store the ztest vdev files. This significantly increases the available capacity since the data written by ztest is highly compressible. A compression ratio of over 40:1 is conservatively achieved using the default lz4 compression. Autotrimming is enabled to ensure freed blocks are discarded from the backing cipool vdev file. 
Reviewed-by: Tino Reichardt Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Closes #17501 --- .github/workflows/zloop.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/zloop.yml b/.github/workflows/zloop.yml index 7b3bf49d90d5..4ae3ccdc5484 100644 --- a/.github/workflows/zloop.yml +++ b/.github/workflows/zloop.yml @@ -12,7 +12,8 @@ jobs: zloop: runs-on: ubuntu-24.04 env: - TEST_DIR: /var/tmp/zloop + WORK_DIR: /mnt/zloop + CORE_DIR: /mnt/zloop/cores steps: - uses: actions/checkout@v4 with: @@ -40,38 +41,37 @@ jobs: sudo modprobe zfs - name: Tests run: | - sudo mkdir -p $TEST_DIR - # run for 10 minutes or at most 6 iterations for a maximum runner - # time of 60 minutes. - sudo /usr/share/zfs/zloop.sh -t 600 -I 6 -l -m 1 -- -T 120 -P 60 + sudo truncate -s 256G /mnt/vdev + sudo zpool create cipool -m $WORK_DIR -O compression=on -o autotrim=on /mnt/vdev + sudo /usr/share/zfs/zloop.sh -t 600 -I 6 -l -m 1 -c $CORE_DIR -f $WORK_DIR -- -T 120 -P 60 - name: Prepare artifacts if: failure() run: | - sudo chmod +r -R $TEST_DIR/ + sudo chmod +r -R $WORK_DIR/ - name: Ztest log if: failure() run: | - grep -B10 -A1000 'ASSERT' $TEST_DIR/*/ztest.out || tail -n 1000 $TEST_DIR/*/ztest.out + grep -B10 -A1000 'ASSERT' $CORE_DIR/*/ztest.out || tail -n 1000 $CORE_DIR/*/ztest.out - name: Gdb log if: failure() run: | - sed -n '/Backtraces (full)/q;p' $TEST_DIR/*/ztest.gdb + sed -n '/Backtraces (full)/q;p' $CORE_DIR/*/ztest.gdb - name: Zdb log if: failure() run: | - cat $TEST_DIR/*/ztest.zdb + cat $CORE_DIR/*/ztest.zdb - uses: actions/upload-artifact@v4 if: failure() with: name: Logs path: | - /var/tmp/zloop/*/ - !/var/tmp/zloop/*/vdev/ + /mnt/zloop/*/ + !/mnt/zloop/cores/*/vdev/ if-no-files-found: ignore - uses: actions/upload-artifact@v4 if: failure() with: name: Pool files path: | - /var/tmp/zloop/*/vdev/ + /mnt/zloop/cores/*/vdev/ if-no-files-found: ignore From 3c5e1bae38671dbc822ce8a0c0550507b8d582d0 Mon Sep 17 
00:00:00 2001 From: Alexander Motin Date: Wed, 9 Jul 2025 17:38:32 -0400 Subject: [PATCH 03/57] CI: Switch from FreeBSD 13.4 to 13.5 FreeBSD 13.4 is EOL since June 30, 2025. Reviewed-by: Brian Behlendorf Reviewed-by: Tino Reichardt Signed-off-by: Alexander Motin Closes #17519 --- .github/workflows/scripts/qemu-2-start.sh | 8 -------- .github/workflows/zfs-qemu.yml | 4 ++-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 7e20a98c2faf..885a64037f89 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -71,14 +71,6 @@ case "$OS" in OSv="fedora-unknown" URL="https://download.fedoraproject.org/pub/fedora/linux/releases/42/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-42-1.1.x86_64.qcow2" ;; - freebsd13-4r) - FreeBSD="13.4-RELEASE" - OSNAME="FreeBSD $FreeBSD" - OSv="freebsd13.0" - URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz" - KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz" - NIC="rtl8139" - ;; freebsd13-5r) FreeBSD="13.5-RELEASE" OSNAME="FreeBSD $FreeBSD" diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index 035d8be7e227..ea17014a117f 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -39,7 +39,7 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "almalinux10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-4r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "almalinux10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]' # determine CI type when running on PR ci_type="full" @@ -85,7 +85,7 @@ jobs: # debian: debian11, debian12, 
ubuntu22, ubuntu24 # misc: archlinux, tumbleweed # FreeBSD variants of 2025-06: - # FreeBSD Release: freebsd13-4r, freebsd13-5r, freebsd14-1r, freebsd14-2r, freebsd14-3r + # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r # FreeBSD Stable: freebsd13-5s, freebsd14-3s # FreeBSD Current: freebsd15-0c os: ${{ fromJson(needs.test-config.outputs.test_os) }} From c6a32e57caa7d0bd44c49e8a4b6ed03fa7229212 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Wed, 9 Jul 2025 23:40:32 +0200 Subject: [PATCH 04/57] ZTS: Fix FreeBSD 15.0 ksh errors The package ksh93 is replaced by ksh now. This works for FreeBSD 13 and 14 also. Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Alexander Motin Signed-off-by: Tino Reichardt Closes #17523 --- .github/workflows/scripts/qemu-3-deps-vm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/qemu-3-deps-vm.sh b/.github/workflows/scripts/qemu-3-deps-vm.sh index a581b13c2f58..904fbfbf1e1e 100755 --- a/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -51,7 +51,7 @@ function freebsd() { echo "##[group]Install Development Tools" sudo pkg install -y autoconf automake autotools base64 checkbashisms fio \ - gdb gettext gettext-runtime git gmake gsed jq ksh93 lcov libtool lscpu \ + gdb gettext gettext-runtime git gmake gsed jq ksh lcov libtool lscpu \ pkgconf python python3 pamtester pamtester qemu-guest-agent rsync xxhash sudo pkg install -xy \ '^samba4[[:digit:]]+$' \ From 4712c76c9f21a71425e39596bf1f47cc12bbbb8e Mon Sep 17 00:00:00 2001 From: Carl George Date: Tue, 15 Jul 2025 12:00:35 -0500 Subject: [PATCH 05/57] CI: Add CentOS Stream 9/10 to the FULL_OS runner list Testing on CentOS Stream provides several months advance notice of changes coming to the RHEL kernel. This should help OpenZFS be proactive instead of reactive to new RHEL minor versions. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tino Reichardt Signed-off-by: Carl George ZFS-CI-Type: full Closes #16904 Closes #17526 --- .github/workflows/zfs-qemu.yml | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index ea17014a117f..cda620313189 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -5,16 +5,6 @@ on: pull_request: workflow_dispatch: inputs: - include_stream9: - type: boolean - required: false - default: false - description: 'Test on CentOS 9 stream' - include_stream10: - type: boolean - required: false - default: false - description: 'Test on CentOS 10 stream' fedora_kernel_ver: type: string required: false @@ -39,7 +29,7 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "almalinux10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]' # determine CI type when running on PR ci_type="full" @@ -63,14 +53,6 @@ jobs: os_json=$(echo ${os_selection} | jq -c) fi - # Add optional runners - if [ "${{ github.event.inputs.include_stream9 }}" == 'true' ]; then - os_json=$(echo $os_json | jq -c '. += ["centos-stream9"]') - fi - if [ "${{ github.event.inputs.include_stream10 }}" == 'true' ]; then - os_json=$(echo $os_json | jq -c '. 
+= ["centos-stream10"]') - fi - echo $os_json echo "os=$os_json" >> $GITHUB_OUTPUT echo "ci_type=$ci_type" >> $GITHUB_OUTPUT From 82d516c2f4f66f75c70a7a42558454edc63e6382 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 25 Jul 2025 15:47:21 -0700 Subject: [PATCH 06/57] CI: Remove Debian backports The latest Debian 11 image includes bullseye-backports as a default repository in the /etc/apt/sources.list. However, this repository has gone end of life which effectively breaks the default install. We shouldn't need anything in backports so lets unconditionally remove backports on all Debian builders to resolve the issue. Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Closes #17569 --- .github/workflows/scripts/qemu-3-deps-vm.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/scripts/qemu-3-deps-vm.sh b/.github/workflows/scripts/qemu-3-deps-vm.sh index 904fbfbf1e1e..c41ecd09d52e 100755 --- a/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -28,6 +28,7 @@ function debian() { export DEBIAN_FRONTEND="noninteractive" echo "##[group]Running apt-get update+upgrade" + sudo sed -i '/[[:alpha:]]-backports/d' /etc/apt/sources.list sudo apt-get update -y sudo apt-get upgrade -y echo "##[endgroup]" From 629108efc9d7b2f7ac53ba6357a3e3284bf14714 Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Wed, 30 Jul 2025 12:45:28 -0400 Subject: [PATCH 07/57] Add CodeQL mismatched dsl_dataset_hold/_rele pairs check This check is currently limited to checking mismatches that occur in the same stack frame. It does not detect across stack frames. 
Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Richard Yao Closes #17352 --- .github/codeql-cpp.yml | 1 + .../cpp/dslDatasetHoldReleMismatch.ql | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 .github/codeql/custom-queries/cpp/dslDatasetHoldReleMismatch.ql diff --git a/.github/codeql-cpp.yml b/.github/codeql-cpp.yml index 88b8c6086025..d99cdb559244 100644 --- a/.github/codeql-cpp.yml +++ b/.github/codeql-cpp.yml @@ -2,3 +2,4 @@ name: "Custom CodeQL Analysis" queries: - uses: ./.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql + - uses: ./.github/codeql/custom-queries/cpp/dslDatasetHoldReleMismatch.ql diff --git a/.github/codeql/custom-queries/cpp/dslDatasetHoldReleMismatch.ql b/.github/codeql/custom-queries/cpp/dslDatasetHoldReleMismatch.ql new file mode 100644 index 000000000000..fb5dae35092f --- /dev/null +++ b/.github/codeql/custom-queries/cpp/dslDatasetHoldReleMismatch.ql @@ -0,0 +1,34 @@ +/** + * @name Detect mismatched dsl_dataset_hold/_rele pairs + * @description Flags instances of issue #12014 where + * - a dataset held with dsl_dataset_hold_obj() ends up in dsl_dataset_rele_flags(), or + * - a dataset held with dsl_dataset_hold_obj_flags() ends up in dsl_dataset_rele(). 
+ * @kind problem + * @severity error + * @tags correctness + * @id cpp/dslDatasetHoldReleMismatch + */ + +import cpp + +from Variable ds, Call holdCall, Call releCall, string message +where + ds.getType().toString() = "dsl_dataset_t *" and + holdCall.getASuccessor*() = releCall and + ( + (holdCall.getTarget().getName() = "dsl_dataset_hold_obj_flags" and + holdCall.getArgument(4).(AddressOfExpr).getOperand().(VariableAccess).getTarget() = ds and + releCall.getTarget().getName() = "dsl_dataset_rele" and + releCall.getArgument(0).(VariableAccess).getTarget() = ds and + message = "Held with dsl_dataset_hold_obj_flags but released with dsl_dataset_rele") + or + (holdCall.getTarget().getName() = "dsl_dataset_hold_obj" and + holdCall.getArgument(3).(AddressOfExpr).getOperand().(VariableAccess).getTarget() = ds and + releCall.getTarget().getName() = "dsl_dataset_rele_flags" and + releCall.getArgument(0).(VariableAccess).getTarget() = ds and + message = "Held with dsl_dataset_hold_obj but released with dsl_dataset_rele_flags") + ) +select releCall, + "Mismatched release: held with $@ but released with " + releCall.getTarget().getName() + " for dataset $@", + holdCall, holdCall.getTarget().getName(), + ds, ds.toString() From 03825197eda434f32b0574d45875437f95266a97 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 5 Aug 2025 11:18:06 +1000 Subject: [PATCH 08/57] CI: match and trim out internal timestamp for test prefix Adjust the regexes to match the test line with timestamps, then remove them for the summary. The internal timestamp is still in the full logs. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tino Reichardt Signed-off-by: Rob Norris Closes #17045 --- .github/workflows/scripts/qemu-6-tests.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scripts/qemu-6-tests.sh b/.github/workflows/scripts/qemu-6-tests.sh index e8e6adecd62f..5ab822f4f076 100755 --- a/.github/workflows/scripts/qemu-6-tests.sh +++ b/.github/workflows/scripts/qemu-6-tests.sh @@ -21,11 +21,13 @@ function prefix() { S=$((DIFF-(M*60))) CTR=$(cat /tmp/ctr) - echo $LINE| grep -q "^Test[: ]" && CTR=$((CTR+1)) && echo $CTR > /tmp/ctr + echo $LINE| grep -q '^\[.*] Test[: ]' && CTR=$((CTR+1)) && echo $CTR > /tmp/ctr BASE="$HOME/work/zfs/zfs" COLOR="$BASE/scripts/zfs-tests-color.sh" - CLINE=$(echo $LINE| grep "^Test[ :]" | sed -e 's|/usr/local|/usr|g' \ + CLINE=$(echo $LINE| grep '^\[.*] Test[: ]' \ + | sed -e 's|^\[.*] Test|Test|g' \ + | sed -e 's|/usr/local|/usr|g' \ | sed -e 's| /usr/share/zfs/zfs-tests/tests/| |g' | $COLOR) if [ -z "$CLINE" ]; then printf "vm${ID}: %s\n" "$LINE" From 387886009163d13d735c854c64c2825db622000a Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Tue, 12 Aug 2025 13:38:55 -0700 Subject: [PATCH 09/57] FreeBSD 15.0 is now "PRERELEASE" Chase URL change from the FreeBSD project. 
Reviewed-by: Brian Behlendorf Signed-off-by: Colin Percival Closes #17617 --- .github/workflows/scripts/qemu-2-start.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 885a64037f89..70a2364f1fc6 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -109,7 +109,7 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-CURRENT" + FreeBSD="15.0-PRERELEASE" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" From 5a5bff98f73dfd8e017e8c55544b2d82f1cd98f5 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 15 Aug 2025 09:21:23 -0700 Subject: [PATCH 10/57] CI: Add optional patch level, fix hostname on F42 In the past there have been times when we need to generate new RPMs for an existing ZFS release. Typically this happens when a new RHEL version comes out and the kernel symbols no longer match. To get users to auto-update we just bump the patch number. For example, we had to create zfs-2.1.13-1 for EL8.8 and zfs-2.1.13-2 for EL8.9. This commit adds an optional patch level text box to the github package builder runner. In addition, this commit also uses `hostnamectl` instead of `hostname` for F42+ compatibility, if available. 
Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #17638 --- .github/workflows/scripts/qemu-4-build-vm.sh | 23 +++++++++++++++++--- .github/workflows/zfs-qemu-packages.yml | 13 ++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/.github/workflows/scripts/qemu-4-build-vm.sh b/.github/workflows/scripts/qemu-4-build-vm.sh index 17e976ebcc39..2807d9e77127 100755 --- a/.github/workflows/scripts/qemu-4-build-vm.sh +++ b/.github/workflows/scripts/qemu-4-build-vm.sh @@ -5,12 +5,13 @@ # # Usage: # -# qemu-4-build-vm.sh OS [--enable-debug][--dkms][--poweroff] -# [--release][--repo][--tarball] +# qemu-4-build-vm.sh OS [--enable-debug][--dkms][--patch-level NUM] +# [--poweroff][--release][--repo][--tarball] # # OS: OS name like 'fedora41' # --enable-debug: Build RPMs with '--enable-debug' (for testing) # --dkms: Build DKMS RPMs as well +# --patch-level NUM: Use a custom patch level number for packages. # --poweroff: Power-off the VM after building # --release Build zfs-release*.rpm as well # --repo After building everything, copy RPMs into /tmp/repo @@ -21,6 +22,7 @@ ENABLE_DEBUG="" DKMS="" +PATCH_LEVEL="" POWEROFF="" RELEASE="" REPO="" @@ -35,6 +37,11 @@ while [[ $# -gt 0 ]]; do DKMS=1 shift ;; + --patch-level) + PATCH_LEVEL=$2 + shift + shift + ;; --poweroff) POWEROFF=1 shift @@ -215,6 +222,10 @@ function rpm_build_and_install() { run ./autogen.sh echo "##[endgroup]" + if [ -n "$PATCH_LEVEL" ] ; then + sed -i -E 's/(Release:\s+)1/\1'$PATCH_LEVEL'/g' META + fi + echo "##[group]Configure" run ./configure --enable-debuginfo $extra echo "##[endgroup]" @@ -328,7 +339,13 @@ fi # almalinux9.5 # fedora42 source /etc/os-release -sudo hostname "$ID$VERSION_ID" + if which hostnamectl &> /dev/null ; then + # Fedora 42+ use hostnamectl + sudo hostnamectl set-hostname "$ID$VERSION_ID" + sudo hostnamectl set-hostname --pretty "$ID$VERSION_ID" +else + sudo hostname "$ID$VERSION_ID" +fi # save some sysinfo uname -a > /var/tmp/uname.txt diff --git 
a/.github/workflows/zfs-qemu-packages.yml b/.github/workflows/zfs-qemu-packages.yml index 5b5afe746859..d8a95954fe1a 100644 --- a/.github/workflows/zfs-qemu-packages.yml +++ b/.github/workflows/zfs-qemu-packages.yml @@ -32,6 +32,11 @@ on: options: - "Build RPMs" - "Test repo" + patch_level: + type: string + required: false + default: "" + description: "(optional) patch level number" repo_url: type: string required: false @@ -78,7 +83,13 @@ jobs: mkdir -p /tmp/repo ssh zfs@vm0 '$HOME/zfs/.github/workflows/scripts/qemu-test-repo-vm.sh' ${{ github.event.inputs.repo_url }} else - .github/workflows/scripts/qemu-4-build.sh --repo --release --dkms --tarball ${{ matrix.os }} + EXTRA="" + if [ -n "${{ github.event.inputs.patch_level }}" ] ; then + EXTRA="--patch-level ${{ github.event.inputs.patch_level }}" + fi + + .github/workflows/scripts/qemu-4-build.sh $EXTRA \ + --repo --release --dkms --tarball ${{ matrix.os }} fi - name: Prepare artifacts From 2b486e15212809be96ad72343f3312628b91ecd5 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Wed, 20 Aug 2025 17:03:34 +0200 Subject: [PATCH 11/57] CI: Add Debian 13 to the FULL_OS runner list This commit adds Debian 13 alias Trixie to the checked operating systems. The image needs to be run with UEFI support. Current Debian version overview: - Debian 11 (Bullseye) -> "oldoldstable" - Debian 12 (Bookworm) -> "oldstable" - Debian 13 (Trixie) -> new "stable" The CI will be run on Debian 12 and Debian 13 now. Debian 11 is kept, but won't be used automatically. 
Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Tino Reichardt Closes #17648 --- .github/workflows/scripts/qemu-2-start.sh | 14 +++++++++++++- .github/workflows/scripts/qemu-3-deps-vm.sh | 2 +- .github/workflows/scripts/qemu-5-setup.sh | 14 ++++++++++++-- .github/workflows/zfs-qemu.yml | 6 +++--- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 70a2364f1fc6..62e06926e268 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -25,6 +25,10 @@ UBMIRROR="https://cloud-images.ubuntu.com" # default nic model for vm's NIC="virtio" +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in almalinux8) OSNAME="AlmaLinux 8" @@ -61,6 +65,14 @@ case "$OS" in OSNAME="Debian 12" URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2" ;; + debian13) + OSNAME="Debian 13" + # TODO: Overwrite OSv to debian13 for virt-install until it's added to osinfo + OSv="debian12" + URL="https://cloud.debian.org/images/cloud/trixie/latest/debian-13-generic-amd64.qcow2" + OPTS[0]="--boot" + OPTS[1]="uefi=on" + ;; fedora41) OSNAME="Fedora 41" OSv="fedora-unknown" @@ -242,7 +254,7 @@ sudo virt-install \ --network bridge=virbr0,model=$NIC,mac='52:54:00:83:79:00' \ --cloud-init user-data=/tmp/user-data \ --disk $DISK,bus=virtio,cache=none,format=raw,driver.discard=unmap \ - --import --noautoconsole >/dev/null + --import --noautoconsole ${OPTS[0]} ${OPTS[1]} >/dev/null # Give the VMs hostnames so we don't have to refer to them with # hardcoded IP addresses. 
diff --git a/.github/workflows/scripts/qemu-3-deps-vm.sh b/.github/workflows/scripts/qemu-3-deps-vm.sh index c41ecd09d52e..ee058b488088 100755 --- a/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -41,7 +41,7 @@ function debian() { libelf-dev libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev \ libtool libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \ lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \ - python3-cffi python3-dev python3-distlib python3-packaging \ + python3-cffi python3-dev python3-distlib python3-packaging libtirpc-dev \ python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \ rsync samba sysstat uuid-dev watchdog wget xfslibs-dev xxhash zlib1g-dev echo "##[endgroup]" diff --git a/.github/workflows/scripts/qemu-5-setup.sh b/.github/workflows/scripts/qemu-5-setup.sh index 6bf10024a1a6..0adcad2a99bc 100755 --- a/.github/workflows/scripts/qemu-5-setup.sh +++ b/.github/workflows/scripts/qemu-5-setup.sh @@ -12,16 +12,26 @@ source /var/tmp/env.txt # wait for poweroff to succeed PID=$(pidof /usr/bin/qemu-system-x86_64) tail --pid=$PID -f /dev/null -sudo virsh undefine openzfs +sudo virsh undefine --nvram openzfs # cpu pinning CPUSET=("0,1" "2,3") +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in freebsd*) # FreeBSD needs only 6GiB RAM=6 ;; + debian13) + RAM=8 + # Boot Debian 13 with uefi=on and secureboot=off (ZFS Kernel Module not signed) + OPTS[0]="--boot" + OPTS[1]="firmware=efi,firmware.feature0.name=secure-boot,firmware.feature0.enabled=no" + ;; *) # Linux needs more memory, but can be optimized to share it via KSM RAM=8 @@ -79,7 +89,7 @@ EOF --network bridge=virbr0,model=$NIC,mac="52:54:00:83:79:0$i" \ --disk $DISK-system,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ --disk $DISK-tests,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ - --import --noautoconsole >/dev/null + --import 
--noautoconsole ${OPTS[0]} ${OPTS[1]} done # generate some memory stats diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index cda620313189..4ebb80af1f03 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -29,7 +29,7 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]' # determine CI type when running on PR ci_type="full" @@ -63,8 +63,8 @@ jobs: strategy: fail-fast: false matrix: - # rhl: almalinux8, almalinux9, centos-stream9, fedora41 - # debian: debian11, debian12, ubuntu22, ubuntu24 + # rhl: almalinux8, almalinux9, centos-stream9, fedora4x + # debian: debian12, debian13, ubuntu22, ubuntu24 # misc: archlinux, tumbleweed # FreeBSD variants of 2025-06: # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r From a70e507b5933707d8b48e58b64d3702bbb43af9e Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 5 Sep 2025 09:08:15 -0700 Subject: [PATCH 12/57] CI: Increase 'Setup QEMU' timeout to 15 minutes We've seen Fedora 42 still setting up after 10 min. Change the timeout to 15 min. 
Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Tony Hutter Closes #17697 --- .github/workflows/zfs-qemu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index 4ebb80af1f03..a071c26a09b5 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Setup QEMU - timeout-minutes: 10 + timeout-minutes: 15 run: .github/workflows/scripts/qemu-1-setup.sh - name: Start build machine From e72a630c90cde38ee37ad75f4caf9e6074119911 Mon Sep 17 00:00:00 2001 From: Shengqi Chen Date: Thu, 4 Sep 2025 11:09:58 +0800 Subject: [PATCH 13/57] ci: use real head sha instead of GITHUB_SHA when generating CI type Because GitHub creates a merge commit on top of the real head, the check on HEAD will fail regardless. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Shengqi Chen Closes #17695 --- .github/workflows/scripts/generate-ci-type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/generate-ci-type.py b/.github/workflows/scripts/generate-ci-type.py index b49255e8381d..08021aabcb61 100755 --- a/.github/workflows/scripts/generate-ci-type.py +++ b/.github/workflows/scripts/generate-ci-type.py @@ -65,7 +65,7 @@ def output_type(type, reason): # check last (HEAD) commit message last_commit_message_raw = subprocess.run([ - 'git', 'show', '-s', '--format=%B', 'HEAD' + 'git', 'show', '-s', '--format=%B', head ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) for line in last_commit_message_raw.stdout.decode().splitlines(): From 2617d6271709d7392f8836a5c742936df766c270 Mon Sep 17 00:00:00 2001 From: Shengqi Chen Date: Thu, 4 Sep 2025 11:18:01 +0800 Subject: [PATCH 14/57] ci: fix syntax issues in zfs-qemu.yml Otherwise it might become `if [ == "" ]` which is ill-formed.
Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Shengqi Chen Closes #17695 --- .github/workflows/zfs-qemu.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index a071c26a09b5..a5dbfc099c90 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -44,7 +44,7 @@ jobs: os_selection="$FULL_OS" fi - if [ ${{ github.event.inputs.fedora_kernel_ver }} != "" ] ; then + if ${{ github.event.inputs.fedora_kernel_ver != '' }}; then # They specified a custom kernel version for Fedora. Use only # Fedora runners. os_json=$(echo ${os_selection} | jq -c '[.[] | select(startswith("fedora"))]') @@ -53,9 +53,8 @@ jobs: os_json=$(echo ${os_selection} | jq -c) fi - echo $os_json - echo "os=$os_json" >> $GITHUB_OUTPUT - echo "ci_type=$ci_type" >> $GITHUB_OUTPUT + echo "os=$os_json" | tee -a $GITHUB_OUTPUT + echo "ci_type=$ci_type" | tee -a $GITHUB_OUTPUT qemu-vm: name: qemu-x86 From 6eca9f33e9894320856b718bc2d03d40c73fd3e3 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Wed, 10 Sep 2025 10:25:58 -0700 Subject: [PATCH 15/57] CI: Increase setup timeout to 20min, add timestamps - Increase qemu-1-setup.sh timeout to 20min since it sometimes fails to complete after 15min. - Timestamp all qemu-1-setup.sh lines to look for hangs. - Add a 'watchdog' process to print out the top running process every 30sec to help with debugging. 
Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #17714 --- .github/workflows/scripts/qemu-1-setup.sh | 10 ++++++++++ .github/workflows/zfs-qemu.yml | 8 ++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scripts/qemu-1-setup.sh b/.github/workflows/scripts/qemu-1-setup.sh index de29ad1f57b6..0278264d9279 100755 --- a/.github/workflows/scripts/qemu-1-setup.sh +++ b/.github/workflows/scripts/qemu-1-setup.sh @@ -6,6 +6,13 @@ set -eu +# We've been seeing this script take over 15min to run. This may or +# may not be normal. Just to get a little more insight, print out +# a message to stdout with the top running process, and do this every +# 30 seconds. We can delete this watchdog later once we get a better +# handle on what the timeout value should be. +(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu | head -n 2 | tail -n 1)}')]"; done) & + # install needed packages export DEBIAN_FRONTEND="noninteractive" sudo apt-get -y update @@ -65,3 +72,6 @@ sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 -O relatime=off \ for i in /sys/block/s*/queue/scheduler; do echo "none" | sudo tee $i done + +# Kill off our watchdog +kill $(jobs -p) diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index a5dbfc099c90..69349678d84c 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -77,8 +77,12 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Setup QEMU - timeout-minutes: 15 - run: .github/workflows/scripts/qemu-1-setup.sh + timeout-minutes: 20 + run: | + # Add a timestamp to each line to debug timeouts + while IFS=$'\n' read -r line; do + echo "$(date +'%H:%M:%S') $line" + done < <(.github/workflows/scripts/qemu-1-setup.sh) - name: Start build machine timeout-minutes: 10 From 62e871a98bdd0bb7ce6f4ea627241768c5c90667 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 15 Sep 2025 15:15:31 -0400 Subject: [PATCH 16/57] CI: Switch 
FreeBSD 15 to 15.0-ALPHA2 Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Alexander Motin Closes #17749 --- .github/workflows/scripts/qemu-2-start.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 62e06926e268..8439942c5a41 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -121,7 +121,7 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-PRERELEASE" + FreeBSD="15.0-ALPHA2" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" From 323e7ee2e4f85ab7e043fdd62b2e3b86bd41c128 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 25 Sep 2025 17:47:32 -0700 Subject: [PATCH 17/57] CI: update perf and bpftools with the kernel packages When updating a Fedora instance to an experimental kernel make sure to include the matching versioned perf and bpftool packages. This helps ensure there are no unexpected conflicts which would prevent the new packages from being installed. 
Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #17791 --- .github/workflows/scripts/qemu-3-deps-vm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/qemu-3-deps-vm.sh b/.github/workflows/scripts/qemu-3-deps-vm.sh index ee058b488088..4a7e724586f0 100755 --- a/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -104,7 +104,7 @@ function install_fedora_experimental_kernel { our_version="$1" sudo dnf -y copr enable @kernel-vanilla/stable sudo dnf -y copr enable @kernel-vanilla/mainline - all="$(sudo dnf list --showduplicates kernel-*)" + all="$(sudo dnf list --showduplicates kernel-* python3-perf* perf* bpftool*)" echo "Available versions:" echo "$all" From bc21a1c2c38c085c8bbb27b5d86396ac0026e700 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 26 Sep 2025 15:32:41 -0700 Subject: [PATCH 18/57] CI: Remove Buildbot references The Buildbot CI infrastructure has been fully replaced by GitHub Actions. Remove any lingering references from the repository. 
Reviewed-by: Alexander Motin Signed-off-by: Brian Behlendorf Closes #17794 --- .github/PULL_REQUEST_TEMPLATE.md | 5 ----- contrib/pyzfs/libzfs_core/test/test_libzfs_core.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 79809179cf13..47edc8174603 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,11 +2,6 @@ - - ### Motivation and Context diff --git a/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py b/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py index c94ae6de6bbf..136d48350ef1 100644 --- a/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py +++ b/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py @@ -4222,7 +4222,7 @@ def reset(self): self.getRoot().reset() return - # On the Buildbot builders this may fail with "pool is busy" + # On the CI builders this may fail with "pool is busy" # Retry 5 times before raising an error retry = 0 while True: From 9d48e0150ce69624b96996c7fda8242d2ab2aff9 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 26 Sep 2025 17:52:57 -0700 Subject: [PATCH 19/57] CI: Switch FreeBSD 15 to 15.0-ALPHA3 Signed-off-by: Brian Behlendorf Reviewed-by: Alexander Motin Closes #17795 --- .github/workflows/scripts/qemu-2-start.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 8439942c5a41..1c608348ffcd 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -121,7 +121,7 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-ALPHA2" + FreeBSD="15.0-ALPHA3" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" From c2a641e4b8e746b501a07bf988592351ec8430c3 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Mon, 29 Sep 2025 16:32:05 
-0700 Subject: [PATCH 20/57] CI: Add ZTS -O option, log Setup Testing Machines step Add a -O option to zfs-tests.sh to dump debug information on test timeout. The debug info includes: - 30 lines from 'top' - /proc/<PID>/stack output of process with highest CPU usage - Last lines strace-ing process with highest CPU usage - /proc/sysrq-trigger kernel stack traces All debug information gets dumped to /dev/kmsg (Linux only). In addition, print out the VM console lines from the "Setup Testing Machines" step. We have often seen VMs time out at this step and don't know why. Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #17753 --- .github/workflows/scripts/qemu-3-deps-vm.sh | 9 +-- .github/workflows/scripts/qemu-5-setup.sh | 25 ++++++--- .github/workflows/scripts/qemu-6-tests.sh | 2 +- scripts/zfs-tests.sh | 9 ++- tests/test-runner/bin/test-runner.py.in | 61 +++++++++++++++++++++ 5 files changed, 93 insertions(+), 13 deletions(-) diff --git a/.github/workflows/scripts/qemu-3-deps-vm.sh b/.github/workflows/scripts/qemu-3-deps-vm.sh index 4a7e724586f0..f67bb2f68e94 100755 --- a/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -20,7 +20,7 @@ function archlinux() { sudo pacman -Sy --noconfirm base-devel bc cpio cryptsetup dhclient dkms \ fakeroot fio gdb inetutils jq less linux linux-headers lsscsi nfs-utils \ parted pax perf python-packaging python-setuptools qemu-guest-agent ksh \ - samba sysstat rng-tools rsync wget xxhash + samba strace sysstat rng-tools rsync wget xxhash echo "##[endgroup]" } @@ -43,7 +43,8 @@ function debian() { lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \ python3-cffi python3-dev python3-distlib python3-packaging libtirpc-dev \ python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \ - rsync samba sysstat uuid-dev watchdog wget xfslibs-dev xxhash zlib1g-dev + rsync samba strace sysstat uuid-dev watchdog wget xfslibs-dev xxhash \ + zlib1g-dev echo
"##[endgroup]" } @@ -87,8 +88,8 @@ function rhel() { libuuid-devel lsscsi mdadm nfs-utils openssl-devel pam-devel pamtester \ parted perf python3 python3-cffi python3-devel python3-packaging \ kernel-devel python3-setuptools qemu-guest-agent rng-tools rpcgen \ - rpm-build rsync samba sysstat systemd watchdog wget xfsprogs-devel xxhash \ - zlib-devel + rpm-build rsync samba strace sysstat systemd watchdog wget xfsprogs-devel \ + xxhash zlib-devel echo "##[endgroup]" } diff --git a/.github/workflows/scripts/qemu-5-setup.sh b/.github/workflows/scripts/qemu-5-setup.sh index 0adcad2a99bc..4869c1003e48 100755 --- a/.github/workflows/scripts/qemu-5-setup.sh +++ b/.github/workflows/scripts/qemu-5-setup.sh @@ -108,19 +108,30 @@ echo '*/5 * * * * /root/cronjob.sh' > crontab.txt sudo crontab crontab.txt rm crontab.txt -# check if the machines are okay -echo "Waiting for vm's to come up... (${VMs}x CPU=$CPU RAM=$RAM)" -for ((i=1; i<=VMs; i++)); do - .github/workflows/scripts/qemu-wait-for-vm.sh vm$i -done -echo "All $VMs VMs are up now." - # Save the VM's serial output (ttyS0) to /var/tmp/console.txt # - ttyS0 on the VM corresponds to a local /dev/pty/N entry # - use 'virsh ttyconsole' to lookup the /dev/pty/N entry for ((i=1; i<=VMs; i++)); do mkdir -p $RESPATH/vm$i read "pty" <<< $(sudo virsh ttyconsole vm$i) + + # Create the file so we can tail it, even if there's no output. + touch $RESPATH/vm$i/console.txt + sudo nohup bash -c "cat $pty > $RESPATH/vm$i/console.txt" & + + # Write all VM boot lines to the console to aid in debugging failed boots. + # The boot lines from all the VMs will be munged together, so prepend each + # line with the vm hostname (like 'vm1:'). + (while IFS=$'\n' read -r line; do echo "vm$i: $line" ; done < <(sudo tail -f $RESPATH/vm$i/console.txt)) & + done echo "Console logging for ${VMs}x $OS started." + + +# check if the machines are okay +echo "Waiting for vm's to come up... 
(${VMs}x CPU=$CPU RAM=$RAM)" +for ((i=1; i<=VMs; i++)); do + .github/workflows/scripts/qemu-wait-for-vm.sh vm$i +done +echo "All $VMs VMs are up now." diff --git a/.github/workflows/scripts/qemu-6-tests.sh b/.github/workflows/scripts/qemu-6-tests.sh index 5ab822f4f076..ca6ac77f146d 100755 --- a/.github/workflows/scripts/qemu-6-tests.sh +++ b/.github/workflows/scripts/qemu-6-tests.sh @@ -111,7 +111,7 @@ fi sudo dmesg -c > dmesg-prerun.txt mount > mount.txt df -h > df-prerun.txt -$TDIR/zfs-tests.sh -vK -s 3GB -T $TAGS +$TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS RV=$? df -h > df-postrun.txt echo $RV > tests-exitcode.txt diff --git a/scripts/zfs-tests.sh b/scripts/zfs-tests.sh index 2906d73442c2..15d3a0eb9503 100755 --- a/scripts/zfs-tests.sh +++ b/scripts/zfs-tests.sh @@ -37,6 +37,7 @@ DEBUG="" CLEANUP="yes" CLEANUPALL="no" KMSG="" +TIMEOUT_DEBUG="" LOOPBACK="yes" STACK_TRACER="no" FILESIZE="4G" @@ -363,6 +364,7 @@ OPTIONS: -k Disable cleanup after test failure -K Log test names to /dev/kmsg -f Use files only, disables block device tests + -O Dump debugging info to /dev/kmsg on test timeout -S Enable stack tracer (negative performance impact) -c Only create and populate constrained path -R Automatically rerun failing tests @@ -401,7 +403,7 @@ $0 -x EOF } -while getopts 'hvqxkKfScRmn:d:Ds:r:?t:T:u:I:' OPTION; do +while getopts 'hvqxkKfScRmOn:d:Ds:r:?t:T:u:I:' OPTION; do case $OPTION in h) usage @@ -444,6 +446,9 @@ while getopts 'hvqxkKfScRmn:d:Ds:r:?t:T:u:I:' OPTION; do export NFS=1 . 
"$nfsfile" ;; + O) + TIMEOUT_DEBUG="yes" + ;; d) FILEDIR="$OPTARG" ;; @@ -766,6 +771,7 @@ msg "${TEST_RUNNER}" \ "${DEBUG:+-D}" \ "${KMEMLEAK:+-m}" \ "${KMSG:+-K}" \ + "${TIMEOUT_DEBUG:+-O}" \ "-c \"${RUNFILES}\"" \ "-T \"${TAGS}\"" \ "-i \"${STF_SUITE}\"" \ @@ -776,6 +782,7 @@ msg "${TEST_RUNNER}" \ ${DEBUG:+-D} \ ${KMEMLEAK:+-m} \ ${KMSG:+-K} \ + ${TIMEOUT_DEBUG:+-O} \ -c "${RUNFILES}" \ -T "${TAGS}" \ -i "${STF_SUITE}" \ diff --git a/tests/test-runner/bin/test-runner.py.in b/tests/test-runner/bin/test-runner.py.in index ea01b473b590..835b3b35f547 100755 --- a/tests/test-runner/bin/test-runner.py.in +++ b/tests/test-runner/bin/test-runner.py.in @@ -32,6 +32,7 @@ from select import select from subprocess import PIPE from subprocess import Popen from subprocess import check_output +from subprocess import run from threading import Timer from time import time, CLOCK_MONOTONIC from os.path import exists @@ -185,6 +186,63 @@ User: %s ''' % (self.pathname, self.identifier, self.outputdir, self.timeout, self.user) def kill_cmd(self, proc, options, kmemleak, keyboard_interrupt=False): + + """ + We're about to kill a command due to a timeout. + If we're running with the -O option, then dump debug info about the + process with the highest CPU usage to /dev/kmsg (Linux only). This can + help debug the timeout. 
+ + Debug info includes: + - 30 lines from 'top' + - /proc//stack output of process with highest CPU usage + - Last lines strace-ing process with highest CPU usage + """ + if exists("/dev/kmsg"): + c = """ +TOP_OUT="$(COLUMNS=160 top -b -n 1 | head -n 30)" +read -r PID CMD <<< $(echo "$TOP_OUT" | /usr/bin/awk \ +"/COMMAND/{ + print_next=1 + next +} +{ + if (print_next == 1) { + print \\$1\\" \\"\\$12 + exit + } +}") +echo "##### ZTS timeout debug #####" +echo "----- top -----" +echo "$TOP_OUT" +echo "----- /proc/$PID/stack ($CMD)) -----" +cat /proc/$PID/stack +echo "----- strace ($CMD) -----" +TMPFILE="$(mktemp --suffix=ZTS)" +/usr/bin/strace -k --stack-traces -p $PID &> "$TMPFILE" & +sleep 0.1 +killall strace +tail -n 30 $TMPFILE +rm "$TMPFILE" +echo "##### /proc/sysrq-trigger stack #####" +""" + c = "sudo bash -c '" + c + "'" + data = run(c, capture_output=True, shell=True, text=True) + out = data.stdout + try: + kp = Popen([SUDO, "sh", "-c", + "echo '" + out + "' > /dev/kmsg"]) + kp.wait() + + """ + Trigger kernel stack traces + """ + kp = Popen([SUDO, "sh", "-c", + "echo l > /proc/sysrq-trigger"]) + kp.wait() + except Exception: + pass + """ Kill a running command due to timeout, or ^C from the keyboard. If sudo is required, this user was verified previously. 
@@ -1097,6 +1155,9 @@ def parse_args(): parser.add_option('-o', action='callback', callback=options_cb, default=BASEDIR, dest='outputdir', type='string', metavar='outputdir', help='Specify an output directory.') + parser.add_option('-O', action='store_true', default=False, + dest='timeout_debug', + help='Dump debugging info to /dev/kmsg on test timeout') parser.add_option('-i', action='callback', callback=options_cb, default=TESTDIR, dest='testdir', type='string', metavar='testdir', help='Specify a test directory.') From 46ee121c39cc9097b4e688ad5eef0d3244215fb0 Mon Sep 17 00:00:00 2001 From: Shreshth3 <66148173+Shreshth3@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:15:46 -0700 Subject: [PATCH 21/57] docs: fix a few small typos (#17804) Signed-off-by: Shreshth Srivastava Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Tony Hutter --- .github/ISSUE_TEMPLATE/feature_request.md | 2 +- contrib/intel_qat/readme.md | 2 +- etc/init.d/README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 9b50a4a3d96e..f3d4316f6f67 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -14,7 +14,7 @@ Please check our issue tracker before opening a new feature request. Filling out the following template will help other contributors better understand your proposed feature. --> -### Describe the feature would like to see added to OpenZFS +### Describe the feature you would like to see added to OpenZFS