diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 4a5f9fd93f4f..0687b574a1fe 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -4,6 +4,7 @@ .\" Copyright (c) 2019, 2021 by Delphix. All rights reserved. .\" Copyright (c) 2019 Datto Inc. .\" Copyright (c) 2023, 2024, 2025, Klara, Inc. +.\" Copyright (c) 2025 ConnectWise, Inc. .\" The contents of this file are subject to the terms of the Common Development .\" and Distribution License (the "License"). You may not use this file except .\" in compliance with the License. You can obtain a copy of the license at @@ -17,7 +18,7 @@ .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" -.Dd May 29, 2025 +.Dd June 30, 2025 .Dt ZFS 4 .Os . @@ -2087,7 +2088,12 @@ working on a scrub between TXG flushes. .It Sy zfs_scrub_error_blocks_per_txg Ns = Ns Sy 4096 Pq uint Error blocks to be scrubbed in one txg. . -.It Sy zfs_scan_checkpoint_intval Ns = Ns Sy 7200 Ns s Po 2 hour Pc Pq uint +.It Sy zfs_scrub_decompress Ns = Ns Sy 0 Ns | Ns 1 Pq uint +When set, scrub also decompresses each compressed block it reads in order to +catch the rare type of corruption where the checksum matches the data but +decompression fails. +. +.It Sy zfs_scan_checkpoint_intval Ns = Ns Sy 7200 Ns s Po 2 hours Pc Pq uint To preserve progress across reboots, the sequential scan algorithm periodically needs to stop metadata scanning and issue all the verification I/O to disk. The frequency of this flushing is determined by this tunable. diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 5052992d775c..87a21f3c9203 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -26,6 +26,7 @@ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. * Copyright 2019 Joyent, Inc. + * Copyright 2025 ConnectWise, Inc. 
*/
 
 #include
@@ -56,6 +57,7 @@
 #include
 #include
 #include
+#include
 #ifdef _KERNEL
 #include
 #endif
@@ -246,6 +248,13 @@ static int zfs_free_bpobj_enabled = 1;
 /* Error blocks to be scrubbed in one txg. */
 static uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
 
+/*
+ * When set to a non-zero value, scrub also decompresses each compressed
+ * block it reads, catching the rare type of corruption where the checksum
+ * matches the data but decompression fails.
+ */
+static uint_t zfs_scrub_decompress = 0;
+
 /* the order has to match pool_scan_type */
 static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = {
 	NULL,
@@ -4874,6 +4883,39 @@ dsl_scan_scrub_done(zio_t *zio)
 	blkptr_t *bp = zio->io_bp;
 	dsl_scan_io_queue_t *queue = zio->io_private;
 
+	/*
+	 * If the block was read without error, is compressed, and we're doing
+	 * a decompression scrub we will now attempt to decompress it to
+	 * further verify its integrity.  Scrub reads are issued raw, so an
+	 * encrypted block is still ciphertext here and cannot be decompressed;
+	 * skip those rather than report a spurious error.
+	 */
+	if (zio->io_error == 0 && zfs_scrub_decompress != 0 &&
+	    BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && !BP_IS_ENCRYPTED(bp)) {
+		dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+		/* abd_alloc_linear() cannot fail, so no NULL check is needed. */
+		abd_t *dabd = abd_alloc_linear(BP_GET_LSIZE(bp), B_FALSE);
+
+		if (zio_decompress_data(BP_GET_COMPRESS(bp), zio->io_abd, dabd,
+		    abd_get_size(zio->io_abd), abd_get_size(dabd),
+		    &zio->io_prop.zp_complevel) != 0) {
+			/* Checksum was valid but decompression failed. */
+			if (dsl_errorscrubbing(spa->spa_dsl_pool) &&
+			    !dsl_errorscrub_is_paused(scn)) {
+				atomic_inc_64(
+				    &scn->errorscrub_phys.dep_errors);
+			} else {
+				atomic_inc_64(&scn->scn_phys.scn_errors);
+			}
+			/* Log it so it shows up in zpool status -v. */
+			spa_log_error(zio->io_spa, &zio->io_bookmark,
+			    BP_GET_LOGICAL_BIRTH(bp));
+			(void) zfs_ereport_post(FM_EREPORT_ZFS_DATA,
+			    zio->io_spa, NULL, &zio->io_bookmark, zio, 0);
+		}
+		abd_free(dabd);
+	}
+
 	abd_free(zio->io_abd);
 
 	if (queue == NULL) {
@@ -5362,3 +5404,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, resilver_defer_percent, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW, "Error blocks to be scrubbed in one txg"); + +ZFS_MODULE_PARAM(zfs, zfs_, scrub_decompress, UINT, ZMOD_RW, + "Scrub will decompress compressed blocks"); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 9fad8946f4f3..7662f95d7cbd 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -546,7 +546,7 @@ tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_multiple_pools', 'zpool_error_scrub_001_pos', 'zpool_error_scrub_002_pos', 'zpool_error_scrub_003_pos', 'zpool_error_scrub_004_pos', - 'zpool_scrub_date_range_001'] + 'zpool_scrub_date_range_001', 'zpool_scrub_decompress'] tags = ['functional', 'cli_root', 'zpool_scrub'] [tests/functional/cli_root/zpool_set] diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index e273c9f85c28..b9d912c2af1d 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -81,6 +81,7 @@ SCAN_LEGACY scan_legacy zfs_scan_legacy SCAN_SUSPEND_PROGRESS scan_suspend_progress zfs_scan_suspend_progress SCAN_VDEV_LIMIT scan_vdev_limit zfs_scan_vdev_limit SCRUB_AFTER_EXPAND scrub_after_expand zfs_scrub_after_expand +SCRUB_DECOMPRESS scrub_decompress zfs_scrub_decompress SEND_HOLES_WITHOUT_BIRTH_TIME send_holes_without_birth_time send_holes_without_birth_time SLOW_IO_EVENTS_PER_SECOND slow_io_events_per_second zfs_slow_io_events_per_second SPA_ASIZE_INFLATION spa.asize_inflation spa_asize_inflation diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index c2542287c1d7..e8daaaf446bd 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1250,6 +1250,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \ 
functional/cli_root/zpool_scrub/zpool_error_scrub_004_pos.ksh \
+	functional/cli_root/zpool_scrub/zpool_scrub_decompress.ksh \
 	functional/cli_root/zpool_set/cleanup.ksh \
 	functional/cli_root/zpool_set/setup.ksh \
 	functional/cli_root/zpool/setup.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_decompress.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_decompress.ksh
new file mode 100755
index 000000000000..d4b8f667a9e0
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_decompress.ksh
@@ -0,0 +1,66 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2025 ConnectWise Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
+
+#
+# DESCRIPTION:
+#	'zpool scrub' with zfs_scrub_decompress set works
+#
+# STRATEGY:
+#	1. Set zfs_scrub_decompress tunable.
+#	2. Start a scrub and wait for it to finish.
+#	3. Repeat this with zfs_scrub_decompress not set.
+#
+
+function cleanup
+{
+	log_must set_tunable32 SCRUB_DECOMPRESS 0
+	datasetexists $TESTPOOL/newfs && destroy_dataset $TESTPOOL/newfs
+}
+
+verify_runnable "global"
+
+log_onexit cleanup
+
+log_assert "Scrub with decompression tunable set - works."
+
+# Create our testing dataset
+log_must zfs create $TESTPOOL/newfs
+# Make sure compression is on
+log_must zfs set compression=on $TESTPOOL/newfs
+typeset file="/$TESTPOOL/newfs/$TESTFILE0"
+# Create some incompressible data in our dataset
+log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
+# Append enough repetitive data that whole records are stored compressed
+log_must eval "yes 'aaaaaaaa' | head -c 1048576 >> $file"
+
+# Enable decompression of blocks read by scrub
+log_must set_tunable32 SCRUB_DECOMPRESS 1
+# Run and wait for scrub
+log_must zpool scrub -w $TESTPOOL
+# The extra decompression pass must not report errors on a healthy pool
+log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+# Disable decompression of blocks read by scrub
+log_must set_tunable32 SCRUB_DECOMPRESS 0
+# Run and wait for scrub
+log_must zpool scrub -w $TESTPOOL
+log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+log_pass "Scrub with decompression tunable set - works."