diff --git a/include/sys/zio.h b/include/sys/zio.h index 211a7d0b08ae..b9c4374a5526 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -486,7 +486,7 @@ struct zio { spa_t *io_spa; blkptr_t *io_bp; blkptr_t *io_bp_override; - blkptr_t io_bp_copy; + blkptr_t *io_bp_copy; list_t io_parent_list; list_t io_child_list; zio_t *io_logical; @@ -498,7 +498,7 @@ struct zio { zio_done_func_t *io_done; void *io_private; int64_t io_prev_space_delta; /* DMU private */ - blkptr_t io_bp_orig; + blkptr_t *io_bp_orig; /* io_lsize != io_orig_size iff this is a raw write */ uint64_t io_lsize; @@ -688,6 +688,8 @@ extern void zio_resume_wait(spa_t *spa); extern int zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, enum blk_config_flag blk_config, enum blk_verify_flag blk_verify); +extern void zio_force_bp(zio_t *zio, const blkptr_t *bp); + /* * Initial setup and teardown. */ diff --git a/module/zfs/arc.c b/module/zfs/arc.c index df41e3b49204..56d138e0c476 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -6985,7 +6985,7 @@ arc_write_done(zio_t *zio) * buffers from the hash table when we arc_free(). */ if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { - if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp)) + if (!BP_EQUAL(zio->io_bp_orig, zio->io_bp)) panic("bad overwrite, hdr=%p exists=%p", (void *)hdr, (void *)exists); ASSERT(zfs_refcount_is_zero( @@ -6998,7 +6998,7 @@ arc_write_done(zio_t *zio) } else if (zio->io_flags & ZIO_FLAG_NOPWRITE) { /* nopwrite */ ASSERT(zio->io_prop.zp_nopwrite); - if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp)) + if (!BP_EQUAL(zio->io_bp_orig, zio->io_bp)) panic("bad nopwrite, hdr=%p exists=%p", (void *)hdr, (void *)exists); } else { @@ -8964,8 +8964,7 @@ l2arc_read_done(zio_t *zio) */ ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd || (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd)); - zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ - zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ + zio_force_bp(zio, &cb->l2rcb_bp); /* XXX fix in L2ARC 2.0 */ zio->io_prop.zp_complevel = hdr->b_complevel; valid_cksum = arc_cksum_is_equal(hdr, zio); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 7403f10d91b7..c56fbc6b11fb 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -4667,10 +4667,10 @@ dbuf_lightweight_done(zio_t *zio) dmu_tx_t *tx = os->os_synctx; if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) { - ASSERT(BP_EQUAL(zio->io_bp, &zio->io_bp_orig)); + ASSERT(BP_EQUAL(zio->io_bp, zio->io_bp_orig)); } else { dsl_dataset_t *ds = os->os_dsl_dataset; - (void) dsl_dataset_block_kill(ds, &zio->io_bp_orig, tx, B_TRUE); + (void) dsl_dataset_block_kill(ds, zio->io_bp_orig, tx, B_TRUE); dsl_dataset_block_born(ds, zio->io_bp, tx); } @@ -4929,7 +4929,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) dmu_buf_impl_t *db = vdb; dnode_t *dn; blkptr_t *bp = zio->io_bp; - blkptr_t *bp_orig = &zio->io_bp_orig; + blkptr_t *bp_orig = zio->io_bp_orig; spa_t *spa = zio->io_spa; int64_t delta; uint64_t fill = 0; @@ -5077,7 +5077,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) { (void) buf; dmu_buf_impl_t *db = vdb; - blkptr_t *bp_orig = &zio->io_bp_orig; + blkptr_t *bp_orig = zio->io_bp_orig; blkptr_t *bp = db->db_blkptr; objset_t *os = db->db_objset; dmu_tx_t *tx = os->os_synctx; diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index f7f808d5b8f7..bd9d125f2fc0 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1908,7 +1908,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE); if (dr->dt.dl.dr_nopwrite) { blkptr_t *bp = zio->io_bp; - blkptr_t *bp_orig = &zio->io_bp_orig; + blkptr_t *bp_orig = zio->io_bp_orig; uint8_t chksum = BP_GET_CHECKSUM(bp_orig); ASSERT(BP_EQUAL(bp, bp_orig)); @@ -1963,7 +1963,7 @@ dmu_sync_late_arrival_done(zio_t *zio) zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); if (!BP_IS_HOLE(bp)) { - blkptr_t *bp_orig __maybe_unused = &zio->io_bp_orig; + blkptr_t *bp_orig __maybe_unused = zio->io_bp_orig; ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE)); ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig)); ASSERT(BP_GET_BIRTH(zio->io_bp) == zio->io_txg); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index a77f338bdfd3..dc9b944bbaa1 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1528,7 +1528,7 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) { (void) abuf; blkptr_t *bp = zio->io_bp; - blkptr_t *bp_orig = &zio->io_bp_orig; + blkptr_t *bp_orig = zio->io_bp_orig; objset_t *os = arg; if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 4cf8912d4269..588e9ea3b4ab 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -78,6 +78,7 @@ static int zio_deadman_log_all = B_FALSE; */ static kmem_cache_t *zio_cache; static kmem_cache_t *zio_link_cache; +static kmem_cache_t *zio_bp_cache; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; #if defined(ZFS_DEBUG) && !defined(_KERNEL) @@ -151,6 +152,8 @@ typedef struct zio_stats { kstat_named_t ziostat_alloc_class_fallbacks; kstat_named_t ziostat_gang_writes; kstat_named_t ziostat_gang_multilevel; + kstat_named_t ziostat_zio_total; + kstat_named_t ziostat_zio_bp_total; } zio_stats_t; static zio_stats_t zio_stats = { @@ -158,6 +161,8 @@ static zio_stats_t zio_stats = { { "alloc_class_fallbacks", KSTAT_DATA_UINT64 }, { "gang_writes", KSTAT_DATA_UINT64 }, { "gang_multilevel", KSTAT_DATA_UINT64 }, + { "zio_total", KSTAT_DATA_UINT64 }, + { "zio_bp_total", KSTAT_DATA_UINT64 }, }; struct { @@ -165,6 +170,8 @@ struct { wmsum_t ziostat_alloc_class_fallbacks; wmsum_t ziostat_gang_writes; wmsum_t ziostat_gang_multilevel; + wmsum_t ziostat_zio_total; + wmsum_t ziostat_zio_bp_total; } ziostat_sums; #define ZIOSTAT_BUMP(stat) wmsum_add(&ziostat_sums.stat, 1); @@ -190,6 +197,10 @@ zio_kstats_update(kstat_t *ksp, int rw) wmsum_value(&ziostat_sums.ziostat_gang_writes); zs->ziostat_gang_multilevel.value.ui64 = wmsum_value(&ziostat_sums.ziostat_gang_multilevel); + zs->ziostat_zio_total.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_zio_total); + zs->ziostat_zio_bp_total.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_zio_bp_total); return (0); } @@ -202,11 +213,15 @@ zio_init(void) sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0); zio_link_cache = kmem_cache_create("zio_link_cache", sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + zio_bp_cache = kmem_cache_create("zio_bp_cache", + sizeof (blkptr_t), 0, NULL, NULL, NULL, NULL, NULL, 0); wmsum_init(&ziostat_sums.ziostat_total_allocations, 0); wmsum_init(&ziostat_sums.ziostat_alloc_class_fallbacks, 0); wmsum_init(&ziostat_sums.ziostat_gang_writes, 0); wmsum_init(&ziostat_sums.ziostat_gang_multilevel, 0); + wmsum_init(&ziostat_sums.ziostat_zio_total, 0); + wmsum_init(&ziostat_sums.ziostat_zio_bp_total, 0); zio_ksp = kstat_create("zfs", 0, "zio_stats", "misc", KSTAT_TYPE_NAMED, sizeof (zio_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); @@ -352,7 +367,10 @@ zio_fini(void) wmsum_fini(&ziostat_sums.ziostat_alloc_class_fallbacks); wmsum_fini(&ziostat_sums.ziostat_gang_writes); wmsum_fini(&ziostat_sums.ziostat_gang_multilevel); + wmsum_fini(&ziostat_sums.ziostat_zio_total); + wmsum_fini(&ziostat_sums.ziostat_zio_bp_total); + kmem_cache_destroy(zio_bp_cache); kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_cache); @@ -939,6 +957,14 @@ zio_bookmark_compare(const void *x1, const void *x2) return (0); } +static inline blkptr_t * +zio_dup_bp(const blkptr_t *bp) +{ + blkptr_t *nbp = kmem_cache_alloc(zio_bp_cache, KM_SLEEP); + *nbp = *bp; + return (nbp); +} + /* * ========================================================================== * Create the various types of I/O (read, write, free, etc) @@ -965,6 +991,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0); zio = kmem_cache_alloc(zio_cache, KM_SLEEP); + ZIOSTAT_BUMP(ziostat_zio_total); memset(zio, 0, sizeof (zio_t)); mutex_init(&zio->io_lock, NULL, MUTEX_NOLOCKDEP, NULL); @@ -988,12 +1015,17 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, if (bp != NULL) { if (type != ZIO_TYPE_WRITE || zio->io_child_type == ZIO_CHILD_DDT) { - zio->io_bp_copy = *bp; - zio->io_bp = &zio->io_bp_copy; /* so caller can free */ + /* so caller can free */ + zio->io_bp = zio->io_bp_copy = zio_dup_bp(bp); + ZIOSTAT_BUMP(ziostat_zio_bp_total); } else { zio->io_bp = (blkptr_t *)bp; } - zio->io_bp_orig = *bp; + if (type == ZIO_TYPE_WRITE && + zio->io_child_type != ZIO_CHILD_VDEV) { + zio->io_bp_orig = zio_dup_bp(bp); + ZIOSTAT_BUMP(ziostat_zio_bp_total); + } if (zio->io_child_type == ZIO_CHILD_LOGICAL) zio->io_logical = zio; if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp)) @@ -1050,9 +1082,31 @@ zio_destroy(zio_t *zio) list_destroy(&zio->io_child_list); mutex_destroy(&zio->io_lock); cv_destroy(&zio->io_cv); + if (zio->io_bp_copy != NULL) + kmem_cache_free(zio_bp_cache, zio->io_bp_copy); + if (zio->io_bp_orig != NULL) + kmem_cache_free(zio_bp_cache, zio->io_bp_orig); kmem_cache_free(zio_cache, zio); } +/* + * Forcibly set the block pointer in a completed ZIO. This is only used from + * l2arc_read_done() to set the original BP on the L2ARC read ZIO so that it + * looks sensible when sent back to arc_read_done(). Shouldn't be used anywhere + * else. If that changes, that is, you do something about the "XXX fix in L2ARC + * 2.0" comment in arc.c that has existed since prehistory, please rewrite this + * comment too. + */ +void +zio_force_bp(zio_t *zio, const blkptr_t *bp) +{ + ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); + ASSERT3U(zio->io_stage, ==, ZIO_STAGE_DONE); + ASSERT0P(zio->io_bp_copy); + zio->io_bp = zio->io_bp_copy = zio_dup_bp(bp); + ZIOSTAT_BUMP(ziostat_zio_bp_total); +} + /* * ZIO intended to be between others. Provides synchronization at READY * and DONE pipeline stages and calls the respective callbacks. @@ -1810,7 +1864,7 @@ zio_read_bp_init(zio_t *zio) uint64_t psize = BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); - ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy); + ASSERT3P(zio->io_bp, ==, zio->io_bp_copy); if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_child_type == ZIO_CHILD_LOGICAL && @@ -1900,7 +1954,7 @@ zio_write_bp_init(zio_t *zio) * it as a regular write I/O. */ zio->io_bp_override = NULL; - *bp = zio->io_bp_orig; + *bp = *zio->io_bp_orig; zio->io_pipeline = zio->io_orig_pipeline; } @@ -2032,7 +2086,7 @@ zio_write_compress(zio_t *zio) * it as a regular write I/O. */ zio->io_bp_override = NULL; - *bp = zio->io_bp_orig; + *bp = *zio->io_bp_orig; zio->io_pipeline = zio->io_orig_pipeline; } else if ((zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) != 0 && @@ -2094,7 +2148,7 @@ zio_write_compress(zio_t *zio) } if (psize == 0) { - if (BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig) != 0 && + if (BP_GET_LOGICAL_BIRTH(zio->io_bp_orig) != 0 && spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) { BP_SET_LSIZE(bp, lsize); BP_SET_TYPE(bp, zp->zp_type); @@ -2138,7 +2192,7 @@ zio_free_bp_init(zio_t *zio) zio->io_pipeline = ZIO_DDT_FREE_PIPELINE; } - ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy); + ASSERT3P(zio->io_bp, ==, zio->io_bp_copy); return (zio); } @@ -3109,7 +3163,7 @@ zio_write_gang_member_ready(zio_t *zio) * If we're getting direct-invoked from zio_write_gang_block(), * the bp_orig will be set. */ - ASSERT(BP_IS_HOLE(&zio->io_bp_orig) || + ASSERT(BP_IS_HOLE(zio->io_bp_orig) || zio->io_flags & ZIO_FLAG_PREALLOCATED); ASSERT(zio->io_child_type == ZIO_CHILD_GANG); @@ -3289,7 +3343,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) if (allocated) { metaslab_trace_move(&cio_list, &cio->io_alloc_list); metaslab_group_alloc_increment_all(spa, - &cio->io_bp_orig, zio->io_allocator, flags, psize, + cio->io_bp_orig, zio->io_allocator, flags, psize, cio); } /* @@ -3358,7 +3412,7 @@ static zio_t * zio_nop_write(zio_t *zio) { blkptr_t *bp = zio->io_bp; - blkptr_t *bp_orig = &zio->io_bp_orig; + blkptr_t *bp_orig = zio->io_bp_orig; zio_prop_t *zp = &zio->io_prop; ASSERT(BP_IS_HOLE(bp)); @@ -3930,9 +3984,9 @@ zio_ddt_write(zio_t *zio) */ if (zp->zp_rewrite) { uint64_t orig_logical_birth = - BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig); + BP_GET_LOGICAL_BIRTH(zio->io_bp_orig); ddt_bp_fill(ddp, v, bp, orig_logical_birth); - if (BP_EQUAL(bp, &zio->io_bp_orig)) { + if (BP_EQUAL(bp, zio->io_bp_orig)) { /* We can skip accounting. */ zio->io_flags |= ZIO_FLAG_NOPWRITE; ddt_exit(ddt); @@ -4240,12 +4294,12 @@ zio_dva_allocate(zio_t *zio) } if (zio->io_flags & ZIO_FLAG_PREALLOCATED) { ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_GANG); - memcpy(zio->io_bp->blk_dva, zio->io_bp_orig.blk_dva, + memcpy(zio->io_bp->blk_dva, zio->io_bp_orig->blk_dva, 3 * sizeof (dva_t)); BP_SET_LOGICAL_BIRTH(zio->io_bp, - BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig)); + BP_GET_LOGICAL_BIRTH(zio->io_bp_orig)); BP_SET_PHYSICAL_BIRTH(zio->io_bp, - BP_GET_RAW_PHYSICAL_BIRTH(&zio->io_bp_orig)); + BP_GET_RAW_PHYSICAL_BIRTH(zio->io_bp_orig)); return (zio); } @@ -4377,7 +4431,7 @@ zio_dva_allocate(zio_t *zio) * For rewrite operations, preserve the logical birth time * but set the physical birth time to the current txg. */ - uint64_t logical_birth = BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig); + uint64_t logical_birth = BP_GET_LOGICAL_BIRTH(zio->io_bp_orig); ASSERT3U(logical_birth, <=, zio->io_txg); BP_SET_BIRTH(zio->io_bp, logical_birth, zio->io_txg); BP_SET_REWRITE(zio->io_bp, 1); @@ -5315,8 +5369,13 @@ zio_ready(zio_t *zio) } #ifdef ZFS_DEBUG - if (bp != NULL && bp != &zio->io_bp_copy) - zio->io_bp_copy = *bp; + if (bp != NULL && bp != zio->io_bp_copy) { + if (zio->io_bp_copy == NULL) + zio->io_bp_copy = zio_dup_bp(bp); + /* Not bumping ziostat_zio_bp_total for debug. */ + else + *zio->io_bp_copy = *bp; + } #endif if (zio->io_error != 0) { @@ -5462,9 +5521,11 @@ zio_done(zio_t *zio) ASSERT0(zio->io_children[c][w]); if (zio->io_bp != NULL && !BP_IS_EMBEDDED(zio->io_bp)) { - ASSERT(memcmp(zio->io_bp, &zio->io_bp_copy, - sizeof (blkptr_t)) == 0 || - (zio->io_bp == zio_unique_parent(zio)->io_bp)); + if (zio->io_bp_copy) { + ASSERT(memcmp(zio->io_bp, zio->io_bp_copy, + sizeof (blkptr_t)) == 0 || + (zio->io_bp == zio_unique_parent(zio)->io_bp)); + } if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(zio->io_bp) && zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { @@ -5475,7 +5536,7 @@ zio_done(zio_t *zio) BP_GET_NDVAS(zio->io_bp))); } if (zio->io_flags & ZIO_FLAG_NOPWRITE) - VERIFY(BP_EQUAL(zio->io_bp, &zio->io_bp_orig)); + VERIFY(BP_EQUAL(zio->io_bp, zio->io_bp_orig)); } /*