diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 6439b1bc9..bf44d9c32 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -208,7 +208,7 @@ sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free, sublivelist_verify_block_t svb = { .svb_dva = bp->blk_dva[i], .svb_allocated_txg = - BP_GET_LOGICAL_BIRTH(bp) + BP_GET_BIRTH(bp) }; if (zfs_btree_find(&sv->sv_leftover, &svb, @@ -2569,7 +2569,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, (u_longlong_t)BP_GET_PSIZE(bp), (u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), - (u_longlong_t)BP_GET_BIRTH(bp)); + (u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp)); if (bp_freed) (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), " %s", "FREE"); @@ -2619,7 +2619,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, { int err = 0; - if (BP_GET_LOGICAL_BIRTH(bp) == 0) + if (BP_GET_BIRTH(bp) == 0) return (0); print_indirect(spa, bp, zb, dnp); @@ -2807,7 +2807,7 @@ dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) (void) arg, (void) tx; char blkbuf[BP_SPRINTF_LEN]; - if (BP_GET_LOGICAL_BIRTH(bp) != 0) { + if (BP_GET_BIRTH(bp) != 0) { snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("\t%s\n", blkbuf); } @@ -2848,7 +2848,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) (void) arg, (void) tx; char blkbuf[BP_SPRINTF_LEN]; - ASSERT(BP_GET_LOGICAL_BIRTH(bp) != 0); + ASSERT(BP_GET_BIRTH(bp) != 0); snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed); (void) printf("\t%s\n", blkbuf); return (0); @@ -5922,11 +5922,11 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, * entry back to the block pointer before we claim it. */ if (v == DDT_PHYS_FLAT) { - ASSERT3U(BP_GET_BIRTH(bp), ==, + ASSERT3U(BP_GET_PHYSICAL_BIRTH(bp), ==, ddt_phys_birth(dde->dde_phys, v)); tempbp = *bp; ddt_bp_fill(dde->dde_phys, v, &tempbp, - BP_GET_BIRTH(bp)); + BP_GET_PHYSICAL_BIRTH(bp)); bp = &tempbp; } @@ -6152,7 +6152,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (zb->zb_level == ZB_DNODE_LEVEL) return (0); - if (dump_opt['b'] >= 5 && BP_GET_LOGICAL_BIRTH(bp) > 0) { + if (dump_opt['b'] >= 5 && BP_GET_BIRTH(bp) > 0) { char blkbuf[BP_SPRINTF_LEN]; snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("objset %llu object %llu " diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index 6b90b08ca..62e290cd1 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -176,7 +176,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { (void) printf("%shas blkptr, %s\n", tab_prefix, - !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >= + !BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa) ? "will claim" : "won't claim"); print_log_bp(bp, tab_prefix); @@ -189,7 +189,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) (void) printf("%s\n", tab_prefix); return; } - if (BP_GET_LOGICAL_BIRTH(bp) < zilog->zl_header->zh_claim_txg) { + if (BP_GET_BIRTH(bp) < zilog->zl_header->zh_claim_txg) { (void) printf("%s\n", tab_prefix); return; @@ -240,7 +240,7 @@ zil_prt_rec_write_enc(zilog_t *zilog, int txtype, const void *arg) if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { (void) printf("%shas blkptr, %s\n", tab_prefix, - !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >= + !BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa) ? "will claim" : "won't claim"); print_log_bp(bp, tab_prefix); @@ -476,7 +476,7 @@ print_log_block(zilog_t *zilog, const blkptr_t *bp, void *arg, if (claim_txg != 0) claim = "already claimed"; - else if (BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa)) + else if (BP_GET_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa)) claim = "will claim"; else claim = "won't claim"; diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h index 3196b2add..70cafa4c7 100644 --- a/include/sys/dmu_traverse.h +++ b/include/sys/dmu_traverse.h @@ -59,6 +59,13 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ #define TRAVERSE_NO_DECRYPT (1<<5) +/* + * Always use logical birth time for birth time comparisons. This is useful + * for operations that care about user data changes rather than physical + * block rewrites (e.g., incremental replication). + */ +#define TRAVERSE_LOGICAL (1<<6) + /* Special traverse error return value to indicate skipping of children */ #define TRAVERSE_VISIT_NO_CHILDREN -1 diff --git a/include/sys/spa.h b/include/sys/spa.h index e0eed831d..db6de332a 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -140,7 +140,7 @@ typedef struct zio_cksum_salt { * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 7 | padding | + * 7 |R| padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -175,6 +175,7 @@ typedef struct zio_cksum_salt { * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type + * R rewrite (reallocated/rewritten at phys birth TXG) * phys birth txg when dva[0] was written; zero if same as logical birth txg * note that typically all the dva's would be written in this * txg, but they could be different if they were moved by @@ -204,7 +205,7 @@ typedef struct zio_cksum_salt { * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 7 | padding | + * 7 |R| padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -373,7 +374,8 @@ typedef enum bp_embedded_type { typedef struct blkptr { dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ uint64_t blk_prop; /* size, compression, type, etc */ - uint64_t blk_pad[2]; /* Extra space for the future */ + uint64_t blk_prop2; /* additional properties */ + uint64_t blk_pad; /* Extra space for the future */ uint64_t blk_birth_word[2]; uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ @@ -476,32 +478,51 @@ typedef struct blkptr { #define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1) #define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x) +/* + * Block birth time macros for different use cases: + * - BP_GET_LOGICAL_BIRTH(): When the block was logically modified by user. + * To be used with a focus on user data, like incremental replication. + * - BP_GET_PHYSICAL_BIRTH(): When the block was physically written to disks. + * For regular writes is equal to logical birth. For dedup and block cloning + * can be smaller than logical birth. For remapped and rewritten blocks can + * be bigger. To be used with focus on physical disk content: ARC, DDT, scrub. + * - BP_GET_RAW_PHYSICAL_BIRTH(): Raw physical birth value. Zero if equal + * to logical birth. Should only be used for BP copying and debugging. + * - BP_GET_BIRTH(): When the block was allocated, which is a physical birth + * for rewritten blocks (rewrite flag set) or logical birth otherwise. + */ #define BP_GET_LOGICAL_BIRTH(bp) (bp)->blk_birth_word[1] #define BP_SET_LOGICAL_BIRTH(bp, x) ((bp)->blk_birth_word[1] = (x)) -#define BP_GET_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0] +#define BP_GET_RAW_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0] #define BP_SET_PHYSICAL_BIRTH(bp, x) ((bp)->blk_birth_word[0] = (x)) -#define BP_GET_BIRTH(bp) \ - (BP_IS_EMBEDDED(bp) ? 0 : \ - BP_GET_PHYSICAL_BIRTH(bp) ? BP_GET_PHYSICAL_BIRTH(bp) : \ +#define BP_GET_PHYSICAL_BIRTH(bp) \ + (BP_IS_EMBEDDED(bp) ? 0 : \ + BP_GET_RAW_PHYSICAL_BIRTH(bp) ? BP_GET_RAW_PHYSICAL_BIRTH(bp) : \ BP_GET_LOGICAL_BIRTH(bp)) -#define BP_SET_BIRTH(bp, logical, physical) \ -{ \ - ASSERT(!BP_IS_EMBEDDED(bp)); \ - BP_SET_LOGICAL_BIRTH(bp, logical); \ - BP_SET_PHYSICAL_BIRTH(bp, \ - ((logical) == (physical) ? 0 : (physical))); \ +#define BP_GET_BIRTH(bp) \ + ((BP_IS_EMBEDDED(bp) || !BP_GET_REWRITE(bp)) ? \ + BP_GET_LOGICAL_BIRTH(bp) : BP_GET_PHYSICAL_BIRTH(bp)) + +#define BP_SET_BIRTH(bp, logical, physical) \ +{ \ + ASSERT(!BP_IS_EMBEDDED(bp)); \ + BP_SET_LOGICAL_BIRTH(bp, logical); \ + BP_SET_PHYSICAL_BIRTH(bp, \ + ((logical) == (physical) ? 0 : (physical))); \ } #define BP_GET_FILL(bp) \ - ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \ - ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill)) + (BP_IS_EMBEDDED(bp) ? 1 : \ + BP_IS_ENCRYPTED(bp) ? BF64_GET((bp)->blk_fill, 0, 32) : \ + (bp)->blk_fill) #define BP_SET_FILL(bp, fill) \ { \ - if (BP_IS_ENCRYPTED(bp)) \ + ASSERT(!BP_IS_EMBEDDED(bp)); \ + if (BP_IS_ENCRYPTED(bp)) \ BF64_SET((bp)->blk_fill, 0, 32, fill); \ else \ (bp)->blk_fill = fill; \ @@ -516,6 +537,15 @@ typedef struct blkptr { BF64_SET((bp)->blk_fill, 32, 32, iv2); \ } +#define BP_GET_REWRITE(bp) \ + (BP_IS_EMBEDDED(bp) ? 0 : BF64_GET((bp)->blk_prop2, 63, 1)) + +#define BP_SET_REWRITE(bp, x) \ +{ \ + ASSERT(!BP_IS_EMBEDDED(bp)); \ + BF64_SET((bp)->blk_prop2, 63, 1, x); \ +} + #define BP_IS_METADATA(bp) \ (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) @@ -545,7 +575,7 @@ typedef struct blkptr { (dva1)->dva_word[0] == (dva2)->dva_word[0]) #define BP_EQUAL(bp1, bp2) \ - (BP_GET_BIRTH(bp1) == BP_GET_BIRTH(bp2) && \ + (BP_GET_PHYSICAL_BIRTH(bp1) == BP_GET_PHYSICAL_BIRTH(bp2) && \ BP_GET_LOGICAL_BIRTH(bp1) == BP_GET_LOGICAL_BIRTH(bp2) && \ DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \ DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \ @@ -588,8 +618,8 @@ typedef struct blkptr { { \ BP_ZERO_DVAS(bp); \ (bp)->blk_prop = 0; \ - (bp)->blk_pad[0] = 0; \ - (bp)->blk_pad[1] = 0; \ + (bp)->blk_prop2 = 0; \ + (bp)->blk_pad = 0; \ (bp)->blk_birth_word[0] = 0; \ (bp)->blk_birth_word[1] = 0; \ (bp)->blk_fill = 0; \ @@ -696,7 +726,7 @@ typedef struct blkptr { (u_longlong_t)BP_GET_LSIZE(bp), \ (u_longlong_t)BP_GET_PSIZE(bp), \ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), \ - (u_longlong_t)BP_GET_BIRTH(bp), \ + (u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp), \ (u_longlong_t)BP_GET_FILL(bp), \ ws, \ (u_longlong_t)bp->blk_cksum.zc_word[0], \ diff --git a/lib/libzdb/libzdb.c b/lib/libzdb/libzdb.c index 12144dc65..cca1327b1 100644 --- a/lib/libzdb/libzdb.c +++ b/lib/libzdb/libzdb.c @@ -93,9 +93,9 @@ livelist_compare(const void *larg, const void *rarg) * Since we're storing blkptrs without cancelling FREE/ALLOC pairs, * it's possible the offsets are equal. In that case, sort by txg */ - if (BP_GET_LOGICAL_BIRTH(l) < BP_GET_LOGICAL_BIRTH(r)) { + if (BP_GET_BIRTH(l) < BP_GET_BIRTH(r)) { return (-1); - } else if (BP_GET_LOGICAL_BIRTH(l) > BP_GET_LOGICAL_BIRTH(r)) { + } else if (BP_GET_BIRTH(l) > BP_GET_BIRTH(r)) { return (+1); } return (0); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index a2cb3b8a5..3483be64e 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1052,7 +1052,7 @@ static arc_buf_hdr_t * buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp) { const dva_t *dva = BP_IDENTITY(bp); - uint64_t birth = BP_GET_BIRTH(bp); + uint64_t birth = BP_GET_PHYSICAL_BIRTH(bp); uint64_t idx = BUF_HASH_INDEX(spa, dva, birth); kmutex_t *hash_lock = BUF_HASH_LOCK(idx); arc_buf_hdr_t *hdr; @@ -5587,7 +5587,7 @@ arc_read_done(zio_t *zio) if (HDR_IN_HASH_TABLE(hdr)) { arc_buf_hdr_t *found; - ASSERT3U(hdr->b_birth, ==, BP_GET_BIRTH(zio->io_bp)); + ASSERT3U(hdr->b_birth, ==, BP_GET_PHYSICAL_BIRTH(zio->io_bp)); ASSERT3U(hdr->b_dva.dva_word[0], ==, BP_IDENTITY(zio->io_bp)->dva_word[0]); ASSERT3U(hdr->b_dva.dva_word[1], ==, @@ -5690,7 +5690,7 @@ arc_read_done(zio_t *zio) error = SET_ERROR(EIO); if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { spa_log_error(zio->io_spa, &acb->acb_zb, - BP_GET_LOGICAL_BIRTH(zio->io_bp)); + BP_GET_PHYSICAL_BIRTH(zio->io_bp)); (void) zfs_ereport_post( FM_EREPORT_ZFS_AUTHENTICATION, zio->io_spa, NULL, &acb->acb_zb, zio, 0); @@ -6109,7 +6109,7 @@ top: if (!embedded_bp) { hdr->b_dva = *BP_IDENTITY(bp); - hdr->b_birth = BP_GET_BIRTH(bp); + hdr->b_birth = BP_GET_PHYSICAL_BIRTH(bp); exists = buf_hash_insert(hdr, &hash_lock); } if (exists != NULL) { @@ -6957,7 +6957,7 @@ arc_write_done(zio_t *zio) buf_discard_identity(hdr); } else { hdr->b_dva = *BP_IDENTITY(zio->io_bp); - hdr->b_birth = BP_GET_BIRTH(zio->io_bp); + hdr->b_birth = BP_GET_PHYSICAL_BIRTH(zio->io_bp); } } else { ASSERT(HDR_EMPTY(hdr)); diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c index 8c19de93f..0a8a077ed 100644 --- a/module/zfs/bpobj.c +++ b/module/zfs/bpobj.c @@ -954,8 +954,8 @@ space_range_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) (void) bp_freed, (void) tx; struct space_range_arg *sra = arg; - if (BP_GET_LOGICAL_BIRTH(bp) > sra->mintxg && - BP_GET_LOGICAL_BIRTH(bp) <= sra->maxtxg) { + if (BP_GET_BIRTH(bp) > sra->mintxg && + BP_GET_BIRTH(bp) <= sra->maxtxg) { if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) sra->used += bp_get_dsize_sync(sra->spa, bp); else diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index a4cc79c35..a96666a46 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1243,11 +1243,9 @@ dbuf_verify(dmu_buf_impl_t *db) DVA_IS_EMPTY(&bp->blk_dva[1]) && DVA_IS_EMPTY(&bp->blk_dva[2])); ASSERT0(bp->blk_fill); - ASSERT0(bp->blk_pad[0]); - ASSERT0(bp->blk_pad[1]); ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(BP_IS_HOLE(bp)); - ASSERT0(BP_GET_PHYSICAL_BIRTH(bp)); + ASSERT0(BP_GET_RAW_PHYSICAL_BIRTH(bp)); } } } @@ -1623,7 +1621,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags, */ if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bp)) { spa_log_error(db->db_objset->os_spa, &zb, - BP_GET_LOGICAL_BIRTH(bp)); + BP_GET_PHYSICAL_BIRTH(bp)); err = SET_ERROR(EIO); goto early_unlock; } @@ -4907,7 +4905,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) dnode_diduse_space(dn, delta - zio->io_prev_space_delta); zio->io_prev_space_delta = delta; - if (BP_GET_LOGICAL_BIRTH(bp) != 0) { + if (BP_GET_BIRTH(bp) != 0) { ASSERT((db->db_blkid != DMU_SPILL_BLKID && BP_GET_TYPE(bp) == dn->dn_type) || (db->db_blkid == DMU_SPILL_BLKID && @@ -5194,7 +5192,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx) ASSERT(dsl_pool_sync_context(spa_get_dsl(spa))); drica.drica_os = dn->dn_objset; - drica.drica_blk_birth = BP_GET_LOGICAL_BIRTH(bp); + drica.drica_blk_birth = BP_GET_BIRTH(bp); drica.drica_tx = tx; if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback, &drica)) { @@ -5209,8 +5207,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx) if (dn->dn_objset != spa_meta_objset(spa)) { dsl_dataset_t *ds = dmu_objset_ds(dn->dn_objset); if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && - BP_GET_LOGICAL_BIRTH(bp) > - ds->ds_dir->dd_origin_txg) { + BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) { ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(spa_feature_is_enabled(spa, @@ -5328,7 +5325,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) } ASSERT(db->db_level == 0 || data == db->db_buf); - ASSERT3U(BP_GET_LOGICAL_BIRTH(db->db_blkptr), <=, txg); + ASSERT3U(BP_GET_BIRTH(db->db_blkptr), <=, txg); ASSERT(pio); SET_BOOKMARK(&zb, os->os_dsl_dataset ? diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 60cbb7755..e0b9fc395 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -724,10 +724,13 @@ ddt_phys_extend(ddt_univ_phys_t *ddp, ddt_phys_variant_t v, const blkptr_t *bp) dvas[2] = bp->blk_dva[2]; if (ddt_phys_birth(ddp, v) == 0) { - if (v == DDT_PHYS_FLAT) - ddp->ddp_flat.ddp_phys_birth = BP_GET_BIRTH(bp); - else - ddp->ddp_trad[v].ddp_phys_birth = BP_GET_BIRTH(bp); + if (v == DDT_PHYS_FLAT) { + ddp->ddp_flat.ddp_phys_birth = + BP_GET_PHYSICAL_BIRTH(bp); + } else { + ddp->ddp_trad[v].ddp_phys_birth = + BP_GET_PHYSICAL_BIRTH(bp); + } } } @@ -891,14 +894,14 @@ ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, const blkptr_t *bp) if (ddt->ddt_flags & DDT_FLAG_FLAT) { if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_flat.ddp_dva[0]) && - BP_GET_BIRTH(bp) == ddp->ddp_flat.ddp_phys_birth) { + BP_GET_PHYSICAL_BIRTH(bp) == ddp->ddp_flat.ddp_phys_birth) { return (DDT_PHYS_FLAT); } } else /* traditional phys */ { for (int p = 0; p < DDT_PHYS_MAX; p++) { if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_trad[p].ddp_dva[0]) && - BP_GET_BIRTH(bp) == + BP_GET_PHYSICAL_BIRTH(bp) == ddp->ddp_trad[p].ddp_phys_birth) { return (p); } diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 21c465328..690227a30 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1966,7 +1966,7 @@ dmu_sync_late_arrival_done(zio_t *zio) blkptr_t *bp_orig __maybe_unused = &zio->io_bp_orig; ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE)); ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig)); - ASSERT(BP_GET_LOGICAL_BIRTH(zio->io_bp) == zio->io_txg); + ASSERT(BP_GET_BIRTH(zio->io_bp) == zio->io_txg); ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa)); zio_free(zio->io_spa, zio->io_txg, zio->io_bp); } @@ -2655,11 +2655,12 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, * operation into ZIL, or it may be impossible to replay, since * the block may appear not yet allocated at that point. */ - if (BP_GET_BIRTH(bp) > spa_freeze_txg(os->os_spa)) { + if (BP_GET_PHYSICAL_BIRTH(bp) > spa_freeze_txg(os->os_spa)) { error = SET_ERROR(EINVAL); goto out; } - if (BP_GET_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) { + if (BP_GET_PHYSICAL_BIRTH(bp) > + spa_last_synced_txg(os->os_spa)) { error = SET_ERROR(EAGAIN); goto out; } @@ -2731,7 +2732,8 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) { if (!BP_IS_EMBEDDED(bp)) { BP_SET_BIRTH(&dl->dr_overridden_by, dr->dr_txg, - BP_GET_BIRTH(bp)); + BP_GET_PHYSICAL_BIRTH(bp)); + BP_SET_REWRITE(&dl->dr_overridden_by, 0); } else { BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, dr->dr_txg); diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index 86f751e88..fb13b2f87 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -224,8 +224,8 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name, * call the ZFS_IOC_OBJ_TO_STATS ioctl. */ error = traverse_dataset(tosnap, fromtxg, - TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT, - diff_cb, &da); + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT | + TRAVERSE_LOGICAL, diff_cb, &da); if (error != 0) { da.da_err = error; diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index afc9823de..73227b58c 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -1403,7 +1403,7 @@ corrective_read_done(zio_t *zio) /* Corruption corrected; update error log if needed */ if (zio->io_error == 0) { spa_remove_error(data->spa, &data->zb, - BP_GET_LOGICAL_BIRTH(zio->io_bp)); + BP_GET_PHYSICAL_BIRTH(zio->io_bp)); } kmem_free(data, sizeof (cr_cb_data_t)); abd_free(zio->io_abd); @@ -1530,7 +1530,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw, } rrd->abd = abd; - io = zio_rewrite(NULL, rwa->os->os_spa, BP_GET_LOGICAL_BIRTH(bp), bp, + io = zio_rewrite(NULL, rwa->os->os_spa, BP_GET_BIRTH(bp), bp, abd, BP_GET_PSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, flags, &zb); diff --git a/module/zfs/dmu_redact.c b/module/zfs/dmu_redact.c index 65443d112..9226ac9e4 100644 --- a/module/zfs/dmu_redact.c +++ b/module/zfs/dmu_redact.c @@ -370,8 +370,8 @@ redact_traverse_thread(void *arg) #endif err = traverse_dataset_resume(rt_arg->ds, rt_arg->txg, - &rt_arg->resume, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - redact_cb, rt_arg); + &rt_arg->resume, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_LOGICAL, redact_cb, rt_arg); if (err != EINTR) rt_arg->error_code = err; diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 4f27f3df0..deeba29e1 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -1084,7 +1084,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ if (sta->os->os_encrypted && !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) { - spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp)); + spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp)); return (SET_ERROR(EIO)); } @@ -1210,7 +1210,7 @@ send_traverse_thread(void *arg) err = traverse_dataset_resume(st_arg->os->os_dsl_dataset, st_arg->fromtxg, &st_arg->resume, - st_arg->flags, send_cb, st_arg); + st_arg->flags | TRAVERSE_LOGICAL, send_cb, st_arg); if (err != EINTR) st_arg->error_code = err; diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index f534a7dd6..dd1df1705 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -74,6 +74,15 @@ static int traverse_dnode(traverse_data_t *td, const blkptr_t *bp, static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *, uint64_t objset, uint64_t object); +static inline uint64_t +get_birth_time(traverse_data_t *td, const blkptr_t *bp) +{ + if (td->td_flags & TRAVERSE_LOGICAL) + return (BP_GET_LOGICAL_BIRTH(bp)); + else + return (BP_GET_BIRTH(bp)); +} + static int traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t claim_txg) @@ -85,7 +94,7 @@ traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg, return (0); if (claim_txg == 0 && - BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(td->td_spa)) + get_birth_time(td, bp) >= spa_min_claim_txg(td->td_spa)) return (-1); SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, @@ -110,7 +119,7 @@ traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg, if (BP_IS_HOLE(bp)) return (0); - if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg) + if (claim_txg == 0 || get_birth_time(td, bp) < claim_txg) return (0); ASSERT3U(BP_GET_LSIZE(bp), !=, 0); @@ -194,7 +203,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const dnode_phys_t *dnp, */ if (resume_skip_check(td, dnp, zb) != RESUME_SKIP_NONE) return (B_FALSE); - if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg) + if (BP_IS_HOLE(bp) || get_birth_time(td, bp) <= td->td_min_txg) return (B_FALSE); if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) return (B_FALSE); @@ -265,7 +274,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, zb->zb_object == DMU_META_DNODE_OBJECT) && td->td_hole_birth_enabled_txg <= td->td_min_txg) return (0); - } else if (BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg) { + } else if (get_birth_time(td, bp) <= td->td_min_txg) { return (0); } diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c index e301fe19f..fdc8b7b19 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -1523,7 +1523,7 @@ dsl_bookmark_block_killed(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) * If the block was live (referenced) at the time of this * bookmark, add its space to the bookmark's FBN. */ - if (BP_GET_LOGICAL_BIRTH(bp) <= + if (BP_GET_BIRTH(bp) <= dbn->dbn_phys.zbm_creation_txg && (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) { mutex_enter(&dbn->dbn_lock); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 21c8a6821..b767c9641 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -159,7 +159,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) return; } - ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >, + ASSERT3U(BP_GET_BIRTH(bp), >, dsl_dataset_phys(ds)->ds_prev_snap_txg); dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_lock); @@ -194,7 +194,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) * they do not need to be freed. */ if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && - BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg && + BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg && !(BP_IS_EMBEDDED(bp))) { ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(spa_feature_is_enabled(spa, @@ -263,7 +263,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, return (0); ASSERT(dmu_tx_is_syncing(tx)); - ASSERT(BP_GET_LOGICAL_BIRTH(bp) <= tx->tx_txg); + ASSERT(BP_GET_BIRTH(bp) <= tx->tx_txg); if (ds == NULL) { dsl_free(tx->tx_pool, tx->tx_txg, bp); @@ -281,7 +281,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, * they do not need to be freed. */ if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && - BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg && + BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg && !(BP_IS_EMBEDDED(bp))) { ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(spa_feature_is_enabled(spa, @@ -289,7 +289,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, bplist_append(&ds->ds_dir->dd_pending_frees, bp); } - if (BP_GET_LOGICAL_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) { + if (BP_GET_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) { int64_t delta; /* @@ -346,14 +346,14 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); /* if (logical birth > prev prev snap txg) prev unique += bs */ if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == - ds->ds_object && BP_GET_LOGICAL_BIRTH(bp) > + ds->ds_object && BP_GET_BIRTH(bp) > dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); mutex_enter(&ds->ds_prev->ds_lock); dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; mutex_exit(&ds->ds_prev->ds_lock); } - if (BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg) { + if (BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) { dsl_dir_transfer_space(ds->ds_dir, used, DD_USED_HEAD, DD_USED_SNAP, tx); } @@ -2944,7 +2944,7 @@ dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) if (snap == NULL) return (B_FALSE); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); - birth = BP_GET_LOGICAL_BIRTH(dsl_dataset_get_blkptr(ds)); + birth = BP_GET_BIRTH(dsl_dataset_get_blkptr(ds)); rrw_exit(&ds->ds_bp_rwlock, FTAG); if (birth > dsl_dataset_phys(snap)->ds_creation_txg) { objset_t *os, *os_snap; diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c index 3113d932f..9ffc998ac 100644 --- a/module/zfs/dsl_deadlist.c +++ b/module/zfs/dsl_deadlist.c @@ -484,7 +484,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed, dl->dl_phys->dl_comp += sign * BP_GET_PSIZE(bp); dl->dl_phys->dl_uncomp += sign * BP_GET_UCSIZE(bp); - dle_tofind.dle_mintxg = BP_GET_LOGICAL_BIRTH(bp); + dle_tofind.dle_mintxg = BP_GET_BIRTH(bp); dle = avl_find(&dl->dl_tree, &dle_tofind, &where); if (dle == NULL) dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); @@ -493,7 +493,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed, if (dle == NULL) { zfs_panic_recover("blkptr at %p has invalid BLK_BIRTH %llu", - bp, (longlong_t)BP_GET_LOGICAL_BIRTH(bp)); + bp, (longlong_t)BP_GET_BIRTH(bp)); dle = avl_first(&dl->dl_tree); } diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index f5ec93b2d..fff49c97f 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -133,11 +133,11 @@ process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) ASSERT(!BP_IS_HOLE(bp)); - if (BP_GET_LOGICAL_BIRTH(bp) <= + if (BP_GET_BIRTH(bp) <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) { dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx); if (poa->ds_prev && !poa->after_branch_point && - BP_GET_LOGICAL_BIRTH(bp) > + BP_GET_BIRTH(bp) > dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) { dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes += bp_get_dsize_sync(dp->dp_spa, bp); @@ -315,8 +315,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); - ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, - tx->tx_txg); + ASSERT3U(BP_GET_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, tx->tx_txg); rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(zfs_refcount_is_zero(&ds->ds_longholds)); @@ -730,7 +729,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); } else { ASSERT(zilog == NULL); - ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >, + ASSERT3U(BP_GET_BIRTH(bp), >, dsl_dataset_phys(ka->ds)->ds_prev_snap_txg); (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); } @@ -1020,8 +1019,7 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT(ds->ds_prev == NULL || dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); - ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, - tx->tx_txg); + ASSERT3U(BP_GET_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, tx->tx_txg); rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index f1088d872..4f1f66b83 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -1056,7 +1056,7 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) * will be wrong. */ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); - ASSERT0(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(prev)->ds_bp)); + ASSERT0(BP_GET_BIRTH(&dsl_dataset_phys(prev)->ds_bp)); rrw_exit(&ds->ds_bp_rwlock, FTAG); /* The origin doesn't get attached to itself */ diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 1b2cd3e36..5052992d7 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -454,7 +454,7 @@ static inline void bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i) { sio->sio_blk_prop = bp->blk_prop; - sio->sio_phys_birth = BP_GET_PHYSICAL_BIRTH(bp); + sio->sio_phys_birth = BP_GET_RAW_PHYSICAL_BIRTH(bp); sio->sio_birth = BP_GET_LOGICAL_BIRTH(bp); sio->sio_cksum = bp->blk_cksum; sio->sio_nr_dvas = BP_GET_NDVAS(bp); @@ -1768,7 +1768,7 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg, ASSERT(!BP_IS_REDACTED(bp)); if (BP_IS_HOLE(bp) || - BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) + BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) return (0); /* @@ -1778,7 +1778,7 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg, * scrub there's nothing to do to it). */ if (claim_txg == 0 && - BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(dp->dp_spa)) + BP_GET_BIRTH(bp) >= spa_min_claim_txg(dp->dp_spa)) return (0); SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], @@ -1804,7 +1804,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg, ASSERT(!BP_IS_REDACTED(bp)); if (BP_IS_HOLE(bp) || - BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) + BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) return (0); /* @@ -1812,7 +1812,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg, * already txg sync'ed (but this log block contains * other records that are not synced) */ - if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg) + if (claim_txg == 0 || BP_GET_BIRTH(bp) < claim_txg) return (0); ASSERT3U(BP_GET_LSIZE(bp), !=, 0); @@ -1952,7 +1952,7 @@ dsl_scan_prefetch(scan_prefetch_ctx_t *spc, blkptr_t *bp, zbookmark_phys_t *zb) return; if (BP_IS_HOLE(bp) || - BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg || + BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) return; @@ -2223,7 +2223,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, if (dnp != NULL && dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) { scn->scn_phys.scn_errors++; - spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp)); + spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp)); return (SET_ERROR(EINVAL)); } @@ -2319,7 +2319,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, * by arc_read() for the cases above. */ scn->scn_phys.scn_errors++; - spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp)); + spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp)); return (SET_ERROR(EINVAL)); } @@ -2396,7 +2396,12 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb, if (f != SPA_FEATURE_NONE) ASSERT(dsl_dataset_feature_is_active(ds, f)); - if (BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) { + /* + * Recurse any blocks that were written either logically or physically + * at or after cur_min_txg. About logical birth we care for traversal, + * looking for any changes, while about physical for the actual scan. + */ + if (BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) { scn->scn_lt_min_this_txg++; return; } @@ -2422,7 +2427,7 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb, * Don't scan it now unless we need to because something * under it was modified. */ - if (BP_GET_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) { + if (BP_GET_PHYSICAL_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) { scn->scn_gt_max_this_txg++; return; } @@ -4806,7 +4811,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, { dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; - uint64_t phys_birth = BP_GET_BIRTH(bp); + uint64_t phys_birth = BP_GET_PHYSICAL_BIRTH(bp); size_t psize = BP_GET_PSIZE(bp); boolean_t needs_io = B_FALSE; int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 102a43e11..0e5f09b27 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -5603,7 +5603,21 @@ remap_blkptr_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset, vdev_indirect_births_t *vib = oldvd->vdev_indirect_births; uint64_t physical_birth = vdev_indirect_births_physbirth(vib, DVA_GET_OFFSET(&bp->blk_dva[0]), DVA_GET_ASIZE(&bp->blk_dva[0])); - BP_SET_PHYSICAL_BIRTH(bp, physical_birth); + + /* + * For rewritten blocks, use the old physical birth as the new logical + * birth (representing when the space was allocated) and the removal + * time as the new physical birth (representing when it was actually + * written). + */ + if (BP_GET_REWRITE(bp)) { + uint64_t old_physical_birth = BP_GET_PHYSICAL_BIRTH(bp); + ASSERT3U(old_physical_birth, <, physical_birth); + BP_SET_BIRTH(bp, old_physical_birth, physical_birth); + BP_SET_REWRITE(bp, 0); + } else { + BP_SET_PHYSICAL_BIRTH(bp, physical_birth); + } DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id); DVA_SET_OFFSET(&bp->blk_dva[0], offset); @@ -5972,7 +5986,7 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize, int error = 0; ASSERT0(BP_GET_LOGICAL_BIRTH(bp)); - ASSERT0(BP_GET_PHYSICAL_BIRTH(bp)); + ASSERT0(BP_GET_RAW_PHYSICAL_BIRTH(bp)); spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); @@ -6034,7 +6048,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) int ndvas = BP_GET_NDVAS(bp); ASSERT(!BP_IS_HOLE(bp)); - ASSERT(!now || BP_GET_LOGICAL_BIRTH(bp) >= spa_syncing_txg(spa)); + ASSERT(!now || BP_GET_BIRTH(bp) >= spa_syncing_txg(spa)); /* * If we have a checkpoint for the pool we need to make sure that @@ -6052,7 +6066,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) * normally as they will be referenced by the checkpointed uberblock. */ boolean_t checkpoint = B_FALSE; - if (BP_GET_LOGICAL_BIRTH(bp) <= spa->spa_checkpoint_txg && + if (BP_GET_BIRTH(bp) <= spa->spa_checkpoint_txg && spa_syncing_txg(spa) > spa->spa_checkpoint_txg) { /* * At this point, if the block is part of the checkpoint diff --git a/module/zfs/spa.c b/module/zfs/spa.c index bbf474ed6..5ecb175fb 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -2862,8 +2862,8 @@ spa_claim_notify(zio_t *zio) return; mutex_enter(&spa->spa_props_lock); /* any mutex will do */ - if (spa->spa_claim_max_txg < BP_GET_LOGICAL_BIRTH(zio->io_bp)) - spa->spa_claim_max_txg = BP_GET_LOGICAL_BIRTH(zio->io_bp); + if (spa->spa_claim_max_txg < BP_GET_BIRTH(zio->io_bp)) + spa->spa_claim_max_txg = BP_GET_BIRTH(zio->io_bp); mutex_exit(&spa->spa_props_lock); } diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index 3e08f261f..7252fd534 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -253,7 +253,7 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep, if (error == 0 && BP_IS_HOLE(&bp)) error = SET_ERROR(ENOENT); - *birth_txg = BP_GET_LOGICAL_BIRTH(&bp); + *birth_txg = BP_GET_PHYSICAL_BIRTH(&bp); rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (error); @@ -885,7 +885,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, if (error == EACCES) error = 0; else if (!error) - zep.zb_birth = BP_GET_LOGICAL_BIRTH(&bp); + zep.zb_birth = BP_GET_PHYSICAL_BIRTH(&bp); rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 2b78340cf..18efdaac0 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -532,7 +532,7 @@ vdev_mirror_child_select(zio_t *zio) uint64_t txg = zio->io_txg; int c, lowest_load; - ASSERT(zio->io_bp == NULL || BP_GET_BIRTH(zio->io_bp) == txg); + ASSERT(zio->io_bp == NULL || BP_GET_PHYSICAL_BIRTH(zio->io_bp) == txg); lowest_load = INT_MAX; mm->mm_preferred_cnt = 0; diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index a5fa9a604..210cdcab1 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2206,11 +2206,7 @@ vdev_raidz_close(vdev_t *vd) /* * Return the logical width to use, given the txg in which the allocation - * happened. Note that BP_GET_BIRTH() is usually the txg in which the - * BP was allocated. Remapped BP's (that were relocated due to device - * removal, see remap_blkptr_cb()), will have a more recent physical birth - * which reflects when the BP was relocated, but we can ignore these because - * they can't be on RAIDZ (device removal doesn't support RAIDZ). + * happened. */ static uint64_t vdev_raidz_get_logical_width(vdev_raidz_t *vdrz, uint64_t txg) @@ -2343,7 +2339,7 @@ vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, raidz_row_t *rr, int col) logical_rs.rs_start = rr->rr_offset; logical_rs.rs_end = logical_rs.rs_start + vdev_raidz_psize_to_asize(zio->io_vd, rr->rr_size, - BP_GET_BIRTH(zio->io_bp)); + BP_GET_PHYSICAL_BIRTH(zio->io_bp)); raidz_col_t *rc = &rr->rr_col[col]; vdev_t *cvd = zio->io_vd->vdev_child[rc->rc_devidx]; @@ -2566,7 +2562,7 @@ vdev_raidz_io_start(zio_t *zio) raidz_map_t *rm; uint64_t logical_width = vdev_raidz_get_logical_width(vdrz, - BP_GET_BIRTH(zio->io_bp)); + BP_GET_PHYSICAL_BIRTH(zio->io_bp)); if (logical_width != vdrz->vd_physical_width) { zfs_locked_range_t *lr = NULL; uint64_t synced_offset = UINT64_MAX; diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 3aa188a95..6e4f84257 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -589,7 +589,7 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, * that we rewind to is invalid. Thus, we return -1 so * zil_parse() doesn't attempt to read it. */ - if (BP_GET_LOGICAL_BIRTH(bp) >= first_txg) + if (BP_GET_BIRTH(bp) >= first_txg) return (-1); if (zil_bp_tree_add(zilog, bp) != 0) @@ -615,7 +615,7 @@ zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, * Claim log block if not already committed and not already claimed. * If tx == NULL, just verify that the block is claimable. */ - if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) < first_txg || + if (BP_IS_HOLE(bp) || BP_GET_BIRTH(bp) < first_txg || zil_bp_tree_add(zilog, bp) != 0) return (0); @@ -640,7 +640,7 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg) * waited for all writes to be stable first), so it is semantically * correct to declare this the end of the log. */ - if (BP_GET_LOGICAL_BIRTH(&lr->lr_blkptr) >= first_txg) { + if (BP_GET_BIRTH(&lr->lr_blkptr) >= first_txg) { error = zil_read_log_data(zilog, lr, NULL); if (error != 0) return (error); @@ -687,7 +687,7 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx, * just in case lets be safe and just stop here now instead of * corrupting the pool. */ - if (BP_GET_BIRTH(bp) >= first_txg) + if (BP_GET_PHYSICAL_BIRTH(bp) >= first_txg) return (SET_ERROR(ENOENT)); /* @@ -742,7 +742,7 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg) /* * If we previously claimed it, we need to free it. */ - if (BP_GET_LOGICAL_BIRTH(bp) >= claim_txg && + if (BP_GET_BIRTH(bp) >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) { zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); } @@ -1997,7 +1997,7 @@ next_lwb: &slog); } if (error == 0) { - ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), ==, txg); + ASSERT3U(BP_GET_BIRTH(bp), ==, txg); BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 : ZIO_CHECKSUM_ZILOG); bp->blk_cksum = lwb->lwb_blk.blk_cksum; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 0fde2d6f7..41e0dc000 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -692,7 +692,7 @@ error: zio->io_error = SET_ERROR(EIO); if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { spa_log_error(spa, &zio->io_bookmark, - BP_GET_LOGICAL_BIRTH(zio->io_bp)); + BP_GET_PHYSICAL_BIRTH(zio->io_bp)); (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, spa, NULL, &zio->io_bookmark, zio, 0); } @@ -1104,7 +1104,8 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp, "DVA[1]=%#llx/%#llx " "DVA[2]=%#llx/%#llx " "prop=%#llx " - "pad=%#llx,%#llx " + "prop2=%#llx " + "pad=%#llx " "phys_birth=%#llx " "birth=%#llx " "fill=%#llx " @@ -1117,9 +1118,9 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp, (long long)bp->blk_dva[2].dva_word[0], (long long)bp->blk_dva[2].dva_word[1], (long long)bp->blk_prop, - (long long)bp->blk_pad[0], - (long long)bp->blk_pad[1], - (long long)BP_GET_PHYSICAL_BIRTH(bp), + (long long)bp->blk_prop2, + (long long)bp->blk_pad, + (long long)BP_GET_RAW_PHYSICAL_BIRTH(bp), (long long)BP_GET_LOGICAL_BIRTH(bp), (long long)bp->blk_fill, (long long)bp->blk_cksum.zc_word[0], @@ -1334,7 +1335,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, { zio_t *zio; - zio = zio_create(pio, spa, BP_GET_BIRTH(bp), bp, + zio = zio_create(pio, spa, BP_GET_PHYSICAL_BIRTH(bp), bp, data, size, size, done, private, ZIO_TYPE_READ, priority, flags, NULL, 0, zb, ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? @@ -1854,7 +1855,7 @@ zio_write_bp_init(zio_t *zio) blkptr_t *bp = zio->io_bp; zio_prop_t *zp = &zio->io_prop; - ASSERT(BP_GET_LOGICAL_BIRTH(bp) != zio->io_txg); + ASSERT(BP_GET_BIRTH(bp) != zio->io_txg); *bp = *zio->io_bp_override; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; @@ -1942,7 +1943,7 @@ zio_write_compress(zio_t *zio) ASSERT(zio->io_child_type != ZIO_CHILD_DDT); ASSERT(zio->io_bp_override == NULL); - if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg) { + if (!BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) == zio->io_txg) { /* * We're rewriting an existing block, which means we're * working on behalf of spa_sync(). For spa_sync() to @@ -2079,7 +2080,7 @@ zio_write_compress(zio_t *zio) * spa_sync() to allocate new blocks, but force rewrites after that. * There should only be a handful of blocks after pass 1 in any case. */ - if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg && + if (!BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) == zio->io_txg && BP_GET_PSIZE(bp) == psize && pass >= zfs_sync_pass_rewrite) { VERIFY3U(psize, !=, 0); @@ -3894,7 +3895,7 @@ zio_ddt_write(zio_t *zio) * block and leave. */ if (have_dvas == 0) { - ASSERT(BP_GET_LOGICAL_BIRTH(bp) == txg); + ASSERT(BP_GET_BIRTH(bp) == txg); ASSERT(BP_EQUAL(bp, zio->io_bp_override)); ddt_phys_extend(ddp, v, bp); ddt_phys_addref(ddp, v); @@ -4224,8 +4225,10 @@ zio_dva_allocate(zio_t *zio) ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_GANG); memcpy(zio->io_bp->blk_dva, zio->io_bp_orig.blk_dva, 3 * sizeof (dva_t)); - BP_SET_BIRTH(zio->io_bp, BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig), - BP_GET_PHYSICAL_BIRTH(&zio->io_bp_orig)); + BP_SET_LOGICAL_BIRTH(zio->io_bp, + BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig)); + BP_SET_PHYSICAL_BIRTH(zio->io_bp, + BP_GET_RAW_PHYSICAL_BIRTH(&zio->io_bp_orig)); return (zio); } @@ -4385,12 +4388,11 @@ zio_dva_claim(zio_t *zio) static void zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) { - ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp)); + ASSERT(BP_GET_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp)); ASSERT(zio->io_bp_override == NULL); if (!BP_IS_HOLE(bp)) { - metaslab_free(zio->io_spa, bp, BP_GET_LOGICAL_BIRTH(bp), - B_TRUE); + metaslab_free(zio->io_spa, bp, BP_GET_BIRTH(bp), B_TRUE); } if (gn != NULL) { @@ -5268,7 +5270,7 @@ zio_ready(zio_t *zio) if (zio->io_ready) { ASSERT(IO_IS_ALLOCATING(zio)); - ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg || + ASSERT(BP_GET_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp) || (zio->io_flags & ZIO_FLAG_NOPWRITE)); ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); @@ -5423,8 +5425,6 @@ zio_done(zio_t *zio) ASSERT(zio->io_children[c][w] == 0); if (zio->io_bp != NULL && !BP_IS_EMBEDDED(zio->io_bp)) { - ASSERT(zio->io_bp->blk_pad[0] == 0); - ASSERT(zio->io_bp->blk_pad[1] == 0); ASSERT(memcmp(zio->io_bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || (zio->io_bp == zio_unique_parent(zio)->io_bp)); @@ -5539,7 +5539,7 @@ zio_done(zio_t *zio) * error and generate a logical data ereport. */ spa_log_error(zio->io_spa, &zio->io_bookmark, - BP_GET_LOGICAL_BIRTH(zio->io_bp)); + BP_GET_PHYSICAL_BIRTH(zio->io_bp)); (void) zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa, NULL, &zio->io_bookmark, zio, 0); } diff --git a/module/zfs/zio_checksum.c b/module/zfs/zio_checksum.c index 4cb9da0db..63d0c6dad 100644 --- a/module/zfs/zio_checksum.c +++ b/module/zfs/zio_checksum.c @@ -279,7 +279,7 @@ static void zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp) { const dva_t *dva = BP_IDENTITY(bp); - uint64_t txg = BP_GET_BIRTH(bp); + uint64_t txg = BP_GET_PHYSICAL_BIRTH(bp); ASSERT(BP_IS_GANG(bp));