mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Allow physical rewrite without logical
During regular block writes ZFS sets both logical and physical birth times equal to the current TXG. During dedup and block cloning the logical birth time is still set to the current TXG, but the physical one may be copied from the original block that was used. This represents the fact that logically the user data has changed, but physically it is the same old block. But block rewrite introduces a new situation, where a block is not changed logically, but is stored in a different place in the pool. From the ARC, scrub and some other perspectives this is a new block, but for example for user applications or incremental replication it is not. A somewhat similar thing happens during the remap phase of device removal, but in that case space blocks are still accounted as allocated at their logical birth times. This patch introduces a new "rewrite" flag in the block pointer structure, allowing us to differentiate a physical rewrite (when the block is actually reallocated at the physical birth time) from the device removal case (when the logical birth time is used). The new functionality is not used at this point, and the only expected change is that the error log is now kept in terms of physical birth times, rather than logical, since if a block with a logged error was somehow rewritten, then the previous error does not matter any more. This change also introduces a new TRAVERSE_LOGICAL flag to the traverse code, allowing zfs send, redact and diff to work in the context of logical birth times, ignoring physical-only rewrites. It also changes nothing at this point due to the lack of those writes, but they will come in a following patch. Reviewed-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com> Closes #17565
This commit is contained in:
committed by
Brian Behlendorf
parent
894edd084e
commit
4ae8bf406b
@@ -59,6 +59,13 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
*/
|
||||
#define TRAVERSE_NO_DECRYPT (1 << 5)
|
||||
|
||||
/*
|
||||
* Always use logical birth time for birth time comparisons. This is useful
|
||||
* for operations that care about user data changes rather than physical
|
||||
* block rewrites (e.g., incremental replication).
|
||||
*/
|
||||
#define TRAVERSE_LOGICAL (1 << 6)
|
||||
|
||||
/* Special traverse error return value to indicate skipping of children */
|
||||
/*
 * Parenthesized so the negative constant always expands as a single operand,
 * whatever expression surrounds the macro.
 */
#define TRAVERSE_VISIT_NO_CHILDREN (-1)
|
||||
|
||||
|
||||
+50
-20
@@ -140,7 +140,7 @@ typedef struct zio_cksum_salt {
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 7 | padding |
|
||||
* 7 |R| padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 8 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
@@ -175,6 +175,7 @@ typedef struct zio_cksum_salt {
|
||||
* E blkptr_t contains embedded data (see below)
|
||||
* lvl level of indirection
|
||||
* type DMU object type
|
||||
* R rewrite (reallocated/rewritten at phys birth TXG)
|
||||
* phys birth txg when dva[0] was written; zero if same as logical birth txg
|
||||
* note that typically all the dva's would be written in this
|
||||
* txg, but they could be different if they were moved by
|
||||
@@ -204,7 +205,7 @@ typedef struct zio_cksum_salt {
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 7 | padding |
|
||||
* 7 |R| padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 8 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
@@ -373,7 +374,8 @@ typedef enum bp_embedded_type {
|
||||
typedef struct blkptr {
|
||||
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
|
||||
uint64_t blk_prop; /* size, compression, type, etc */
|
||||
uint64_t blk_pad[2]; /* Extra space for the future */
|
||||
uint64_t blk_prop2; /* additional properties */
|
||||
uint64_t blk_pad; /* Extra space for the future */
|
||||
uint64_t blk_birth_word[2];
|
||||
uint64_t blk_fill; /* fill count */
|
||||
zio_cksum_t blk_cksum; /* 256-bit checksum */
|
||||
@@ -476,32 +478,51 @@ typedef struct blkptr {
|
||||
/* Read the (0, 1) bit field of blk_fill through BF64_GET. */
#define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1)
|
||||
/* Store x into the (0, 1) bit field of blk_fill through BF64_SET. */
#define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x)
|
||||
|
||||
/*
|
||||
* Block birth time macros for different use cases:
|
||||
* - BP_GET_LOGICAL_BIRTH(): When the block was logically modified by user.
|
||||
* To be used with a focus on user data, like incremental replication.
|
||||
* - BP_GET_PHYSICAL_BIRTH(): When the block was physically written to disks.
|
||||
* For regular writes is equal to logical birth. For dedup and block cloning
|
||||
* can be smaller than logical birth. For remapped and rewritten blocks can
|
||||
* be bigger. To be used with focus on physical disk content: ARC, DDT, scrub.
|
||||
* - BP_GET_RAW_PHYSICAL_BIRTH(): Raw physical birth value. Zero if equal
|
||||
* to logical birth. Should only be used for BP copying and debugging.
|
||||
* - BP_GET_BIRTH(): When the block was allocated, which is a physical birth
|
||||
* for rewritten blocks (rewrite flag set) or logical birth otherwise.
|
||||
*/
|
||||
/* Logical birth is kept in blk_birth_word[1]; the expansion stays an lvalue. */
#define BP_GET_LOGICAL_BIRTH(bp) ((bp)->blk_birth_word[1])
|
||||
/* Store x as the logical birth (blk_birth_word[1]). */
#define BP_SET_LOGICAL_BIRTH(bp, x) ((bp)->blk_birth_word[1] = (x))
|
||||
|
||||
#define BP_GET_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0]
|
||||
/* Raw physical birth word (blk_birth_word[0]), with no fallback applied. */
#define BP_GET_RAW_PHYSICAL_BIRTH(bp) ((bp)->blk_birth_word[0])
|
||||
/* Store x verbatim into the raw physical birth word (blk_birth_word[0]). */
#define BP_SET_PHYSICAL_BIRTH(bp, x) ((bp)->blk_birth_word[0] = (x))
|
||||
|
||||
#define BP_GET_BIRTH(bp) \
|
||||
(BP_IS_EMBEDDED(bp) ? 0 : \
|
||||
BP_GET_PHYSICAL_BIRTH(bp) ? BP_GET_PHYSICAL_BIRTH(bp) : \
|
||||
/*
 * Effective physical birth: 0 for embedded block pointers, otherwise the raw
 * physical birth word, falling back to the logical birth when that word is 0.
 */
#define	BP_GET_PHYSICAL_BIRTH(bp)					\
	(BP_IS_EMBEDDED(bp) ? 0 :					\
	(BP_GET_RAW_PHYSICAL_BIRTH(bp) != 0 ?				\
	BP_GET_RAW_PHYSICAL_BIRTH(bp) : BP_GET_LOGICAL_BIRTH(bp)))
|
||||
|
||||
#define BP_SET_BIRTH(bp, logical, physical) \
|
||||
{ \
|
||||
ASSERT(!BP_IS_EMBEDDED(bp)); \
|
||||
BP_SET_LOGICAL_BIRTH(bp, logical); \
|
||||
BP_SET_PHYSICAL_BIRTH(bp, \
|
||||
((logical) == (physical) ? 0 : (physical))); \
|
||||
/*
 * Allocation birth: the physical birth when the rewrite flag is set on a
 * non-embedded block pointer, the logical birth in every other case.
 */
#define	BP_GET_BIRTH(bp)						\
	((!BP_IS_EMBEDDED(bp) && BP_GET_REWRITE(bp)) ?			\
	BP_GET_PHYSICAL_BIRTH(bp) : BP_GET_LOGICAL_BIRTH(bp))
|
||||
|
||||
/*
 * Record both birth values on a non-embedded block pointer.  The physical
 * word is stored as 0 when it equals the logical birth.
 */
#define	BP_SET_BIRTH(bp, logical, physical)				\
{									\
	ASSERT(!BP_IS_EMBEDDED(bp));					\
	BP_SET_LOGICAL_BIRTH(bp, logical);				\
	BP_SET_PHYSICAL_BIRTH(bp,					\
	    ((physical) != (logical) ? (physical) : 0));		\
}
|
||||
|
||||
#define BP_GET_FILL(bp) \
|
||||
((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
|
||||
((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
|
||||
(BP_IS_EMBEDDED(bp) ? 1 : \
|
||||
BP_IS_ENCRYPTED(bp) ? BF64_GET((bp)->blk_fill, 0, 32) : \
|
||||
(bp)->blk_fill)
|
||||
|
||||
#define BP_SET_FILL(bp, fill) \
|
||||
{ \
|
||||
if (BP_IS_ENCRYPTED(bp)) \
|
||||
ASSERT(!BP_IS_EMBEDDED(bp)); \
|
||||
if (BP_IS_ENCRYPTED(bp)) \
|
||||
BF64_SET((bp)->blk_fill, 0, 32, fill); \
|
||||
else \
|
||||
(bp)->blk_fill = fill; \
|
||||
@@ -516,6 +537,15 @@ typedef struct blkptr {
|
||||
BF64_SET((bp)->blk_fill, 32, 32, iv2); \
|
||||
}
|
||||
|
||||
/*
 * Rewrite flag: the (63, 1) bit field of blk_prop2 as read by BF64_GET.
 * Embedded block pointers always report 0.
 */
#define	BP_GET_REWRITE(bp)						\
	(!BP_IS_EMBEDDED(bp) ? BF64_GET((bp)->blk_prop2, 63, 1) : 0)
|
||||
|
||||
/*
 * Store x into the (63, 1) bit field of blk_prop2.  Asserts that the block
 * pointer is not embedded.
 */
#define	BP_SET_REWRITE(bp, x)						\
{									\
	ASSERT(!BP_IS_EMBEDDED(bp));					\
	BF64_SET((bp)->blk_prop2, 63, 1, x);				\
}
|
||||
|
||||
/*
 * True when the block sits above level 0 or when its DMU object type is
 * classified as metadata by DMU_OT_IS_METADATA.
 */
#define	BP_IS_METADATA(bp)						\
	((BP_GET_LEVEL(bp) > 0) ||					\
	DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
|
||||
|
||||
@@ -545,7 +575,7 @@ typedef struct blkptr {
|
||||
(dva1)->dva_word[0] == (dva2)->dva_word[0])
|
||||
|
||||
#define BP_EQUAL(bp1, bp2) \
|
||||
(BP_GET_BIRTH(bp1) == BP_GET_BIRTH(bp2) && \
|
||||
(BP_GET_PHYSICAL_BIRTH(bp1) == BP_GET_PHYSICAL_BIRTH(bp2) && \
|
||||
BP_GET_LOGICAL_BIRTH(bp1) == BP_GET_LOGICAL_BIRTH(bp2) && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
|
||||
@@ -588,8 +618,8 @@ typedef struct blkptr {
|
||||
{ \
|
||||
BP_ZERO_DVAS(bp); \
|
||||
(bp)->blk_prop = 0; \
|
||||
(bp)->blk_pad[0] = 0; \
|
||||
(bp)->blk_pad[1] = 0; \
|
||||
(bp)->blk_prop2 = 0; \
|
||||
(bp)->blk_pad = 0; \
|
||||
(bp)->blk_birth_word[0] = 0; \
|
||||
(bp)->blk_birth_word[1] = 0; \
|
||||
(bp)->blk_fill = 0; \
|
||||
@@ -696,7 +726,7 @@ typedef struct blkptr {
|
||||
(u_longlong_t)BP_GET_LSIZE(bp), \
|
||||
(u_longlong_t)BP_GET_PSIZE(bp), \
|
||||
(u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), \
|
||||
(u_longlong_t)BP_GET_BIRTH(bp), \
|
||||
(u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp), \
|
||||
(u_longlong_t)BP_GET_FILL(bp), \
|
||||
ws, \
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[0], \
|
||||
|
||||
Reference in New Issue
Block a user