mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 11:40:25 +03:00
Fix send/recv lost spill block
When receiving a DRR_OBJECT record the receive_object() function needs to determine how to handle a spill block associated with the object. It may need to be removed or kept depending on how the object was modified at the source. This determination is currently accomplished using a heuristic which takes into account the DRR_OBJECT record and the existing object properties. This is a problem because there isn't quite enough information available to do the right thing under all circumstances. For example, when only the block size changes the spill block is removed when it should be kept. What's needed to resolve this is an additional flag in the DRR_OBJECT record which indicates if the object being received references a spill block. The DRR_OBJECT_SPILL flag was added for this purpose. When set, the object references a spill block and it must be kept. Either it is up to date, or it will be replaced by a subsequent DRR_SPILL record. Conversely, if the object being received doesn't reference a spill block then any existing spill block should always be removed. Since previous versions of ZFS do not understand this new flag, additional DRR_SPILL records will be inserted into the stream. This has the advantage of being fully backward compatible. Existing ZFS systems receiving this stream will recreate the spill block if it was incorrectly removed. Updated ZFS versions will correctly ignore the additional spill blocks which can be identified by checking for the DRR_SPILL_UNMODIFIED flag. The small downside to this approach is that it may increase the size of the stream and of the received snapshot on previous versions of ZFS. Additionally, when receiving streams generated by previous unpatched versions of ZFS, spill blocks may still be lost.
OpenZFS-issue: https://www.illumos.org/issues/9952 FreeBSD-issue: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=233277 Reviewed-by: Paul Dagnelie <pcd@delphix.com> Reviewed-by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: Tom Caputi <tcaputi@datto.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #8668
This commit is contained in:
parent
9c53e51616
commit
caf9dd209f
@ -420,7 +420,8 @@ int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp);
|
||||
int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object,
|
||||
dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype,
|
||||
int bonuslen, int dnodesize, dmu_tx_t *txp);
|
||||
int bonuslen, int dnodesize, boolean_t keep_spill, dmu_tx_t *tx);
|
||||
int dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Free an object from this objset.
|
||||
|
@ -265,6 +265,7 @@ typedef struct dmu_sendarg {
|
||||
objset_t *dsa_os;
|
||||
zio_cksum_t dsa_zc;
|
||||
uint64_t dsa_toguid;
|
||||
uint64_t dsa_fromtxg;
|
||||
int dsa_err;
|
||||
dmu_pendop_t dsa_pending_op;
|
||||
uint64_t dsa_featureflags;
|
||||
|
@ -48,6 +48,7 @@ typedef struct dmu_recv_cookie {
|
||||
boolean_t drc_resumable;
|
||||
boolean_t drc_raw;
|
||||
boolean_t drc_clone;
|
||||
boolean_t drc_spill;
|
||||
struct avl_tree *drc_guid_to_ds_map;
|
||||
nvlist_t *drc_keynvl;
|
||||
zio_cksum_t drc_cksum;
|
||||
|
@ -267,8 +267,8 @@ typedef struct dnode_phys {
|
||||
};
|
||||
} dnode_phys_t;
|
||||
|
||||
#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \
|
||||
(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
|
||||
#define DN_SPILL_BLKPTR(dnp) ((blkptr_t *)((char *)(dnp) + \
|
||||
(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)))
|
||||
|
||||
struct dnode {
|
||||
/*
|
||||
@ -420,7 +420,8 @@ void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
|
||||
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
|
||||
dmu_object_type_t bonustype, int bonuslen, int dn_slots,
|
||||
boolean_t keep_spill, dmu_tx_t *tx);
|
||||
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_byteswap(dnode_phys_t *dnp);
|
||||
void dnode_buf_byteswap(void *buf, size_t size);
|
||||
|
@ -1317,6 +1317,7 @@ typedef enum {
|
||||
ZFS_ERR_WRONG_PARENT,
|
||||
ZFS_ERR_FROM_IVSET_GUID_MISSING,
|
||||
ZFS_ERR_FROM_IVSET_GUID_MISMATCH,
|
||||
ZFS_ERR_SPILL_BLOCK_FLAG_MISSING,
|
||||
} zfs_errno_t;
|
||||
|
||||
/*
|
||||
|
@ -101,7 +101,7 @@ typedef enum drr_headertype {
|
||||
/* flag #18 is reserved for a Delphix feature */
|
||||
#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19)
|
||||
#define DMU_BACKUP_FEATURE_RESUMING (1 << 20)
|
||||
/* flag #21 is reserved for a Delphix feature */
|
||||
/* flag #21 is reserved for the redacted send/receive feature */
|
||||
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
|
||||
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
|
||||
#define DMU_BACKUP_FEATURE_RAW (1 << 24)
|
||||
@ -160,16 +160,38 @@ typedef enum dmu_send_resume_token_version {
|
||||
* cannot necessarily be received as a clone correctly.
|
||||
*/
|
||||
#define DRR_FLAG_FREERECORDS (1<<2)
|
||||
/*
|
||||
* When DRR_FLAG_SPILL_BLOCK is set it indicates the DRR_OBJECT_SPILL
|
||||
* and DRR_SPILL_UNMODIFIED flags are meaningful in the send stream.
|
||||
*
|
||||
* When DRR_FLAG_SPILL_BLOCK is set, DRR_OBJECT records will have
|
||||
* DRR_OBJECT_SPILL set if and only if they should have a spill block
|
||||
* (either an existing one, or a new one in the send stream). When clear
|
||||
* the object does not have a spill block and any existing spill block
|
||||
* should be freed.
|
||||
*
|
||||
* Similarly, when DRR_FLAG_SPILL_BLOCK is set, DRR_SPILL records will
|
||||
* have DRR_SPILL_UNMODIFIED set if and only if they were included for
|
||||
* backward compatibility purposes, and can be safely ignored by new versions
|
||||
* of zfs receive. Previous versions of ZFS which do not understand the
|
||||
* DRR_FLAG_SPILL_BLOCK will process this record and recreate any missing
|
||||
* spill blocks.
|
||||
*/
|
||||
#define DRR_FLAG_SPILL_BLOCK (1<<3)
|
||||
|
||||
/*
|
||||
* flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
|
||||
* DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
|
||||
*/
|
||||
#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
|
||||
#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for SPILL records */
|
||||
#define DRR_RAW_BYTESWAP (1<<1)
|
||||
#define DRR_OBJECT_SPILL (1<<2) /* OBJECT record has a spill block */
|
||||
#define DRR_SPILL_UNMODIFIED (1<<2) /* SPILL record for unmodified block */
|
||||
|
||||
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
|
||||
#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)
|
||||
#define DRR_OBJECT_HAS_SPILL(flags) ((flags) & DRR_OBJECT_SPILL)
|
||||
#define DRR_SPILL_IS_UNMODIFIED(flags) ((flags) & DRR_SPILL_UNMODIFIED)
|
||||
|
||||
/* deal with compressed drr_write replay records */
|
||||
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
|
||||
|
@ -4466,6 +4466,13 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
|
||||
"of raw encrypted send streams."));
|
||||
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
|
||||
break;
|
||||
case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"Spill block flag missing for raw send.\n"
|
||||
"The zfs software on the sending system must "
|
||||
"be updated."));
|
||||
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
|
||||
break;
|
||||
case EBUSY:
|
||||
if (hastoken) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
|
@ -2337,6 +2337,21 @@ Allow sending of corrupt data (ignore read/checksum errors when sending data)
|
||||
Use \fB1\fR for yes and \fB0\fR for no (default).
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_send_unmodified_spill_blocks\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
Include unmodified spill blocks in the send stream. Under certain circumstances
|
||||
previous versions of ZFS could incorrectly remove the spill block from an
|
||||
existing object. Including unmodified copies of the spill blocks creates a
|
||||
backwards compatible stream which will recreate a spill block if it was
|
||||
incorrectly removed.
|
||||
.sp
|
||||
Use \fB1\fR for yes (default) and \fB0\fR for no.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
@ -2355,7 +2370,6 @@ Default value: \fB16,777,216\fR.
|
||||
\fBzfs_recv_queue_length\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
.sp
|
||||
The maximum number of bytes allowed in the \fBzfs receive\fR queue. This value
|
||||
must be at least twice the maximum block size in use.
|
||||
.sp
|
||||
|
@ -2466,7 +2466,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
|
||||
ASSERT(db->db_level == 0);
|
||||
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
|
||||
ASSERT(buf != NULL);
|
||||
ASSERT(arc_buf_lsize(buf) == db->db.db_size);
|
||||
ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
|
||||
arc_return_buf(buf, db);
|
||||
|
@ -24,6 +24,7 @@
|
||||
* Copyright 2014 HybridCluster. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dbuf.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
@ -304,13 +305,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
|
||||
{
|
||||
return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
|
||||
bonuslen, DNODE_MIN_SIZE, tx));
|
||||
bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
|
||||
}
|
||||
|
||||
int
|
||||
dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
|
||||
dmu_tx_t *tx)
|
||||
boolean_t keep_spill, dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn;
|
||||
int dn_slots = dnodesize >> DNODE_SHIFT;
|
||||
@ -327,7 +328,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
|
||||
dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
|
||||
keep_spill, tx);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn;
|
||||
int err;
|
||||
|
||||
err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
|
||||
FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
|
||||
dbuf_rm_spill(dn, tx);
|
||||
dnode_rm_spill(dn, tx);
|
||||
}
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
return (err);
|
||||
@ -489,6 +513,7 @@ EXPORT_SYMBOL(dmu_object_claim);
|
||||
EXPORT_SYMBOL(dmu_object_claim_dnsize);
|
||||
EXPORT_SYMBOL(dmu_object_reclaim);
|
||||
EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
|
||||
EXPORT_SYMBOL(dmu_object_rm_spill);
|
||||
EXPORT_SYMBOL(dmu_object_free);
|
||||
EXPORT_SYMBOL(dmu_object_next);
|
||||
EXPORT_SYMBOL(dmu_object_zapify);
|
||||
|
@ -274,6 +274,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
|
||||
/* embedded data is incompatible with encryption and raw recv */
|
||||
if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
/* raw receives require spill block allocation flag */
|
||||
if (!(flags & DRR_FLAG_SPILL_BLOCK))
|
||||
return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
|
||||
} else {
|
||||
dsflags |= DS_HOLD_FLAG_DECRYPT;
|
||||
}
|
||||
@ -615,8 +619,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
|
||||
(void) snprintf(recvname, sizeof (recvname), "%s/%s",
|
||||
tofs, recv_clone_name);
|
||||
|
||||
if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0)
|
||||
if (featureflags & DMU_BACKUP_FEATURE_RAW) {
|
||||
/* raw receives require spill block allocation flag */
|
||||
if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK))
|
||||
return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
|
||||
} else {
|
||||
dsflags |= DS_HOLD_FLAG_DECRYPT;
|
||||
}
|
||||
|
||||
if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
|
||||
/* %recv does not exist; continue in tofs */
|
||||
@ -764,6 +773,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)
|
||||
drc->drc_spill = B_TRUE;
|
||||
|
||||
drba.drba_origin = origin;
|
||||
drba.drba_cookie = drc;
|
||||
drba.drba_cred = CRED();
|
||||
@ -835,7 +847,8 @@ struct receive_writer_arg {
|
||||
/* A map from guid to dataset to help handle dedup'd streams. */
|
||||
avl_tree_t *guid_to_ds_map;
|
||||
boolean_t resumable;
|
||||
boolean_t raw;
|
||||
boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */
|
||||
boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
|
||||
uint64_t last_object;
|
||||
uint64_t last_offset;
|
||||
uint64_t max_object; /* highest object ID referenced in stream */
|
||||
@ -1151,12 +1164,21 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
||||
drro->drr_raw_bonuslen)
|
||||
return (SET_ERROR(EINVAL));
|
||||
} else {
|
||||
if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 ||
|
||||
drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 ||
|
||||
drro->drr_nblkptr != 0)
|
||||
/*
|
||||
* The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN
|
||||
* record indicates this by setting DRR_FLAG_SPILL_BLOCK.
|
||||
*/
|
||||
if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) ||
|
||||
(!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) {
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 ||
|
||||
drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) {
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
}
|
||||
|
||||
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
|
||||
if (err != 0 && err != ENOENT && err != EEXIST)
|
||||
return (SET_ERROR(EINVAL));
|
||||
@ -1312,7 +1334,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
||||
}
|
||||
|
||||
if (object == DMU_NEW_OBJECT) {
|
||||
/* currently free, want to be allocated */
|
||||
/* Currently free, wants to be allocated */
|
||||
err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
|
||||
drro->drr_type, drro->drr_blksz,
|
||||
drro->drr_bonustype, drro->drr_bonuslen,
|
||||
@ -1321,11 +1343,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
||||
drro->drr_blksz != doi.doi_data_block_size ||
|
||||
drro->drr_bonustype != doi.doi_bonus_type ||
|
||||
drro->drr_bonuslen != doi.doi_bonus_size) {
|
||||
/* currently allocated, but with different properties */
|
||||
/* Currently allocated, but with different properties */
|
||||
err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
|
||||
drro->drr_type, drro->drr_blksz,
|
||||
drro->drr_bonustype, drro->drr_bonuslen,
|
||||
dn_slots << DNODE_SHIFT, tx);
|
||||
dn_slots << DNODE_SHIFT, rwa->spill ?
|
||||
DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
|
||||
} else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) {
|
||||
/*
|
||||
* Currently allocated, the existing version of this object
|
||||
* may reference a spill block that is no longer allocated
|
||||
* at the source and needs to be freed.
|
||||
*/
|
||||
err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx);
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
@ -1665,6 +1695,17 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
||||
drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
/*
|
||||
* This is an unmodified spill block which was added to the stream
|
||||
* to resolve an issue with incorrectly removing spill blocks. It
|
||||
* should be ignored by current versions of the code which support
|
||||
* the DRR_FLAG_SPILL_BLOCK flag.
|
||||
*/
|
||||
if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) {
|
||||
dmu_return_arcbuf(abuf);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (rwa->raw) {
|
||||
if (!DMU_OT_IS_VALID(drrs->drr_type) ||
|
||||
drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS ||
|
||||
@ -1699,9 +1740,16 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
||||
return (err);
|
||||
}
|
||||
|
||||
if (db_spill->db_size < drrs->drr_length)
|
||||
/*
|
||||
* Spill blocks may both grow and shrink. When a change in size
|
||||
* occurs any existing dbuf must be updated to match the logical
|
||||
* size of the provided arc_buf_t.
|
||||
*/
|
||||
if (db_spill->db_size != drrs->drr_length) {
|
||||
dmu_buf_will_fill(db_spill, tx);
|
||||
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
|
||||
drrs->drr_length, tx));
|
||||
}
|
||||
|
||||
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
|
||||
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
|
||||
@ -2575,6 +2623,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
|
||||
rwa->byteswap = drc->drc_byteswap;
|
||||
rwa->resumable = drc->drc_resumable;
|
||||
rwa->raw = drc->drc_raw;
|
||||
rwa->spill = drc->drc_spill;
|
||||
rwa->os->os_raw_receive = drc->drc_raw;
|
||||
|
||||
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
|
||||
|
@ -64,6 +64,8 @@ int zfs_send_corrupt_data = B_FALSE;
|
||||
int zfs_send_queue_length = SPA_MAXBLOCKSIZE;
|
||||
/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
|
||||
int zfs_send_set_freerecords_bit = B_TRUE;
|
||||
/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
|
||||
int zfs_send_unmodified_spill_blocks = B_TRUE;
|
||||
|
||||
/*
|
||||
* Use this to override the recordsize calculation for fast zfs send estimates.
|
||||
@ -99,6 +101,8 @@ typedef struct dump_bytes_io {
|
||||
int dbi_len;
|
||||
} dump_bytes_io_t;
|
||||
|
||||
static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
|
||||
|
||||
static void
|
||||
dump_bytes_cb(void *arg)
|
||||
{
|
||||
@ -436,6 +440,12 @@ dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
|
||||
drrs->drr_length = blksz;
|
||||
drrs->drr_toguid = dsp->dsa_toguid;
|
||||
|
||||
/* See comment in dump_dnode() for full details */
|
||||
if (zfs_send_unmodified_spill_blocks &&
|
||||
(bp->blk_birth <= dsp->dsa_fromtxg)) {
|
||||
drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
|
||||
}
|
||||
|
||||
/* handle raw send fields */
|
||||
if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
|
||||
ASSERT(BP_IS_PROTECTED(bp));
|
||||
@ -587,6 +597,14 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* DRR_OBJECT_SPILL is set for every dnode which references a
|
||||
* spill block. This allows the receiving pool to definitively
|
||||
* determine when a spill block should be kept or freed.
|
||||
*/
|
||||
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
|
||||
drro->drr_flags |= DRR_OBJECT_SPILL;
|
||||
|
||||
if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
|
||||
return (SET_ERROR(EINTR));
|
||||
|
||||
@ -594,8 +612,34 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
|
||||
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
|
||||
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
|
||||
return (SET_ERROR(EINTR));
|
||||
|
||||
/*
|
||||
* Send DRR_SPILL records for unmodified spill blocks. This is useful
|
||||
* because changing certain attributes of the object (e.g. blocksize)
|
||||
* can cause old versions of ZFS to incorrectly remove a spill block.
|
||||
* Including these records in the stream forces an up to date version
|
||||
* to always be written ensuring they're never lost. Current versions
|
||||
* of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
|
||||
* ignore these unmodified spill blocks.
|
||||
*/
|
||||
if (zfs_send_unmodified_spill_blocks &&
|
||||
(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
|
||||
(DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
|
||||
struct send_block_record record;
|
||||
|
||||
bzero(&record, sizeof (struct send_block_record));
|
||||
record.eos_marker = B_FALSE;
|
||||
record.bp = *DN_SPILL_BLKPTR(dnp);
|
||||
SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
|
||||
object, 0, DMU_SPILL_BLKID);
|
||||
|
||||
if (do_dump(dsp, &record) != 0)
|
||||
return (SET_ERROR(EINTR));
|
||||
}
|
||||
|
||||
if (dsp->dsa_err != 0)
|
||||
return (SET_ERROR(EINTR));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -1036,6 +1080,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
|
||||
/* raw send implies compressok */
|
||||
if (compressok || rawok)
|
||||
featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
|
||||
|
||||
if (rawok && os->os_encrypted)
|
||||
featureflags |= DMU_BACKUP_FEATURE_RAW;
|
||||
|
||||
@ -1064,6 +1109,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
|
||||
if (zfs_send_set_freerecords_bit)
|
||||
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
|
||||
|
||||
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK;
|
||||
|
||||
if (ancestor_zb != NULL) {
|
||||
drr->drr_u.drr_begin.drr_fromguid =
|
||||
ancestor_zb->zbm_guid;
|
||||
@ -1084,6 +1131,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
|
||||
dsp->dsa_os = os;
|
||||
dsp->dsa_off = off;
|
||||
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
|
||||
dsp->dsa_fromtxg = fromtxg;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
dsp->dsa_featureflags = featureflags;
|
||||
dsp->dsa_resume_object = resumeobj;
|
||||
@ -1552,4 +1600,8 @@ MODULE_PARM_DESC(zfs_send_corrupt_data, "Allow sending corrupt data");
|
||||
|
||||
module_param(zfs_send_queue_length, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_send_queue_length, "Maximum send queue length");
|
||||
|
||||
module_param(zfs_send_unmodified_spill_blocks, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_send_unmodified_spill_blocks,
|
||||
"Send unmodified spill blocks");
|
||||
#endif
|
||||
|
@ -660,7 +660,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
|
||||
void
|
||||
dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
|
||||
dmu_object_type_t bonustype, int bonuslen, int dn_slots,
|
||||
boolean_t keep_spill, dmu_tx_t *tx)
|
||||
{
|
||||
int nblkptr;
|
||||
|
||||
@ -710,7 +711,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = bonustype;
|
||||
if (dn->dn_nblkptr != nblkptr)
|
||||
dn->dn_next_nblkptr[tx->tx_txg & TXG_MASK] = nblkptr;
|
||||
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
|
||||
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) {
|
||||
dbuf_rm_spill(dn, tx);
|
||||
dnode_rm_spill(dn, tx);
|
||||
}
|
||||
|
@ -807,8 +807,8 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
|
||||
'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy',
|
||||
'send_encrypted_props', 'send_encrypted_truncated_files',
|
||||
'send_freeobjects', 'send_realloc_dnode_size', 'send_realloc_files',
|
||||
'send_realloc_encrypted_files', 'send_holds', 'send_hole_birth',
|
||||
'send_mixed_raw', 'send-wDR_encrypted_zvol']
|
||||
'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
|
||||
'send_hole_birth', 'send_mixed_raw', 'send-wDR_encrypted_zvol']
|
||||
tags = ['functional', 'rsend']
|
||||
|
||||
[tests/functional/scrub_mirror]
|
||||
|
@ -44,6 +44,7 @@ dist_pkgdata_SCRIPTS = \
|
||||
send_realloc_dnode_size.ksh \
|
||||
send_realloc_files.ksh \
|
||||
send_realloc_encrypted_files.ksh \
|
||||
send_spill_block.ksh \
|
||||
send_holds.ksh \
|
||||
send_hole_birth.ksh \
|
||||
send_mixed_raw.ksh \
|
||||
|
@ -30,6 +30,7 @@
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/include/math.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
|
||||
. $STF_SUITE/tests/functional/rsend/rsend.cfg
|
||||
|
||||
#
|
||||
@ -518,9 +519,13 @@ function churn_files
|
||||
value=$((RANDOM % 5))
|
||||
if [ $value -eq 0 -a $xattrs -ne 0 ]; then
|
||||
attrname="testattr$((RANDOM % 3))"
|
||||
attrlen="$(((RANDOM % 1000) + 1))"
|
||||
attrvalue="$(random_string VALID_NAME_CHAR \
|
||||
$attrlen)"
|
||||
attr -qr $attrname $file_name || \
|
||||
log_fail "Failed to remove $attrname"
|
||||
attr -qs $attrname -V TestValue $file_name || \
|
||||
attr -qs $attrname \
|
||||
-V "$attrvalue" $file_name || \
|
||||
log_fail "Failed to set $attrname"
|
||||
elif [ $value -eq 1 ]; then
|
||||
dd if=/dev/urandom of=$file_name \
|
||||
@ -548,9 +553,12 @@ function churn_files
|
||||
if [ $xattrs -ne 0 ]; then
|
||||
for j in {0..2}; do
|
||||
attrname="testattr$j"
|
||||
attr -qs $attrname -V TestValue \
|
||||
$file_name || log_fail \
|
||||
"Failed to set $attrname"
|
||||
attrlen="$(((RANDOM % 1000) + 1))"
|
||||
attrvalue="$(random_string \
|
||||
VALID_NAME_CHAR $attrlen)"
|
||||
attr -qs $attrname \
|
||||
-V "$attrvalue" $file_name || \
|
||||
log_fail "Failed to set $attrname"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
@ -791,10 +799,11 @@ function rand_set_prop
|
||||
log_must eval "zfs set $prop='$value' $dtst"
|
||||
}
|
||||
|
||||
# Generate a recursive checksum of a filesystems contents. Only file
|
||||
# data is included in the checksum (no meta data, or xattrs).
|
||||
# Generate a recursive checksum of a filesystem which includes the file
|
||||
# contents and any associated xattrs.
|
||||
function recursive_cksum
|
||||
{
|
||||
find $1 -type f -exec sha256sum {} \; | \
|
||||
find $1 -type f -exec sh -c 'sha256sum {}; getfattr \
|
||||
--absolute-names --only-values -d {} | sha256sum' \; | \
|
||||
sort -k 2 | awk '{ print $1 }' | sha256sum
|
||||
}
|
||||
|
@ -65,7 +65,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}"
|
||||
# Set atime=off to prevent the recursive_cksum from modifying newfs.
|
||||
log_must zfs set atime=off $POOL/newfs
|
||||
|
||||
for i in {1..5}; do
|
||||
# Due to reduced performance on debug kernels use fewer files by default.
|
||||
if is_kmemleak; then
|
||||
nr_files=100
|
||||
passes=2
|
||||
else
|
||||
nr_files=1000
|
||||
passes=3
|
||||
fi
|
||||
|
||||
for i in {1..$passes}; do
|
||||
# Randomly modify several dataset properties in order to generate
|
||||
# more interesting incremental send streams.
|
||||
rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
|
||||
@ -76,12 +85,8 @@ for i in {1..5}; do
|
||||
|
||||
# Churn the filesystem in such a way that we're likely to be both
|
||||
# allocating and reallocating objects in the incremental stream.
|
||||
#
|
||||
# Disable xattrs until the following spill block issue is resolved:
|
||||
# https://github.com/openzfs/openzfs/pull/705
|
||||
#
|
||||
log_must churn_files 1000 524288 $POOL/fs 0
|
||||
expected_cksum=$(recursive_cksum /$fs)
|
||||
log_must churn_files $nr_files 524288 $POOL/fs
|
||||
expected_cksum=$(recursive_cksum /$POOL/fs)
|
||||
|
||||
# Create a snapshot and use it to send an incremental stream.
|
||||
this_snap=$((last_snap + 1))
|
||||
|
@ -35,6 +35,8 @@
|
||||
# e) Destroy the incremental stream and old snapshot.
|
||||
#
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
log_assert "Verify incremental receive handles reallocation"
|
||||
|
||||
function cleanup
|
||||
@ -56,7 +58,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}"
|
||||
# Set atime=off to prevent the recursive_cksum from modifying newfs.
|
||||
log_must zfs set atime=off $POOL/newfs
|
||||
|
||||
for i in {1..5}; do
|
||||
# Due to reduced performance on debug kernels use fewer files by default.
|
||||
if is_kmemleak; then
|
||||
nr_files=100
|
||||
passes=2
|
||||
else
|
||||
nr_files=1000
|
||||
passes=3
|
||||
fi
|
||||
|
||||
for i in {1..$passes}; do
|
||||
# Randomly modify several dataset properties in order to generate
|
||||
# more interesting incremental send streams.
|
||||
rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
|
||||
@ -67,8 +78,8 @@ for i in {1..5}; do
|
||||
|
||||
# Churn the filesystem in such a way that we're likely to be both
|
||||
# allocating and reallocating objects in the incremental stream.
|
||||
log_must churn_files 1000 524288 $POOL/fs
|
||||
expected_cksum=$(recursive_cksum /$fs)
|
||||
log_must churn_files $nr_files 524288 $POOL/fs
|
||||
expected_cksum=$(recursive_cksum /$POOL/fs)
|
||||
|
||||
# Create a snapshot and use it to send an incremental stream.
|
||||
this_snap=$((last_snap + 1))
|
||||
|
155
tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh
Executable file
155
tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh
Executable file
@ -0,0 +1,155 @@
|
||||
#!/bin/ksh
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/rsend/rsend.kshlib
|
||||
|
||||
#
|
||||
# Description:
|
||||
# Verify spill blocks are correctly preserved.
|
||||
#
|
||||
# Strategy:
|
||||
# 1) Create a set of files each containing some file data.
|
||||
# 2) Add enough xattrs to the file to require a spill block.
|
||||
# 3) Snapshot and send these files to a new dataset.
|
||||
# 4) Modify the files and spill blocks in a variety of ways.
|
||||
# 5) Send the changes using an incremental send stream.
|
||||
# 6) Verify that all the xattrs (and thus the spill block) were
|
||||
# preserved when receiving the incremental stream.
|
||||
#
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
log_assert "Verify spill blocks are correctly preserved"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
rm -f $BACKDIR/fs@*
|
||||
destroy_dataset $POOL/fs "-rR"
|
||||
destroy_dataset $POOL/newfs "-rR"
|
||||
}
|
||||
|
||||
attrvalue="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must zfs create $POOL/fs
|
||||
log_must zfs set xattr=sa $POOL/fs
|
||||
log_must zfs set dnodesize=legacy $POOL/fs
|
||||
log_must zfs set recordsize=128k $POOL/fs
|
||||
|
||||
# Create 40 files each with a spill block containing xattrs. Each file
|
||||
# will be modified in a different way to validate the incremental receive.
|
||||
for i in {1..40}; do
|
||||
file="/$POOL/fs/file$i"
|
||||
|
||||
log_must mkfile 16384 $file
|
||||
for j in {1..20}; do
|
||||
log_must attr -qs "testattr$j" -V "$attrvalue" $file
|
||||
done
|
||||
done
|
||||
|
||||
# Snapshot the pool and send it to the new dataset.
|
||||
log_must zfs snapshot $POOL/fs@snap1
|
||||
log_must eval "zfs send -e $POOL/fs@snap1 >$BACKDIR/fs@snap1"
|
||||
log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap1"
|
||||
|
||||
#
|
||||
# Modify file[1-6]'s contents but not the spill blocks.
|
||||
#
|
||||
# file1 - Increase record size; single block
|
||||
# file2 - Increase record size; multiple blocks
|
||||
# file3 - Truncate file to zero size; single block
|
||||
# file4 - Truncate file to smaller size; single block
|
||||
# file5 - Truncate file to much larger size; add holes
|
||||
# file6 - Truncate file to embedded size; embedded data
|
||||
#
|
||||
log_must mkfile 32768 /$POOL/fs/file1
|
||||
log_must mkfile 1048576 /$POOL/fs/file2
|
||||
log_must truncate -s 0 /$POOL/fs/file3
|
||||
log_must truncate -s 8192 /$POOL/fs/file4
|
||||
log_must truncate -s 1073741824 /$POOL/fs/file5
|
||||
log_must truncate -s 50 /$POOL/fs/file6
|
||||
|
||||
#
|
||||
# Modify file[11-16]'s contents and their spill blocks.
|
||||
#
|
||||
# file11 - Increase record size; single block
|
||||
# file12 - Increase record size; multiple blocks
|
||||
# file13 - Truncate file to zero size; single block
|
||||
# file14 - Truncate file to smaller size; single block
|
||||
# file15 - Truncate file to much larger size; add holes
|
||||
# file16 - Truncate file to embedded size; embedded data
|
||||
#
|
||||
log_must mkfile 32768 /$POOL/fs/file11
|
||||
log_must mkfile 1048576 /$POOL/fs/file12
|
||||
log_must truncate -s 0 /$POOL/fs/file13
|
||||
log_must truncate -s 8192 /$POOL/fs/file14
|
||||
log_must truncate -s 1073741824 /$POOL/fs/file15
|
||||
log_must truncate -s 50 /$POOL/fs/file16
|
||||
|
||||
for i in {11..20}; do
|
||||
log_must attr -qr testattr1 /$POOL/fs/file$i
|
||||
done
|
||||
|
||||
#
|
||||
# Modify file[21-26]'s contents and remove their spill blocks.
|
||||
#
|
||||
# file21 - Increase record size; single block
|
||||
# file22 - Increase record size; multiple blocks
|
||||
# file23 - Truncate file to zero size; single block
|
||||
# file24 - Truncate file to smaller size; single block
|
||||
# file25 - Truncate file to much larger size; add holes
|
||||
# file26 - Truncate file to embedded size; embedded data
|
||||
#
|
||||
log_must mkfile 32768 /$POOL/fs/file21
|
||||
log_must mkfile 1048576 /$POOL/fs/file22
|
||||
log_must truncate -s 0 /$POOL/fs/file23
|
||||
log_must truncate -s 8192 /$POOL/fs/file24
|
||||
log_must truncate -s 1073741824 /$POOL/fs/file25
|
||||
log_must truncate -s 50 /$POOL/fs/file26
|
||||
|
||||
for i in {21..30}; do
|
||||
for j in {1..20}; do
|
||||
log_must attr -qr testattr$j /$POOL/fs/file$i
|
||||
done
|
||||
done
|
||||
|
||||
#
|
||||
# Modify file[31-40]'s spill blocks but not the file contents.
|
||||
#
|
||||
for i in {31..40}; do
|
||||
file="/$POOL/fs/file$i"
|
||||
log_must attr -qr testattr$(((RANDOM % 20) + 1)) $file
|
||||
log_must attr -qs testattr$(((RANDOM % 20) + 1)) -V "$attrvalue" $file
|
||||
done
|
||||
|
||||
# Calculate the expected recursive checksum for the source.
|
||||
expected_cksum=$(recursive_cksum /$POOL/fs)
|
||||
|
||||
# Snapshot the pool and send the incremental snapshot.
|
||||
log_must zfs snapshot $POOL/fs@snap2
|
||||
log_must eval "zfs send -e -i $POOL/fs@snap1 $POOL/fs@snap2 >$BACKDIR/fs@snap2"
|
||||
log_must eval "zfs recv -F $POOL/newfs < $BACKDIR/fs@snap2"
|
||||
|
||||
# Validate the received copy using the received recursive checksum.
|
||||
actual_cksum=$(recursive_cksum /$POOL/newfs)
|
||||
if [[ "$expected_cksum" != "$actual_cksum" ]]; then
|
||||
log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
|
||||
fi
|
||||
|
||||
log_pass "Verify spill blocks are correctly preserved"
|
Loading…
Reference in New Issue
Block a user