diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 93d05aac4..88c836171 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -420,7 +420,8 @@ int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp); int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, - int bonuslen, int dnodesize, dmu_tx_t *txp); + int bonuslen, int dnodesize, boolean_t keep_spill, dmu_tx_t *tx); +int dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx); /* * Free an object from this objset. diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 03a63077f..5e1901da4 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -265,6 +265,7 @@ typedef struct dmu_sendarg { objset_t *dsa_os; zio_cksum_t dsa_zc; uint64_t dsa_toguid; + uint64_t dsa_fromtxg; int dsa_err; dmu_pendop_t dsa_pending_op; uint64_t dsa_featureflags; diff --git a/include/sys/dmu_recv.h b/include/sys/dmu_recv.h index 90002026b..ffa89249d 100644 --- a/include/sys/dmu_recv.h +++ b/include/sys/dmu_recv.h @@ -48,6 +48,7 @@ typedef struct dmu_recv_cookie { boolean_t drc_resumable; boolean_t drc_raw; boolean_t drc_clone; + boolean_t drc_spill; struct avl_tree *drc_guid_to_ds_map; nvlist_t *drc_keynvl; zio_cksum_t drc_cksum; diff --git a/include/sys/dnode.h b/include/sys/dnode.h index accbe6945..c60258bbc 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -267,8 +267,8 @@ typedef struct dnode_phys { }; } dnode_phys_t; -#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \ - (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)) +#define DN_SPILL_BLKPTR(dnp) ((blkptr_t *)((char *)(dnp) + \ + (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))) struct dnode { /* @@ -420,7 +420,8 @@ void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, 
int blocksize, int ibs, dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx); void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, - dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx); + dmu_object_type_t bonustype, int bonuslen, int dn_slots, + boolean_t keep_spill, dmu_tx_t *tx); void dnode_free(dnode_t *dn, dmu_tx_t *tx); void dnode_byteswap(dnode_phys_t *dnp); void dnode_buf_byteswap(void *buf, size_t size); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 8a532ec7e..3bcefdbfd 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1317,6 +1317,7 @@ typedef enum { ZFS_ERR_WRONG_PARENT, ZFS_ERR_FROM_IVSET_GUID_MISSING, ZFS_ERR_FROM_IVSET_GUID_MISMATCH, + ZFS_ERR_SPILL_BLOCK_FLAG_MISSING, } zfs_errno_t; /* diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index bb5b48c91..a883c3358 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -101,7 +101,7 @@ typedef enum drr_headertype { /* flag #18 is reserved for a Delphix feature */ #define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19) #define DMU_BACKUP_FEATURE_RESUMING (1 << 20) -/* flag #21 is reserved for a Delphix feature */ +/* flag #21 is reserved for the redacted send/receive feature */ #define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22) #define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23) #define DMU_BACKUP_FEATURE_RAW (1 << 24) @@ -131,7 +131,7 @@ typedef enum dmu_send_resume_token_version { * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | reserved | feature-flags |C|S| + * | reserved | feature-flags |C|S| * +-------+-------+-------+-------+-------+-------+-------+-------+ * * The low order two bits indicate the header type: SUBSTREAM (0x1) @@ -160,16 +160,38 @@ typedef enum dmu_send_resume_token_version { * cannot necessarily be received as a clone correctly. 
*/ #define DRR_FLAG_FREERECORDS (1<<2) +/* + * When DRR_FLAG_SPILL_BLOCK is set it indicates the DRR_OBJECT_SPILL + * and DRR_SPILL_UNMODIFIED flags are meaningful in the send stream. + * + * When DRR_FLAG_SPILL_BLOCK is set, DRR_OBJECT records will have + * DRR_OBJECT_SPILL set if and only if they should have a spill block + * (either an existing one, or a new one in the send stream). When clear + * the object does not have a spill block and any existing spill block + * should be freed. + * + * Similarly, when DRR_FLAG_SPILL_BLOCK is set, DRR_SPILL records will + * have DRR_SPILL_UNMODIFIED set if and only if they were included for + * backward compatibility purposes, and can be safely ignored by new versions + * of zfs receive. Previous versions of ZFS which do not understand the + * DRR_FLAG_SPILL_BLOCK will process this record and recreate any missing + * spill blocks. + */ +#define DRR_FLAG_SPILL_BLOCK (1<<3) /* * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT, * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks */ -#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */ +#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for SPILL records */ #define DRR_RAW_BYTESWAP (1<<1) +#define DRR_OBJECT_SPILL (1<<2) /* OBJECT record has a spill block */ +#define DRR_SPILL_UNMODIFIED (1<<2) /* SPILL record for unmodified block */ #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) #define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP) +#define DRR_OBJECT_HAS_SPILL(flags) ((flags) & DRR_OBJECT_SPILL) +#define DRR_SPILL_IS_UNMODIFIED(flags) ((flags) & DRR_SPILL_UNMODIFIED) /* deal with compressed drr_write replay records */ #define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0) diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 2c2eca8db..f69a46430 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -4466,6 +4466,13 @@ zfs_receive_one(libzfs_handle_t 
*hdl, int infd, const char *tosnap, "of raw encrypted send streams.")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; + case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Spill block flag missing for raw send.\n" + "The zfs software on the sending system must " + "be updated.")); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; case EBUSY: if (hastoken) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index ad6cd4e94..5c49670f1 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2337,6 +2337,21 @@ Allow sending of corrupt data (ignore read/checksum errors when sending data) Use \fB1\fR for yes and \fB0\fR for no (default). .RE +.sp +.ne 2 +.na +\fBzfs_send_unmodified_spill_blocks\fR (int) +.ad +.RS 12n +Include unmodified spill blocks in the send stream. Under certain circumstances +previous versions of ZFS could incorrectly remove the spill block from an +existing object. Including unmodified copies of the spill blocks creates a +backwards compatible stream which will recreate a spill block if it was +incorrectly removed. +.sp +Use \fB1\fR for yes (default) and \fB0\fR for no. +.RE + .sp .ne 2 .na @@ -2355,7 +2370,6 @@ Default value: \fB16,777,216\fR. \fBzfs_recv_queue_length\fR (int) .ad .RS 12n -.sp The maximum number of bytes allowed in the \fBzfs receive\fR queue. This value must be at least twice the maximum block size in use. 
.sp diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index d52a520fa..07e616f6f 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2466,7 +2466,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) ASSERT(db->db_level == 0); ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf)); ASSERT(buf != NULL); - ASSERT(arc_buf_lsize(buf) == db->db.db_size); + ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size); ASSERT(tx->tx_txg != 0); arc_return_buf(buf, db); diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index e77ebeca5..ec78ebbdc 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -24,6 +24,7 @@ * Copyright 2014 HybridCluster. All rights reserved. */ +#include #include #include #include @@ -304,13 +305,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype, - bonuslen, DNODE_MIN_SIZE, tx)); + bonuslen, DNODE_MIN_SIZE, B_FALSE, tx)); } int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize, - dmu_tx_t *tx) + boolean_t keep_spill, dmu_tx_t *tx) { dnode_t *dn; int dn_slots = dnodesize >> DNODE_SHIFT; @@ -327,7 +328,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, if (err) return (err); - dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx); + dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, + keep_spill, tx); + + dnode_rele(dn, FTAG); + return (err); +} + +int +dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0, + FTAG, &dn); + if (err) + return (err); + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + 
dbuf_rm_spill(dn, tx); + dnode_rm_spill(dn, tx); + } + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (err); @@ -489,6 +513,7 @@ EXPORT_SYMBOL(dmu_object_claim); EXPORT_SYMBOL(dmu_object_claim_dnsize); EXPORT_SYMBOL(dmu_object_reclaim); EXPORT_SYMBOL(dmu_object_reclaim_dnsize); +EXPORT_SYMBOL(dmu_object_rm_spill); EXPORT_SYMBOL(dmu_object_free); EXPORT_SYMBOL(dmu_object_next); EXPORT_SYMBOL(dmu_object_zapify); diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index fc5d47f5f..976b1bd46 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -274,6 +274,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* embedded data is incompatible with encryption and raw recv */ if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) return (SET_ERROR(EINVAL)); + + /* raw receives require spill block allocation flag */ + if (!(flags & DRR_FLAG_SPILL_BLOCK)) + return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING)); } else { dsflags |= DS_HOLD_FLAG_DECRYPT; } @@ -615,8 +619,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + /* raw receives require spill block allocation flag */ + if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)) + return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING)); + } else { dsflags |= DS_HOLD_FLAG_DECRYPT; + } if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ @@ -764,6 +773,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, return (SET_ERROR(EINVAL)); } + if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK) + drc->drc_spill = B_TRUE; + drba.drba_origin = origin; drba.drba_cookie = drc; drba.drba_cred = CRED(); @@ -835,7 +847,8 @@ struct receive_writer_arg { /* A map from guid to dataset to help handle dedup'd streams. 
*/ avl_tree_t *guid_to_ds_map; boolean_t resumable; - boolean_t raw; + boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */ + boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */ uint64_t last_object; uint64_t last_offset; uint64_t max_object; /* highest object ID referenced in stream */ @@ -1151,10 +1164,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, drro->drr_raw_bonuslen) return (SET_ERROR(EINVAL)); } else { - if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 || - drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 || - drro->drr_nblkptr != 0) + /* + * The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN + * record indicates this by setting DRR_FLAG_SPILL_BLOCK. + */ + if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) || + (!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) { return (SET_ERROR(EINVAL)); + } + + if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 || + drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) { + return (SET_ERROR(EINVAL)); + } } err = dmu_object_info(rwa->os, drro->drr_object, &doi); @@ -1312,7 +1334,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, } if (object == DMU_NEW_OBJECT) { - /* currently free, want to be allocated */ + /* Currently free, wants to be allocated */ err = dmu_object_claim_dnsize(rwa->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, @@ -1321,11 +1343,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, drro->drr_blksz != doi.doi_data_block_size || drro->drr_bonustype != doi.doi_bonus_type || drro->drr_bonuslen != doi.doi_bonus_size) { - /* currently allocated, but with different properties */ + /* Currently allocated, but with different properties */ err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, - dn_slots << DNODE_SHIFT, tx); + dn_slots << DNODE_SHIFT, rwa->spill ? 
+ DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx); + } else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) { + /* + * Currently allocated, the existing version of this object + * may reference a spill block that is no longer allocated + * at the source and needs to be freed. + */ + err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx); } if (err != 0) { @@ -1665,6 +1695,17 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os))) return (SET_ERROR(EINVAL)); + /* + * This is an unmodified spill block which was added to the stream + * to resolve an issue with incorrectly removing spill blocks. It + * should be ignored by current versions of the code which support + * the DRR_FLAG_SPILL_BLOCK flag. + */ + if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) { + dmu_return_arcbuf(abuf); + return (0); + } + if (rwa->raw) { if (!DMU_OT_IS_VALID(drrs->drr_type) || drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS || @@ -1699,9 +1740,16 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, return (err); } - if (db_spill->db_size < drrs->drr_length) + /* + * Spill blocks may both grow and shrink. When a change in size + * occurs any existing dbuf must be updated to match the logical + * size of the provided arc_buf_t. 
+ */ + if (db_spill->db_size != drrs->drr_length) { + dmu_buf_will_fill(db_spill, tx); VERIFY(0 == dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); + } if (rwa->byteswap && !arc_is_encrypted(abuf) && arc_get_compression(abuf) == ZIO_COMPRESS_OFF) { @@ -2575,6 +2623,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, rwa->byteswap = drc->drc_byteswap; rwa->resumable = drc->drc_resumable; rwa->raw = drc->drc_raw; + rwa->spill = drc->drc_spill; rwa->os->os_raw_receive = drc->drc_raw; (void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc, diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index ad64d666b..a6ff5ce3e 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -64,6 +64,8 @@ int zfs_send_corrupt_data = B_FALSE; int zfs_send_queue_length = SPA_MAXBLOCKSIZE; /* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */ int zfs_send_set_freerecords_bit = B_TRUE; +/* Set this tunable to FALSE to disable sending unmodified spill blocks. */ +int zfs_send_unmodified_spill_blocks = B_TRUE; /* * Use this to override the recordsize calculation for fast zfs send estimates. @@ -99,6 +101,8 @@ typedef struct dump_bytes_io { int dbi_len; } dump_bytes_io_t; +static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data); + static void dump_bytes_cb(void *arg) { @@ -436,6 +440,12 @@ dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data) drrs->drr_length = blksz; drrs->drr_toguid = dsp->dsa_toguid; + /* See comment in dump_dnode() for full details */ + if (zfs_send_unmodified_spill_blocks && + (bp->blk_birth <= dsp->dsa_fromtxg)) { + drrs->drr_flags |= DRR_SPILL_UNMODIFIED; + } + /* handle raw send fields */ if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { ASSERT(BP_IS_PROTECTED(bp)); @@ -587,6 +597,14 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, } } + /* + * DRR_OBJECT_SPILL is set for every dnode which references a + * spill block. 
This allows the receiving pool to definitively + * determine when a spill block should be kept or freed. + */ + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) + drro->drr_flags |= DRR_OBJECT_SPILL; + if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0) return (SET_ERROR(EINTR)); @@ -594,8 +612,34 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0) return (SET_ERROR(EINTR)); + + /* + * Send DRR_SPILL records for unmodified spill blocks. This is useful + * because changing certain attributes of the object (e.g. blocksize) + * can cause old versions of ZFS to incorrectly remove a spill block. + * Including these records in the stream forces an up to date version + * to always be written ensuring they're never lost. Current versions + * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can + * ignore these unmodified spill blocks. + */ + if (zfs_send_unmodified_spill_blocks && + (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && + (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) { + struct send_block_record record; + + bzero(&record, sizeof (struct send_block_record)); + record.eos_marker = B_FALSE; + record.bp = *DN_SPILL_BLKPTR(dnp); + SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os), + object, 0, DMU_SPILL_BLKID); + + if (do_dump(dsp, &record) != 0) + return (SET_ERROR(EINTR)); + } + if (dsp->dsa_err != 0) return (SET_ERROR(EINTR)); + return (0); } @@ -1036,6 +1080,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, /* raw send implies compressok */ if (compressok || rawok) featureflags |= DMU_BACKUP_FEATURE_COMPRESSED; + if (rawok && os->os_encrypted) featureflags |= DMU_BACKUP_FEATURE_RAW; @@ -1064,6 +1109,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, if (zfs_send_set_freerecords_bit) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS; + drr->drr_u.drr_begin.drr_flags |= 
DRR_FLAG_SPILL_BLOCK; + if (ancestor_zb != NULL) { drr->drr_u.drr_begin.drr_fromguid = ancestor_zb->zbm_guid; @@ -1084,6 +1131,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsp->dsa_os = os; dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid; + dsp->dsa_fromtxg = fromtxg; dsp->dsa_pending_op = PENDING_NONE; dsp->dsa_featureflags = featureflags; dsp->dsa_resume_object = resumeobj; @@ -1552,4 +1600,8 @@ MODULE_PARM_DESC(zfs_send_corrupt_data, "Allow sending corrupt data"); module_param(zfs_send_queue_length, int, 0644); MODULE_PARM_DESC(zfs_send_queue_length, "Maximum send queue length"); + +module_param(zfs_send_unmodified_spill_blocks, int, 0644); +MODULE_PARM_DESC(zfs_send_unmodified_spill_blocks, + "Send unmodified spill blocks"); #endif diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 78a90f68f..38ec646ba 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -660,7 +660,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, - dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx) + dmu_object_type_t bonustype, int bonuslen, int dn_slots, + boolean_t keep_spill, dmu_tx_t *tx) { int nblkptr; @@ -710,7 +711,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = bonustype; if (dn->dn_nblkptr != nblkptr) dn->dn_next_nblkptr[tx->tx_txg & TXG_MASK] = nblkptr; - if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) { dbuf_rm_spill(dn, tx); dnode_rm_spill(dn, tx); } diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 746d42a22..8219cf42b 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -807,8 +807,8 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_recv_dedup', 'send_encrypted_files', 
'send_encrypted_hierarchy', 'send_encrypted_props', 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_dnode_size', 'send_realloc_files', - 'send_realloc_encrypted_files', 'send_holds', 'send_hole_birth', - 'send_mixed_raw', 'send-wDR_encrypted_zvol'] + 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds', + 'send_hole_birth', 'send_mixed_raw', 'send-wDR_encrypted_zvol'] tags = ['functional', 'rsend'] [tests/functional/scrub_mirror] diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am index 8669a51fb..585018ac2 100644 --- a/tests/zfs-tests/tests/functional/rsend/Makefile.am +++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am @@ -44,6 +44,7 @@ dist_pkgdata_SCRIPTS = \ send_realloc_dnode_size.ksh \ send_realloc_files.ksh \ send_realloc_encrypted_files.ksh \ + send_spill_block.ksh \ send_holds.ksh \ send_hole_birth.ksh \ send_mixed_raw.ksh \ diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 2ef6775e6..521a1c7eb 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -30,6 +30,7 @@ . $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/math.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib . 
$STF_SUITE/tests/functional/rsend/rsend.cfg # @@ -518,9 +519,13 @@ function churn_files value=$((RANDOM % 5)) if [ $value -eq 0 -a $xattrs -ne 0 ]; then attrname="testattr$((RANDOM % 3))" + attrlen="$(((RANDOM % 1000) + 1))" + attrvalue="$(random_string VALID_NAME_CHAR \ + $attrlen)" attr -qr $attrname $file_name || \ log_fail "Failed to remove $attrname" - attr -qs $attrname -V TestValue $file_name || \ + attr -qs $attrname \ + -V "$attrvalue" $file_name || \ log_fail "Failed to set $attrname" elif [ $value -eq 1 ]; then dd if=/dev/urandom of=$file_name \ @@ -548,9 +553,12 @@ function churn_files if [ $xattrs -ne 0 ]; then for j in {0..2}; do attrname="testattr$j" - attr -qs $attrname -V TestValue \ - $file_name || log_fail \ - "Failed to set $attrname" + attrlen="$(((RANDOM % 1000) + 1))" + attrvalue="$(random_string \ + VALID_NAME_CHAR $attrlen)" + attr -qs $attrname \ + -V "$attrvalue" $file_name || \ + log_fail "Failed to set $attrname" done fi fi @@ -791,10 +799,11 @@ function rand_set_prop log_must eval "zfs set $prop='$value' $dtst" } -# Generate a recursive checksum of a filesystems contents. Only file -# data is included in the checksum (no meta data, or xattrs). +# Generate a recursive checksum of a filesystem which includes the file +# contents and any associated xattrs. 
function recursive_cksum { - find $1 -type f -exec sha256sum {} \; | \ + find $1 -type f -exec sh -c 'sha256sum {}; getfattr \ + --absolute-names --only-values -d {} | sha256sum' \; | \ sort -k 2 | awk '{ print $1 }' | sha256sum } diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh index 0649beaa3..3c3de86d9 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh @@ -65,7 +65,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}" # Set atime=off to prevent the recursive_cksum from modifying newfs. log_must zfs set atime=off $POOL/newfs -for i in {1..5}; do +# Due to reduced performance on debug kernels use fewer files by default. +if is_kmemleak; then + nr_files=100 + passes=2 +else + nr_files=1000 + passes=3 +fi + +for i in {1..$passes}; do # Randomly modify several dataset properties in order to generate # more interesting incremental send streams. rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256" @@ -76,12 +85,8 @@ for i in {1..5}; do # Churn the filesystem in such a way that we're likely to be both # allocating and reallocating objects in the incremental stream. - # - # Disable xattrs until the following spill block issue is resolved: - # https://github.com/openzfs/openzfs/pull/705 - # - log_must churn_files 1000 524288 $POOL/fs 0 - expected_cksum=$(recursive_cksum /$fs) + log_must churn_files $nr_files 524288 $POOL/fs + expected_cksum=$(recursive_cksum /$POOL/fs) # Create a snapshot and use it to send an incremental stream. 
this_snap=$((last_snap + 1)) diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh index 80464e05e..4b89a73d8 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh @@ -35,6 +35,8 @@ # e) Destroy the incremental stream and old snapshot. # +verify_runnable "both" + log_assert "Verify incremental receive handles reallocation" function cleanup @@ -56,7 +58,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}" # Set atime=off to prevent the recursive_cksum from modifying newfs. log_must zfs set atime=off $POOL/newfs -for i in {1..5}; do +# Due to reduced performance on debug kernels use fewer files by default. +if is_kmemleak; then + nr_files=100 + passes=2 +else + nr_files=1000 + passes=3 +fi + +for i in {1..$passes}; do # Randomly modify several dataset properties in order to generate # more interesting incremental send streams. rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256" @@ -67,8 +78,8 @@ for i in {1..5}; do # Churn the filesystem in such a way that we're likely to be both # allocating and reallocating objects in the incremental stream. - log_must churn_files 1000 524288 $POOL/fs - expected_cksum=$(recursive_cksum /$fs) + log_must churn_files $nr_files 524288 $POOL/fs + expected_cksum=$(recursive_cksum /$POOL/fs) # Create a snapshot and use it to send an incremental stream. this_snap=$((last_snap + 1)) diff --git a/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh b/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh new file mode 100755 index 000000000..9de732e22 --- /dev/null +++ b/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh @@ -0,0 +1,155 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# Description: +# Verify spill blocks are correctly preserved. +# +# Strategy: +# 1) Create a set of files each containing some file data. +# 2) Add enough xattrs to the file to require a spill block. +# 3) Snapshot and send these files to a new dataset. +# 4) Modify the files and spill blocks in a variety of ways. +# 5) Send the changes using an incremental send stream. +# 6) Verify that all the xattrs (and thus the spill block) were +# preserved when receiving the incremental stream. +# + +verify_runnable "both" + +log_assert "Verify spill blocks are correctly preserved" + +function cleanup +{ + rm -f $BACKDIR/fs@* + destroy_dataset $POOL/fs "-rR" + destroy_dataset $POOL/newfs "-rR" +} + +attrvalue="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + +log_onexit cleanup + +log_must zfs create $POOL/fs +log_must zfs set xattr=sa $POOL/fs +log_must zfs set dnodesize=legacy $POOL/fs +log_must zfs set recordsize=128k $POOL/fs + +# Create 40 files each with a spill block containing xattrs. Each file +# will be modified in a different way to validate the incremental receive. +for i in {1..40}; do + file="/$POOL/fs/file$i" + + log_must mkfile 16384 $file + for j in {1..20}; do + log_must attr -qs "testattr$j" -V "$attrvalue" $file + done +done + +# Snapshot the pool and send it to the new dataset. +log_must zfs snapshot $POOL/fs@snap1 +log_must eval "zfs send -e $POOL/fs@snap1 >$BACKDIR/fs@snap1" +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap1" + +# +# Modify file[1-6]'s contents but not the spill blocks. 
+# +# file1 - Increase record size; single block +# file2 - Increase record size; multiple blocks +# file3 - Truncate file to zero size; single block +# file4 - Truncate file to smaller size; single block +# file5 - Truncate file to much larger size; add holes +# file6 - Truncate file to embedded size; embedded data +# +log_must mkfile 32768 /$POOL/fs/file1 +log_must mkfile 1048576 /$POOL/fs/file2 +log_must truncate -s 0 /$POOL/fs/file3 +log_must truncate -s 8192 /$POOL/fs/file4 +log_must truncate -s 1073741824 /$POOL/fs/file5 +log_must truncate -s 50 /$POOL/fs/file6 + +# +# Modify file[11-16]'s contents and their spill blocks. +# +# file11 - Increase record size; single block +# file12 - Increase record size; multiple blocks +# file13 - Truncate file to zero size; single block +# file14 - Truncate file to smaller size; single block +# file15 - Truncate file to much larger size; add holes +# file16 - Truncate file to embedded size; embedded data +# +log_must mkfile 32768 /$POOL/fs/file11 +log_must mkfile 1048576 /$POOL/fs/file12 +log_must truncate -s 0 /$POOL/fs/file13 +log_must truncate -s 8192 /$POOL/fs/file14 +log_must truncate -s 1073741824 /$POOL/fs/file15 +log_must truncate -s 50 /$POOL/fs/file16 + +for i in {11..20}; do + log_must attr -qr testattr1 /$POOL/fs/file$i +done + +# +# Modify file[21-26]'s contents and remove their spill blocks. 
+# +# file21 - Increase record size; single block +# file22 - Increase record size; multiple blocks +# file23 - Truncate file to zero size; single block +# file24 - Truncate file to smaller size; single block +# file25 - Truncate file to much larger size; add holes +# file26 - Truncate file to embedded size; embedded data +# +log_must mkfile 32768 /$POOL/fs/file21 +log_must mkfile 1048576 /$POOL/fs/file22 +log_must truncate -s 0 /$POOL/fs/file23 +log_must truncate -s 8192 /$POOL/fs/file24 +log_must truncate -s 1073741824 /$POOL/fs/file25 +log_must truncate -s 50 /$POOL/fs/file26 + +for i in {21..30}; do + for j in {1..20}; do + log_must attr -qr testattr$j /$POOL/fs/file$i + done +done + +# +# Modify file[31-40]'s spill blocks but not the file contents. +# +for i in {31..40}; do + file="/$POOL/fs/file$i" + log_must attr -qr testattr$(((RANDOM % 20) + 1)) $file + log_must attr -qs testattr$(((RANDOM % 20) + 1)) -V "$attrvalue" $file +done + +# Calculate the expected recursive checksum for the source. +expected_cksum=$(recursive_cksum /$POOL/fs) + +# Snapshot the pool and send the incremental snapshot. +log_must zfs snapshot $POOL/fs@snap2 +log_must eval "zfs send -e -i $POOL/fs@snap1 $POOL/fs@snap2 >$BACKDIR/fs@snap2" +log_must eval "zfs recv -F $POOL/newfs < $BACKDIR/fs@snap2" + +# Validate the received copy using the received recursive checksum. +actual_cksum=$(recursive_cksum /$POOL/newfs) +if [[ "$expected_cksum" != "$actual_cksum" ]]; then + log_fail "Checksums differ ($expected_cksum != $actual_cksum)" +fi + +log_pass "Verify spill blocks are correctly preserved"