Illumos 4757, 4913

4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks

Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/4757
  https://www.illumos.org/issues/4913
  https://github.com/illumos/illumos-gate/commit/5d7b4d4

Porting notes:

For compatibility with the fastpath code the zio_done() function
needed to be updated.  Because embedded-data block pointers do
not require DVAs to be allocated the associated vdevs will not
be marked and therefore should not be unmarked.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2544
This commit is contained in:
Matthew Ahrens
2014-06-05 13:19:08 -08:00
committed by Brian Behlendorf
parent faf0f58c69
commit 9b67f60560
46 changed files with 1195 additions and 258 deletions
+188 -27
View File
@@ -50,7 +50,9 @@
#include <sys/zfs_onexit.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/blkptr.h>
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
@@ -197,7 +199,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
}
static int
dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
{
struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
@@ -232,13 +234,22 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_offset = offset;
drrw->drr_length = blksz;
drrw->drr_toguid = dsp->dsa_toguid;
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
drrw->drr_key.ddk_cksum = bp->blk_cksum;
if (BP_IS_EMBEDDED(bp)) {
/*
* There's no pre-computed checksum of embedded BP's, so
* (like fletcher4-checkummed blocks) userland will have
* to compute a dedup-capable checksum itself.
*/
drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
} else {
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
drrw->drr_key.ddk_cksum = bp->blk_cksum;
}
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
return (SET_ERROR(EINTR));
@@ -247,6 +258,43 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
return (0);
}
static int
dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
int blksz, const blkptr_t *bp)
{
char buf[BPE_PAYLOAD_SIZE];
struct drr_write_embedded *drrw =
&(dsp->dsa_drr->drr_u.drr_write_embedded);
if (dsp->dsa_pending_op != PENDING_NONE) {
if (dump_bytes(dsp, dsp->dsa_drr,
sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
dsp->dsa_pending_op = PENDING_NONE;
}
ASSERT(BP_IS_EMBEDDED(bp));
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED;
drrw->drr_object = object;
drrw->drr_offset = offset;
drrw->drr_length = blksz;
drrw->drr_toguid = dsp->dsa_toguid;
drrw->drr_compression = BP_GET_COMPRESS(bp);
drrw->drr_etype = BPE_GET_ETYPE(bp);
drrw->drr_lsize = BPE_GET_LSIZE(bp);
drrw->drr_psize = BPE_GET_PSIZE(bp);
decode_embedded_bp_compressed(bp, buf);
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
return (EINTR);
return (0);
}
static int
dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
{
@@ -367,6 +415,33 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
return (0);
}
static boolean_t
backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
{
if (!BP_IS_EMBEDDED(bp))
return (B_FALSE);
/*
* Compression function must be legacy, or explicitly enabled.
*/
if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS &&
!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4)))
return (B_FALSE);
/*
* Embed type must be explicitly enabled.
*/
switch (BPE_GET_ETYPE(bp)) {
case BP_EMBEDDED_TYPE_DATA:
if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
return (B_TRUE);
break;
default:
return (B_FALSE);
}
return (B_FALSE);
}
#define BP_SPAN(dnp, level) \
(((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
(level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
@@ -435,11 +510,17 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
} else if (backup_do_embed(dsp, bp)) {
/* it's an embedded level-0 block of a regular object */
int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
err = dump_write_embedded(dsp, zb->zb_object,
zb->zb_blkid * blksz, blksz, bp);
} else { /* it's a level-0 block of a regular object */
uint32_t aflags = ARC_WAIT;
arc_buf_t *abuf;
int blksz = BP_GET_LSIZE(bp);
ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
ASSERT0(zb->zb_level);
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
@@ -458,7 +539,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
}
err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
err = dump_write(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
blksz, bp, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
}
@@ -472,14 +553,15 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd,
vnode_t *vp, offset_t *off)
zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok,
int outfd, vnode_t *vp, offset_t *off)
{
objset_t *os;
dmu_replay_record_t *drr;
dmu_sendarg_t *dsp;
int err;
uint64_t fromtxg = 0;
uint64_t featureflags = 0;
err = dmu_objset_from_ds(ds, &os);
if (err != 0) {
@@ -502,13 +584,23 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
return (SET_ERROR(EINVAL));
}
if (version >= ZPL_VERSION_SA) {
DMU_SET_FEATUREFLAGS(
drr->drr_u.drr_begin.drr_versioninfo,
DMU_BACKUP_FEATURE_SA_SPILL);
featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
}
}
#endif
if (embedok &&
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
} else {
embedok = B_FALSE;
}
DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
featureflags);
drr->drr_u.drr_begin.drr_creation_time =
ds->ds_phys->ds_creation_time;
drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
@@ -540,6 +632,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
dsp->dsa_pending_op = PENDING_NONE;
dsp->dsa_incremental = (fromzb != NULL);
dsp->dsa_featureflags = featureflags;
mutex_enter(&ds->ds_sendstream_lock);
list_insert_head(&ds->ds_sendstreams, dsp);
@@ -591,7 +684,7 @@ out:
int
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
int outfd, vnode_t *vp, offset_t *off)
boolean_t embedok, int outfd, vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
@@ -625,10 +718,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
zb.zbm_guid = fromds->ds_phys->ds_guid;
is_clone = (fromds->ds_dir != ds->ds_dir);
dsl_dataset_rele(fromds, FTAG);
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok,
outfd, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok,
outfd, vp, off);
}
dsl_dataset_rele(ds, FTAG);
@@ -636,7 +729,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
}
int
dmu_send(const char *tosnap, const char *fromsnap,
dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
int outfd, vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
@@ -703,10 +796,10 @@ dmu_send(const char *tosnap, const char *fromsnap,
dsl_pool_rele(dp, FTAG);
return (err);
}
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok,
outfd, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok,
outfd, vp, off);
}
if (owned)
@@ -861,6 +954,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
uint64_t fromguid = drrb->drr_fromguid;
int flags = drrb->drr_flags;
int error;
uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds;
const char *tofs = drba->drba_cookie->drc_tofs;
@@ -874,11 +968,22 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
return (SET_ERROR(EINVAL));
/* Verify pool version supports SA if SA_SPILL feature set */
if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_SA_SPILL) &&
spa_version(dp->dp_spa) < SPA_VERSION_SA) {
if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
spa_version(dp->dp_spa) < SPA_VERSION_SA)
return (SET_ERROR(ENOTSUP));
/*
* The receiving code doesn't know how to translate a WRITE_EMBEDDED
* record to a plan WRITE record, so the pool must have the
* EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
* records. Same with WRITE_EMBEDDED records that use LZ4 compression.
*/
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
return (SET_ERROR(ENOTSUP));
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
return (SET_ERROR(ENOTSUP));
}
error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
if (error == 0) {
@@ -1153,7 +1258,6 @@ backup_byteswap(dmu_replay_record_t *drr)
break;
case DRR_OBJECT:
DO64(drr_object.drr_object);
/* DO64(drr_object.drr_allocation_txg); */
DO32(drr_object.drr_type);
DO32(drr_object.drr_bonustype);
DO32(drr_object.drr_blksz);
@@ -1191,6 +1295,14 @@ backup_byteswap(dmu_replay_record_t *drr)
DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
DO64(drr_write_byref.drr_key.ddk_prop);
break;
case DRR_WRITE_EMBEDDED:
DO64(drr_write_embedded.drr_object);
DO64(drr_write_embedded.drr_offset);
DO64(drr_write_embedded.drr_length);
DO64(drr_write_embedded.drr_toguid);
DO32(drr_write_embedded.drr_lsize);
DO32(drr_write_embedded.drr_psize);
break;
case DRR_FREE:
DO64(drr_free.drr_object);
DO64(drr_free.drr_offset);
@@ -1380,7 +1492,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
int err;
guid_map_entry_t gmesrch;
guid_map_entry_t *gmep;
avl_index_t where;
avl_index_t where;
objset_t *ref_os = NULL;
dmu_buf_t *dbp;
@@ -1405,7 +1517,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH);
if (err)
if (err != 0)
return (err);
tx = dmu_tx_create(os);
@@ -1424,6 +1536,48 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
return (0);
}
static int
restore_write_embedded(struct restorearg *ra, objset_t *os,
struct drr_write_embedded *drrwnp)
{
dmu_tx_t *tx;
int err;
void *data;
if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
return (EINVAL);
if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE)
return (EINVAL);
if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES)
return (EINVAL);
if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
return (EINVAL);
data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8));
if (data == NULL)
return (ra->err);
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, drrwnp->drr_object,
drrwnp->drr_offset, drrwnp->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
dmu_write_embedded(os, drrwnp->drr_object,
drrwnp->drr_offset, data, drrwnp->drr_etype,
drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
ra->byteswap ^ ZFS_HOST_BYTEORDER, tx);
dmu_tx_commit(tx);
return (0);
}
static int
restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
{
@@ -1618,6 +1772,13 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
ra.err = restore_write_byref(&ra, os, &drrwbr);
break;
}
case DRR_WRITE_EMBEDDED:
{
struct drr_write_embedded drrwe =
drr->drr_u.drr_write_embedded;
ra.err = restore_write_embedded(&ra, os, &drrwe);
break;
}
case DRR_FREE:
{
struct drr_free drrf = drr->drr_u.drr_free;