OpenZFS 6513 - partially filled holes lose birth time

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Boris Protopopov <bprotopopov@hotmail.com>
Approved by: Richard Lowe <richlowe@richlowe.net>a
Ported by: Boris Protopopov <bprotopopov@actifio.com>
Signed-off-by: Boris Protopopov <bprotopopov@actifio.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>

OpenZFS-issue: https://www.illumos.org/issues/6513
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/8df0bcf0

If a ZFS object contains a hole at level one, and then a data block is
created at level 0 underneath that l1 block, l0 holes will be created.
However, these l0 holes do not have the birth time property set; as a
result, incremental sends will not send those holes.

Fix is to modify the dbuf_read code to fill in birth time data.
This commit is contained in:
Paul Dagnelie
2016-05-15 08:02:28 -07:00
committed by Brian Behlendorf
parent 100a91aa3e
commit bc77ba73fe
9 changed files with 192 additions and 54 deletions
+118 -7
View File
@@ -543,13 +543,50 @@ dbuf_verify(dmu_buf_impl_t *db)
* If the blkptr isn't set but they have nonzero data,
* it had better be dirty, otherwise we'll lose that
* data when we evict this buffer.
*
* There is an exception to this rule for indirect blocks; in
* this case, if the indirect block is a hole, we fill in a few
* fields on each of the child blocks (importantly, birth time)
* to prevent hole birth times from being lost when you
* partially fill in a hole.
*/
if (db->db_dirtycnt == 0) {
ASSERTV(uint64_t *buf = db->db.db_data);
int i;
if (db->db_level == 0) {
uint64_t *buf = db->db.db_data;
int i;
for (i = 0; i < db->db.db_size >> 3; i++) {
ASSERT(buf[i] == 0);
for (i = 0; i < db->db.db_size >> 3; i++) {
ASSERT(buf[i] == 0);
}
} else {
int i;
blkptr_t *bps = db->db.db_data;
ASSERT3U(1 << DB_DNODE(db)->dn_indblkshift, ==,
db->db.db_size);
/*
* We want to verify that all the blkptrs in the
* indirect block are holes, but we may have
* automatically set up a few fields for them.
* We iterate through each blkptr and verify
* they only have those fields set.
*/
for (i = 0;
i < db->db.db_size / sizeof (blkptr_t);
i++) {
blkptr_t *bp = &bps[i];
ASSERT(ZIO_CHECKSUM_IS_ZERO(
&bp->blk_cksum));
ASSERT(
DVA_IS_EMPTY(&bp->blk_dva[0]) &&
DVA_IS_EMPTY(&bp->blk_dva[1]) &&
DVA_IS_EMPTY(&bp->blk_dva[2]));
ASSERT0(bp->blk_fill);
ASSERT0(bp->blk_pad[0]);
ASSERT0(bp->blk_pad[1]);
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(BP_IS_HOLE(bp));
ASSERT0(bp->blk_phys_birth);
}
}
}
}
@@ -718,10 +755,32 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
DB_DNODE_EXIT(db);
dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
if (db->db_blkptr != NULL && db->db_level > 0 &&
BP_IS_HOLE(db->db_blkptr) &&
db->db_blkptr->blk_birth != 0) {
blkptr_t *bps = db->db.db_data;
int i;
for (i = 0; i < ((1 <<
DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
i++) {
blkptr_t *bp = &bps[i];
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
1 << dn->dn_indblkshift);
BP_SET_LSIZE(bp,
BP_GET_LEVEL(db->db_blkptr) == 1 ?
dn->dn_datablksz :
BP_GET_LSIZE(db->db_blkptr));
BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
BP_SET_LEVEL(bp,
BP_GET_LEVEL(db->db_blkptr) - 1);
BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
}
}
DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
return (0);
@@ -3094,6 +3153,45 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
rw_exit(&dn->dn_struct_rwlock);
}
/* ARGSUSED */
/*
* This function gets called just prior to running through the compression
* stage of the zio pipeline. If we're an indirect block comprised of only
* holes, then we want this indirect to be compressed away to a hole. In
* order to do that we must zero out any information about the holes that
* this indirect points to prior to before we try to compress it.
*/
static void
dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
{
dmu_buf_impl_t *db = vdb;
dnode_t *dn;
blkptr_t *bp;
uint64_t i;
int epbs;
ASSERT3U(db->db_level, >, 0);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
/* Determine if all our children are holes */
for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
if (!BP_IS_HOLE(bp))
break;
}
/*
* If all the children are holes, then zero them all out so that
* we may get compressed away.
*/
if (i == 1 << epbs) {
/* didn't find any non-holes */
bzero(db->db.db_data, db->db.db_size);
}
DB_DNODE_EXIT(db);
}
/*
* The SPA will call this callback several times for each zio - once
* for every physical child i/o (zio->io_phys_children times). This
@@ -3348,7 +3446,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dr->dr_zio = zio_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, contents, db->db.db_size, &zp,
dbuf_write_override_ready, NULL, dbuf_write_override_done,
dbuf_write_override_ready, NULL, NULL,
dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
mutex_enter(&db->db_mtx);
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@@ -3359,14 +3458,26 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
dr->dr_zio = zio_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, NULL, db->db.db_size, &zp,
dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
dbuf_write_nofill_ready, NULL, NULL,
dbuf_write_nofill_done, db,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
} else {
arc_done_func_t *children_ready_cb = NULL;
ASSERT(arc_released(data));
/*
* For indirect blocks, we want to setup the children
* ready callback so that we can properly handle an indirect
* block that only contains holes.
*/
if (db->db_level != 0)
children_ready_cb = dbuf_write_children_ready;
dr->dr_zio = arc_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
children_ready_cb,
dbuf_write_physdone, dbuf_write_done, db,
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
}