mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 03:08:51 +03:00
Illumos 4370, 4371
4370 avoid transmitting holes during zfs send; 4371 DMU code clean up. Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net> Approved by: Garrett D'Amore <garrett@damore.org> References: https://www.illumos.org/issues/4370 https://www.illumos.org/issues/4371 https://github.com/illumos/illumos-gate/commit/43466aa Ported by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2529
This commit is contained in:
committed by
Brian Behlendorf
parent
fa86b5dbb6
commit
b0bc7a84d9
+102
-88
@@ -32,6 +32,7 @@
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
static void
|
||||
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
|
||||
@@ -112,26 +113,48 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
}
|
||||
|
||||
static int
|
||||
static void
|
||||
free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
uint64_t bytesfreed = 0;
|
||||
int i, blocks_freed = 0;
|
||||
int i;
|
||||
|
||||
dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);
|
||||
|
||||
for (i = 0; i < num; i++, bp++) {
|
||||
uint64_t lsize, lvl;
|
||||
dmu_object_type_t type;
|
||||
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
|
||||
bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
|
||||
ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
|
||||
|
||||
/*
|
||||
* Save some useful information on the holes being
|
||||
* punched, including logical size, type, and indirection
|
||||
* level. Retaining birth time enables detection of when
|
||||
* holes are punched for reducing the number of free
|
||||
* records transmitted during a zfs send.
|
||||
*/
|
||||
|
||||
lsize = BP_GET_LSIZE(bp);
|
||||
type = BP_GET_TYPE(bp);
|
||||
lvl = BP_GET_LEVEL(bp);
|
||||
|
||||
bzero(bp, sizeof (blkptr_t));
|
||||
blocks_freed += 1;
|
||||
|
||||
if (spa_feature_is_active(dn->dn_objset->os_spa,
|
||||
SPA_FEATURE_HOLE_BIRTH)) {
|
||||
BP_SET_LSIZE(bp, lsize);
|
||||
BP_SET_TYPE(bp, type);
|
||||
BP_SET_LEVEL(bp, lvl);
|
||||
BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0);
|
||||
}
|
||||
}
|
||||
dnode_diduse_space(dn, -bytesfreed);
|
||||
return (blocks_freed);
|
||||
}
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
@@ -215,30 +238,27 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
|
||||
|
||||
#define ALL -1
|
||||
|
||||
static int
|
||||
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
||||
static void
|
||||
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn;
|
||||
blkptr_t *bp;
|
||||
dmu_buf_impl_t *subdb;
|
||||
uint64_t start, end, dbstart, dbend, i;
|
||||
int epbs, shift, err;
|
||||
int all = TRUE;
|
||||
int blocks_freed = 0;
|
||||
int epbs, shift;
|
||||
|
||||
/*
|
||||
* There is a small possibility that this block will not be cached:
|
||||
* 1 - if level > 1 and there are no children with level <= 1
|
||||
* 2 - if we didn't get a dirty hold (because this block had just
|
||||
* finished being written -- and so had no holds), and then this
|
||||
* block got evicted before we got here.
|
||||
* 2 - if this block was evicted since we read it from
|
||||
* dmu_tx_hold_free().
|
||||
*/
|
||||
if (db->db_state != DB_CACHED)
|
||||
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
|
||||
|
||||
dbuf_release_bp(db);
|
||||
bp = (blkptr_t *)db->db.db_data;
|
||||
bp = db->db.db_data;
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
@@ -248,7 +268,6 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
||||
start = blkid >> shift;
|
||||
if (dbstart < start) {
|
||||
bp += start - dbstart;
|
||||
all = FALSE;
|
||||
} else {
|
||||
start = dbstart;
|
||||
}
|
||||
@@ -256,49 +275,46 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
||||
end = (blkid + nblks - 1) >> shift;
|
||||
if (dbend <= end)
|
||||
end = dbend;
|
||||
else if (all)
|
||||
all = trunc;
|
||||
|
||||
ASSERT3U(start, <=, end);
|
||||
|
||||
if (db->db_level == 1) {
|
||||
FREE_VERIFY(db, start, end, tx);
|
||||
blocks_freed = free_blocks(dn, bp, end-start+1, tx);
|
||||
arc_buf_freeze(db->db_buf);
|
||||
ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
|
||||
DB_DNODE_EXIT(db);
|
||||
return (all ? ALL : blocks_freed);
|
||||
}
|
||||
free_blocks(dn, bp, end-start+1, tx);
|
||||
} else {
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
|
||||
i, B_TRUE, FTAG, &subdb));
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
ASSERT3P(bp, ==, subdb->db_blkptr);
|
||||
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
|
||||
ASSERT0(err);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
|
||||
ASSERT3P(subdb->db_blkptr, ==, bp);
|
||||
blocks_freed += free_blocks(dn, bp, 1, tx);
|
||||
} else {
|
||||
all = FALSE;
|
||||
free_children(subdb, blkid, nblks, tx);
|
||||
dbuf_rele(subdb, FTAG);
|
||||
}
|
||||
dbuf_rele(subdb, FTAG);
|
||||
}
|
||||
|
||||
/* If this whole block is free, free ourself too. */
|
||||
for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
|
||||
if (!BP_IS_HOLE(bp))
|
||||
break;
|
||||
}
|
||||
if (i == 1 << epbs) {
|
||||
/* didn't find any non-holes */
|
||||
bzero(db->db.db_data, db->db.db_size);
|
||||
free_blocks(dn, db->db_blkptr, 1, tx);
|
||||
} else {
|
||||
/*
|
||||
* Partial block free; must be marked dirty so that it
|
||||
* will be written out.
|
||||
*/
|
||||
ASSERT(db->db_dirtycnt > 0);
|
||||
}
|
||||
|
||||
DB_DNODE_EXIT(db);
|
||||
arc_buf_freeze(db->db_buf);
|
||||
#ifdef ZFS_DEBUG
|
||||
bp -= (end-start)+1;
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (i == start && blkid != 0)
|
||||
continue;
|
||||
else if (i == end && !trunc)
|
||||
continue;
|
||||
ASSERT0(bp->blk_birth);
|
||||
}
|
||||
#endif
|
||||
ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
|
||||
return (all ? ALL : blocks_freed);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -306,20 +322,21 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
||||
* and "free" all the blocks contained there.
|
||||
*/
|
||||
static void
|
||||
dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
|
||||
dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
blkptr_t *bp = dn->dn_phys->dn_blkptr;
|
||||
dmu_buf_impl_t *db;
|
||||
int trunc, start, end, shift, i, err;
|
||||
int dnlevel = dn->dn_phys->dn_nlevels;
|
||||
boolean_t trunc = B_FALSE;
|
||||
|
||||
if (blkid > dn->dn_phys->dn_maxblkid)
|
||||
return;
|
||||
|
||||
ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
|
||||
trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
|
||||
if (trunc)
|
||||
if (blkid + nblks > dn->dn_phys->dn_maxblkid) {
|
||||
nblks = dn->dn_phys->dn_maxblkid - blkid + 1;
|
||||
trunc = B_TRUE;
|
||||
}
|
||||
|
||||
/* There are no indirect blocks in the object */
|
||||
if (dnlevel == 1) {
|
||||
@@ -328,41 +345,36 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
|
||||
return;
|
||||
}
|
||||
ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
|
||||
(void) free_blocks(dn, bp + blkid, nblks, tx);
|
||||
if (trunc) {
|
||||
ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
|
||||
(dn->dn_phys->dn_datablkszsec<<SPA_MINBLOCKSHIFT));
|
||||
dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
|
||||
ASSERT(off < dn->dn_phys->dn_maxblkid ||
|
||||
dn->dn_phys->dn_maxblkid == 0 ||
|
||||
dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
|
||||
free_blocks(dn, bp + blkid, nblks, tx);
|
||||
} else {
|
||||
int shift = (dnlevel - 1) *
|
||||
(dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
|
||||
int start = blkid >> shift;
|
||||
int end = (blkid + nblks - 1) >> shift;
|
||||
dmu_buf_impl_t *db;
|
||||
int i;
|
||||
|
||||
ASSERT(start < dn->dn_phys->dn_nblkptr);
|
||||
bp += start;
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i,
|
||||
TRUE, FTAG, &db));
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
free_children(db, blkid, nblks, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
|
||||
start = blkid >> shift;
|
||||
ASSERT(start < dn->dn_phys->dn_nblkptr);
|
||||
end = (blkid + nblks - 1) >> shift;
|
||||
bp += start;
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
|
||||
ASSERT0(err);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
|
||||
ASSERT3P(db->db_blkptr, ==, bp);
|
||||
(void) free_blocks(dn, bp, 1, tx);
|
||||
}
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
if (trunc) {
|
||||
ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
|
||||
ASSERTV(uint64_t off);
|
||||
dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
|
||||
|
||||
ASSERTV(off = (dn->dn_phys->dn_maxblkid + 1) *
|
||||
(dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT));
|
||||
dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
|
||||
ASSERT(off < dn->dn_phys->dn_maxblkid ||
|
||||
dn->dn_phys->dn_maxblkid == 0 ||
|
||||
dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
|
||||
@@ -504,7 +516,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
|
||||
|
||||
ASSERT(dn->dn_free_txg > 0);
|
||||
if (dn->dn_allocated_txg != dn->dn_free_txg)
|
||||
dbuf_will_dirty(dn->dn_dbuf, tx);
|
||||
dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
|
||||
bzero(dn->dn_phys, sizeof (dnode_phys_t));
|
||||
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
@@ -535,6 +547,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
int txgoff = tx->tx_txg & TXG_MASK;
|
||||
list_t *list = &dn->dn_dirty_records[txgoff];
|
||||
boolean_t kill_spill = B_FALSE;
|
||||
boolean_t freeing_dnode;
|
||||
ASSERTV(static const dnode_phys_t zerodn = { 0 });
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
@@ -611,13 +624,14 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
dn->dn_next_bonustype[txgoff] = 0;
|
||||
}
|
||||
|
||||
freeing_dnode = dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg;
|
||||
|
||||
/*
|
||||
* We will either remove a spill block when a file is being removed
|
||||
* or we have been asked to remove it.
|
||||
*/
|
||||
if (dn->dn_rm_spillblk[txgoff] ||
|
||||
((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
|
||||
dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
|
||||
((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && freeing_dnode)) {
|
||||
if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
|
||||
kill_spill = B_TRUE;
|
||||
dn->dn_rm_spillblk[txgoff] = 0;
|
||||
@@ -640,7 +654,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
if (kill_spill) {
|
||||
(void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
|
||||
free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
@@ -656,7 +670,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
kmem_free(rp, sizeof (free_range_t));
|
||||
}
|
||||
|
||||
if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
|
||||
if (freeing_dnode) {
|
||||
dnode_sync_free(dn, tx);
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user