OpenZFS 7500 - Simplify dbuf_free_range by removing dn_unlisted_l0_blkid

Authored by: Stephen Blinick <stephen.blinick@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>

OpenZFS-issue: https://www.illumos.org/issues/7500
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/653af1b
Closes #5639
George Melikov authored on 2017-01-27 02:15:48 +03:00; committed by Brian Behlendorf
parent 39efbde7c5
commit 9c9531cb6f
3 changed files with 5 additions and 43 deletions
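
Note: dn_unlisted_l0_blkid tracked the first blkid at or above which no level-0 dbufs exist in dn_dbufs, letting dbuf_free_range() skip the AVL search entirely for ranges above it, and zfs_free_range_recv_miss counted slow-path hits during zfs receive. With both removed, dbuf_free_range() always positions a search key in the dn_dbufs AVL tree and walks the matching range, as the dbuf.c hunks below show. What follows is a minimal standalone sketch of that walk using the illumos avl(9F) interfaces; fake_dbuf_t and free_range_walk() are simplified illustrative stand-ins, not the real ZFS definitions:

/*
 * Sketch only: the range walk dbuf_free_range() now always performs.
 * Position a search key in the per-dnode AVL tree, then visit every
 * level-0 dbuf whose blkid falls in [start, end].  fake_dbuf_t is a
 * simplified stand-in for dmu_buf_impl_t.
 */
#include <sys/types.h>
#include <sys/avl.h>

typedef struct fake_dbuf {
	uint8_t		db_level;	/* indirection level; 0 = data */
	uint64_t	db_blkid;	/* block id within the object */
	avl_node_t	db_link;	/* linkage in the AVL tree */
} fake_dbuf_t;

static void
free_range_walk(avl_tree_t *dbufs, uint64_t start, uint64_t end)
{
	fake_dbuf_t key = { .db_level = 0, .db_blkid = start };
	fake_dbuf_t *db, *db_next;
	avl_index_t where;

	/*
	 * Find the first dbuf at or after (level 0, start): either an
	 * exact match or the nearest node after the insertion point.
	 */
	db = avl_find(dbufs, &key, &where);
	if (db == NULL)
		db = avl_nearest(dbufs, where, AVL_AFTER);

	for (; db != NULL; db = db_next) {
		db_next = AVL_NEXT(dbufs, db);
		/* Past the last level-0 dbuf in range: stop walking. */
		if (db->db_level != 0 || db->db_blkid > end)
			break;
		/* evict (or clear) *db here */
	}
}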

include/sys/dnode.h

@@ -236,8 +236,6 @@ struct dnode {
 
 	/* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */
 	uint32_t dn_dbufs_count;	/* count of dn_dbufs */
-	/* There are no level-0 blocks of this blkid or higher in dn_dbufs */
-	uint64_t dn_unlisted_l0_blkid;
 
 	/* protected by os_lock: */
 	list_node_t dn_dirty_link[TXG_SIZE];	/* next on dataset's dirty */

module/zfs/dbuf.c

@@ -74,11 +74,6 @@ static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
 static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh);
 
 uint_t zfs_dbuf_evict_key;
-/*
- * Number of times that zfs_free_range() took the slow path while doing
- * a zfs receive. A nonzero value indicates a potential performance problem.
- */
-uint64_t zfs_free_range_recv_miss;
 
 static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
@@ -1340,9 +1335,6 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
  * Evict (if its unreferenced) or clear (if its referenced) any level-0
  * data blocks in the free range, so that any future readers will find
  * empty blocks.
- *
- * This is a no-op if the dataset is in the middle of an incremental
- * receive; see comment below for details.
  */
 void
 dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
@@ -1352,10 +1344,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
 	dmu_buf_impl_t *db, *db_next;
 	uint64_t txg = tx->tx_txg;
 	avl_index_t where;
-	boolean_t freespill =
-	    (start_blkid == DMU_SPILL_BLKID || end_blkid == DMU_SPILL_BLKID);
 
-	if (end_blkid > dn->dn_maxblkid && !freespill)
+	if (end_blkid > dn->dn_maxblkid &&
+	    !(start_blkid == DMU_SPILL_BLKID || end_blkid == DMU_SPILL_BLKID))
 		end_blkid = dn->dn_maxblkid;
 
 	dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid);
@@ -1365,28 +1356,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
 	db_search->db_state = DB_SEARCH;
 
 	mutex_enter(&dn->dn_dbufs_mtx);
-	if (start_blkid >= dn->dn_unlisted_l0_blkid && !freespill) {
-		/* There can't be any dbufs in this range; no need to search. */
-#ifdef DEBUG
-		db = avl_find(&dn->dn_dbufs, db_search, &where);
-		ASSERT3P(db, ==, NULL);
-		db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
-		ASSERT(db == NULL || db->db_level > 0);
-#endif
-		goto out;
-	} else if (dmu_objset_is_receiving(dn->dn_objset)) {
-		/*
-		 * If we are receiving, we expect there to be no dbufs in
-		 * the range to be freed, because receive modifies each
-		 * block at most once, and in offset order. If this is
-		 * not the case, it can lead to performance problems,
-		 * so note that we unexpectedly took the slow path.
-		 */
-		atomic_inc_64(&zfs_free_range_recv_miss);
-	}
 	db = avl_find(&dn->dn_dbufs, db_search, &where);
 	ASSERT3P(db, ==, NULL);
 
 	db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
 
 	for (; db != NULL; db = db_next) {
@@ -1459,7 +1431,6 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
 		mutex_exit(&db->db_mtx);
 	}
 
-out:
 	kmem_free(db_search, sizeof (dmu_buf_impl_t));
 	mutex_exit(&dn->dn_dbufs_mtx);
 }
@@ -2416,9 +2387,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
 		return (odb);
 	}
 	avl_add(&dn->dn_dbufs, db);
-	if (db->db_level == 0 && db->db_blkid >=
-	    dn->dn_unlisted_l0_blkid)
-		dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
+
 	db->db_state = DB_UNCACHED;
 	mutex_exit(&dn->dn_dbufs_mtx);
 	arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
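
The unconditional lookup above relies on dn_dbufs being ordered so that all level-0 dbufs sort before higher levels, in ascending blkid order; that is what lets the walk stop at the first dbuf with db_level != 0 or db_blkid > end_blkid. Below is a hedged sketch of such an ordering, reusing the illustrative fake_dbuf_t from the earlier sketch; the real comparator is dbuf_compare() in dbuf.c, which also breaks ties (e.g. for the DB_SEARCH sentinel key):

/* Sketch: order dbufs by level, then blkid (tie-breaking omitted). */
static int
fake_dbuf_compare(const void *x1, const void *x2)
{
	const fake_dbuf_t *d1 = x1;
	const fake_dbuf_t *d2 = x2;

	if (d1->db_level < d2->db_level)
		return (-1);
	if (d1->db_level > d2->db_level)
		return (1);
	if (d1->db_blkid < d2->db_blkid)
		return (-1);
	if (d1->db_blkid > d2->db_blkid)
		return (1);
	return (0);
}

A tree for the earlier walk sketch would then be created the same way the diffs below create dn_dbufs:

	avl_create(&tree, fake_dbuf_compare, sizeof (fake_dbuf_t),
	    offsetof(fake_dbuf_t, db_link));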

module/zfs/dnode.c

@@ -20,7 +20,7 @@
  */
 
 /*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
-* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 */
@@ -141,7 +141,6 @@ dnode_cons(void *arg, void *unused, int kmflag)
 	dn->dn_id_flags = 0;
 
 	dn->dn_dbufs_count = 0;
-	dn->dn_unlisted_l0_blkid = 0;
 	avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
 	    offsetof(dmu_buf_impl_t, db_link));
 
@@ -194,7 +193,6 @@ dnode_dest(void *arg, void *unused)
 	ASSERT0(dn->dn_id_flags);
 
 	ASSERT0(dn->dn_dbufs_count);
-	ASSERT0(dn->dn_unlisted_l0_blkid);
 	avl_destroy(&dn->dn_dbufs);
 }
 
@@ -515,7 +513,6 @@ dnode_destroy(dnode_t *dn)
 	dn->dn_newuid = 0;
 	dn->dn_newgid = 0;
 	dn->dn_id_flags = 0;
-	dn->dn_unlisted_l0_blkid = 0;
 
 	dmu_zfetch_fini(&dn->dn_zfetch);
 	kmem_cache_free(dnode_cache, dn);
@@ -766,7 +763,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
 	ASSERT(avl_is_empty(&ndn->dn_dbufs));
 	avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs);
 	ndn->dn_dbufs_count = odn->dn_dbufs_count;
-	ndn->dn_unlisted_l0_blkid = odn->dn_unlisted_l0_blkid;
 	ndn->dn_bonus = odn->dn_bonus;
 	ndn->dn_have_spill = odn->dn_have_spill;
 	ndn->dn_zio = odn->dn_zio;
@@ -799,7 +795,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
 	avl_create(&odn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
 	    offsetof(dmu_buf_impl_t, db_link));
 	odn->dn_dbufs_count = 0;
-	odn->dn_unlisted_l0_blkid = 0;
 	odn->dn_bonus = NULL;
 	odn->dn_zfetch.zf_dnode = NULL;
 