From 4c7b7eedcde7fababf457ca04445e6d9d1617e29 Mon Sep 17 00:00:00 2001 From: "Justin T. Gibbs" Date: Thu, 12 Mar 2015 11:10:35 +1100 Subject: [PATCH] Illumos 5630 - stale bonus buffer in recycled dnode_t leads to data corruption 5630 stale bonus buffer in recycled dnode_t leads to data corruption Author: Justin T. Gibbs Reviewed by: Matthew Ahrens Reviewed by: George Wilson Reviewed by: Will Andrews Approved by: Robert Mustacchi References: https://www.illumos.org/issues/5630 https://github.com/illumos/illumos-gate/commit/cd485b4 Ported-by: Chris Dunlop Signed-off-by: Brian Behlendorf Signed-off-by: Richard Yao Issue #3172 --- include/sys/dnode.h | 2 ++ module/zfs/dbuf.c | 61 +++++++++++++++++++++++++++++++++-------- module/zfs/dnode.c | 8 +++++- module/zfs/dnode_sync.c | 6 ++++ 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/include/sys/dnode.h b/include/sys/dnode.h index 64bf000b6..b63549b46 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -290,6 +290,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, void *ref, dnode_t **dnp); boolean_t dnode_add_ref(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref); +void dnode_rele_and_unlock(dnode_t *dn, void *tag); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, @@ -311,6 +312,7 @@ void dnode_fini(void); int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off, int minlvl, uint64_t blkfill, uint64_t txg); void dnode_evict_dbufs(dnode_t *dn); +void dnode_evict_bonus(dnode_t *dn); #ifdef ZFS_DEBUG diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 347fc3535..f10a04d11 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2200,21 +2200,60 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) if (holds == 0) { if (db->db_blkid == DMU_BONUS_BLKID) { + dnode_t *dn; + + /* + * If the dnode moves here, we cannot cross this + * barrier until the move completes. + */ + DB_DNODE_ENTER(db); + + dn = DB_DNODE(db); + atomic_dec_32(&dn->dn_dbufs_count); + + /* + * Decrementing the dbuf count means that the bonus + * buffer's dnode hold is no longer discounted in + * dnode_move(). The dnode cannot move until after + * the dnode_rele_and_unlock() below. + */ + DB_DNODE_EXIT(db); + + /* + * Do not reference db after its lock is dropped. + * Another thread may evict it. + */ mutex_exit(&db->db_mtx); /* - * If the dnode moves here, we cannot cross this barrier - * until the move completes. + * If the dnode has been freed, evict the bonus + * buffer immediately. The data in the bonus + * buffer is no longer relevant and this prevents + * a stale bonus buffer from being associated + * with this dnode_t should the dnode_t be reused + * prior to being destroyed. */ - DB_DNODE_ENTER(db); - atomic_dec_32(&DB_DNODE(db)->dn_dbufs_count); - DB_DNODE_EXIT(db); - /* - * The bonus buffer's dnode hold is no longer discounted - * in dnode_move(). The dnode cannot move until after - * the dnode_rele(). - */ - dnode_rele(DB_DNODE(db), db); + mutex_enter(&dn->dn_mtx); + if (dn->dn_type == DMU_OT_NONE || + dn->dn_free_txg != 0) { + /* + * Drop dn_mtx. It is a leaf lock and + * cannot be held when dnode_evict_bonus() + * acquires other locks in order to + * perform the eviction. + * + * Freed dnodes cannot be reused until the + * last hold is released. Since this bonus + * buffer has a hold, the dnode will remain + * in the free state, even without dn_mtx + * held, until the dnode_rele_and_unlock() + * below. + */ + mutex_exit(&dn->dn_mtx); + dnode_evict_bonus(dn); + mutex_enter(&dn->dn_mtx); + } + dnode_rele_and_unlock(dn, db); } else if (db->db_buf == NULL) { /* * This is a special case: we never associated this diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index dc082ff3e..ef74621a0 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1157,13 +1157,19 @@ dnode_add_ref(dnode_t *dn, void *tag) void dnode_rele(dnode_t *dn, void *tag) +{ + mutex_enter(&dn->dn_mtx); + dnode_rele_and_unlock(dn, tag); +} + +void +dnode_rele_and_unlock(dnode_t *dn, void *tag) { uint64_t refs; /* Get while the hold prevents the dnode from moving. */ dmu_buf_impl_t *db = dn->dn_dbuf; dnode_handle_t *dnh = dn->dn_handle; - mutex_enter(&dn->dn_mtx); refs = refcount_remove(&dn->dn_holds, tag); mutex_exit(&dn->dn_mtx); diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 6ad623998..1825e9835 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -454,6 +454,12 @@ dnode_evict_dbufs(dnode_t *dn) if (pass >= 100) dprintf("Required %d passes to evict dbufs\n", pass); + dnode_evict_bonus(dn); +} + +void +dnode_evict_bonus(dnode_t *dn) +{ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { mutex_enter(&dn->dn_bonus->db_mtx);