mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
Illumos 5911 - ZFS "hangs" while deleting file
5911 ZFS "hangs" while deleting file Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com> Reviewed by: Alek Pinchuk <alek@nexenta.com> Reviewed by: Simon Klinkert <simon.klinkert@gmail.com> Reviewed by: Dan McDonald <danmcd@omniti.com> Approved by: Richard Lowe <richlowe@richlowe.net> References: https://www.illumos.org/issues/5911 https://github.com/illumos/illumos-gate/commit/46e1baa Porting notes: Resolved the "ISO C90 forbids mixed declarations and code" warning in the dnode_free_range() function. Ported-by: kernelOfTruth <kerneloftruth@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3554
This commit is contained in:
parent
5e8cd5d17f
commit
4bda3bd0e7
@ -20,7 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||||
*/
|
*/
|
||||||
@ -287,7 +287,7 @@ void dbuf_clear(dmu_buf_impl_t *db);
|
|||||||
void dbuf_evict(dmu_buf_impl_t *db);
|
void dbuf_evict(dmu_buf_impl_t *db);
|
||||||
|
|
||||||
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
||||||
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
|
void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx);
|
||||||
void dbuf_release_bp(dmu_buf_impl_t *db);
|
void dbuf_release_bp(dmu_buf_impl_t *db);
|
||||||
|
|
||||||
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
|
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||||
*/
|
*/
|
||||||
@ -1455,6 +1455,16 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
dbuf_dirty_record_t *dr, **drp;
|
dbuf_dirty_record_t *dr, **drp;
|
||||||
|
|
||||||
ASSERT(txg != 0);
|
ASSERT(txg != 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Due to our use of dn_nlevels below, this can only be called
|
||||||
|
* in open context, unless we are operating on the MOS.
|
||||||
|
* From syncing context, dn_nlevels may be different from the
|
||||||
|
* dn_nlevels used when dbuf was dirtied.
|
||||||
|
*/
|
||||||
|
ASSERT(db->db_objset ==
|
||||||
|
dmu_objset_pool(db->db_objset)->dp_meta_objset ||
|
||||||
|
txg != spa_syncing_txg(dmu_objset_spa(db->db_objset)));
|
||||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||||
ASSERT0(db->db_level);
|
ASSERT0(db->db_level);
|
||||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||||
@ -1477,11 +1487,8 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
|
|
||||||
ASSERT(db->db.db_size != 0);
|
ASSERT(db->db.db_size != 0);
|
||||||
|
|
||||||
/*
|
dsl_pool_undirty_space(dmu_objset_pool(dn->dn_objset),
|
||||||
* Any space we accounted for in dp_dirty_* will be cleaned up by
|
dr->dr_accounted, txg);
|
||||||
* dsl_pool_sync(). This is relatively rare so the discrepancy
|
|
||||||
* is not a big deal.
|
|
||||||
*/
|
|
||||||
|
|
||||||
*drp = dr->dr_next;
|
*drp = dr->dr_next;
|
||||||
|
|
||||||
@ -1496,7 +1503,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
list_remove(&dr->dr_parent->dt.di.dr_children, dr);
|
list_remove(&dr->dr_parent->dt.di.dr_children, dr);
|
||||||
mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
|
mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
|
||||||
} else if (db->db_blkid == DMU_SPILL_BLKID ||
|
} else if (db->db_blkid == DMU_SPILL_BLKID ||
|
||||||
db->db_level+1 == dn->dn_nlevels) {
|
db->db_level + 1 == dn->dn_nlevels) {
|
||||||
ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
|
ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
|
||||||
mutex_enter(&dn->dn_mtx);
|
mutex_enter(&dn->dn_mtx);
|
||||||
list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
|
list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
|
||||||
@ -1513,11 +1520,6 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
|
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (db->db_level != 0) {
|
|
||||||
mutex_destroy(&dr->dt.di.dr_mtx);
|
|
||||||
list_destroy(&dr->dt.di.dr_children);
|
|
||||||
}
|
|
||||||
|
|
||||||
kmem_free(dr, sizeof (dbuf_dirty_record_t));
|
kmem_free(dr, sizeof (dbuf_dirty_record_t));
|
||||||
|
|
||||||
ASSERT(db->db_dirtycnt > 0);
|
ASSERT(db->db_dirtycnt > 0);
|
||||||
@ -2603,7 +2605,7 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
|||||||
|
|
||||||
zio = dr->dr_zio;
|
zio = dr->dr_zio;
|
||||||
mutex_enter(&dr->dt.di.dr_mtx);
|
mutex_enter(&dr->dt.di.dr_mtx);
|
||||||
dbuf_sync_list(&dr->dt.di.dr_children, tx);
|
dbuf_sync_list(&dr->dt.di.dr_children, db->db_level - 1, tx);
|
||||||
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
|
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
|
||||||
mutex_exit(&dr->dt.di.dr_mtx);
|
mutex_exit(&dr->dt.di.dr_mtx);
|
||||||
zio_nowait(zio);
|
zio_nowait(zio);
|
||||||
@ -2754,7 +2756,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
dbuf_sync_list(list_t *list, dmu_tx_t *tx)
|
dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
dbuf_dirty_record_t *dr;
|
dbuf_dirty_record_t *dr;
|
||||||
|
|
||||||
@ -2771,6 +2773,10 @@ dbuf_sync_list(list_t *list, dmu_tx_t *tx)
|
|||||||
DMU_META_DNODE_OBJECT);
|
DMU_META_DNODE_OBJECT);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
|
||||||
|
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
|
||||||
|
VERIFY3U(dr->dr_dbuf->db_level, ==, level);
|
||||||
|
}
|
||||||
list_remove(list, dr);
|
list_remove(list, dr);
|
||||||
if (dr->dr_dbuf->db_level > 0)
|
if (dr->dr_dbuf->db_level > 0)
|
||||||
dbuf_sync_indirect(dr, tx);
|
dbuf_sync_indirect(dr, tx);
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/dmu.h>
|
#include <sys/dmu.h>
|
||||||
@ -679,7 +679,7 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
|
|||||||
uint64_t ibyte = i << shift;
|
uint64_t ibyte = i << shift;
|
||||||
err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0);
|
err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0);
|
||||||
i = ibyte >> shift;
|
i = ibyte >> shift;
|
||||||
if (err == ESRCH)
|
if (err == ESRCH || i > end)
|
||||||
break;
|
break;
|
||||||
if (err) {
|
if (err) {
|
||||||
tx->tx_err = err;
|
tx->tx_err = err;
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -1517,6 +1517,16 @@ out:
|
|||||||
rw_downgrade(&dn->dn_struct_rwlock);
|
rw_downgrade(&dn->dn_struct_rwlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
dnode_dirty_l1(dnode_t *dn, uint64_t l1blkid, dmu_tx_t *tx)
|
||||||
|
{
|
||||||
|
dmu_buf_impl_t *db = dbuf_hold_level(dn, 1, l1blkid, FTAG);
|
||||||
|
if (db != NULL) {
|
||||||
|
dmu_buf_will_dirty(&db->db, tx);
|
||||||
|
dbuf_rele(db, FTAG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
@ -1637,27 +1647,68 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
|||||||
nblks += 1;
|
nblks += 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Dirty the first and last indirect blocks, as they (and/or their
|
* Dirty all the indirect blocks in this range. Note that only
|
||||||
* parents) will need to be written out if they were only
|
* the first and last indirect blocks can actually be written
|
||||||
* partially freed. Interior indirect blocks will be themselves freed,
|
* (if they were partially freed) -- they must be dirtied, even if
|
||||||
* by free_children(), so they need not be dirtied. Note that these
|
* they do not exist on disk yet. The interior blocks will
|
||||||
* interior blocks have already been prefetched by dmu_tx_hold_free().
|
* be freed by free_children(), so they will not actually be written.
|
||||||
|
* Even though these interior blocks will not be written, we
|
||||||
|
* dirty them for two reasons:
|
||||||
|
*
|
||||||
|
* - It ensures that the indirect blocks remain in memory until
|
||||||
|
* syncing context. (They have already been prefetched by
|
||||||
|
* dmu_tx_hold_free(), so we don't have to worry about reading
|
||||||
|
* them serially here.)
|
||||||
|
*
|
||||||
|
* - The dirty space accounting will put pressure on the txg sync
|
||||||
|
* mechanism to begin syncing, and to delay transactions if there
|
||||||
|
* is a large amount of freeing. Even though these indirect
|
||||||
|
* blocks will not be written, we could need to write the same
|
||||||
|
* amount of space if we copy the freed BPs into deadlists.
|
||||||
*/
|
*/
|
||||||
if (dn->dn_nlevels > 1) {
|
if (dn->dn_nlevels > 1) {
|
||||||
uint64_t first, last;
|
uint64_t first, last, i, ibyte;
|
||||||
|
int shift, err;
|
||||||
|
|
||||||
first = blkid >> epbs;
|
first = blkid >> epbs;
|
||||||
if ((db = dbuf_hold_level(dn, 1, first, FTAG))) {
|
dnode_dirty_l1(dn, first, tx);
|
||||||
dmu_buf_will_dirty(&db->db, tx);
|
|
||||||
dbuf_rele(db, FTAG);
|
|
||||||
}
|
|
||||||
if (trunc)
|
if (trunc)
|
||||||
last = dn->dn_maxblkid >> epbs;
|
last = dn->dn_maxblkid >> epbs;
|
||||||
else
|
else
|
||||||
last = (blkid + nblks - 1) >> epbs;
|
last = (blkid + nblks - 1) >> epbs;
|
||||||
if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) {
|
if (last != first)
|
||||||
dmu_buf_will_dirty(&db->db, tx);
|
dnode_dirty_l1(dn, last, tx);
|
||||||
dbuf_rele(db, FTAG);
|
|
||||||
|
shift = dn->dn_datablkshift + dn->dn_indblkshift -
|
||||||
|
SPA_BLKPTRSHIFT;
|
||||||
|
for (i = first + 1; i < last; i++) {
|
||||||
|
/*
|
||||||
|
* Set i to the blockid of the next non-hole
|
||||||
|
* level-1 indirect block at or after i. Note
|
||||||
|
* that dnode_next_offset() operates in terms of
|
||||||
|
* level-0-equivalent bytes.
|
||||||
|
*/
|
||||||
|
ibyte = i << shift;
|
||||||
|
err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK,
|
||||||
|
&ibyte, 2, 1, 0);
|
||||||
|
i = ibyte >> shift;
|
||||||
|
if (i >= last)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Normally we should not see an error, either
|
||||||
|
* from dnode_next_offset() or dbuf_hold_level()
|
||||||
|
* (except for ESRCH from dnode_next_offset).
|
||||||
|
* If there is an i/o error, then when we read
|
||||||
|
* this block in syncing context, it will use
|
||||||
|
* ZIO_FLAG_MUSTSUCCEED, and thus hang/panic according
|
||||||
|
* to the "failmode" property. dnode_next_offset()
|
||||||
|
* doesn't have a flag to indicate MUSTSUCCEED.
|
||||||
|
*/
|
||||||
|
if (err != 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
dnode_dirty_l1(dn, i, tx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -718,7 +718,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
|||||||
mutex_exit(&dn->dn_mtx);
|
mutex_exit(&dn->dn_mtx);
|
||||||
}
|
}
|
||||||
|
|
||||||
dbuf_sync_list(list, tx);
|
dbuf_sync_list(list, dn->dn_phys->dn_nlevels - 1, tx);
|
||||||
|
|
||||||
if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
||||||
ASSERT3P(list_head(list), ==, NULL);
|
ASSERT3P(list_head(list), ==, NULL);
|
||||||
|
Loading…
Reference in New Issue
Block a user