Illumos 5911 - ZFS "hangs" while deleting file

5911 ZFS "hangs" while deleting file
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Simon Klinkert <simon.klinkert@gmail.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>

References:
  https://www.illumos.org/issues/5911
  https://github.com/illumos/illumos-gate/commit/46e1baa

Porting notes:

Resolved ISO C90 forbids mixed declarations and code wanting in
the dnode_free_range() function.

Ported-by: kernelOfTruth kerneloftruth@gmail.com
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3554
This commit is contained in:
Matthew Ahrens
2015-07-02 18:23:20 +02:00
committed by Brian Behlendorf
parent 5e8cd5d17f
commit 4bda3bd0e7
5 changed files with 91 additions and 34 deletions
+65 -14
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -1517,6 +1517,16 @@ out:
rw_downgrade(&dn->dn_struct_rwlock);
}
static void
dnode_dirty_l1(dnode_t *dn, uint64_t l1blkid, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = dbuf_hold_level(dn, 1, l1blkid, FTAG);
if (db != NULL) {
dmu_buf_will_dirty(&db->db, tx);
dbuf_rele(db, FTAG);
}
}
void
dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
{
@@ -1637,27 +1647,68 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
nblks += 1;
/*
* Dirty the first and last indirect blocks, as they (and/or their
* parents) will need to be written out if they were only
* partially freed. Interior indirect blocks will be themselves freed,
* by free_children(), so they need not be dirtied. Note that these
* interior blocks have already been prefetched by dmu_tx_hold_free().
* Dirty all the indirect blocks in this range. Note that only
* the first and last indirect blocks can actually be written
* (if they were partially freed) -- they must be dirtied, even if
* they do not exist on disk yet. The interior blocks will
* be freed by free_children(), so they will not actually be written.
* Even though these interior blocks will not be written, we
* dirty them for two reasons:
*
* - It ensures that the indirect blocks remain in memory until
* syncing context. (They have already been prefetched by
* dmu_tx_hold_free(), so we don't have to worry about reading
* them serially here.)
*
* - The dirty space accounting will put pressure on the txg sync
* mechanism to begin syncing, and to delay transactions if there
* is a large amount of freeing. Even though these indirect
* blocks will not be written, we could need to write the same
* amount of space if we copy the freed BPs into deadlists.
*/
if (dn->dn_nlevels > 1) {
uint64_t first, last;
uint64_t first, last, i, ibyte;
int shift, err;
first = blkid >> epbs;
if ((db = dbuf_hold_level(dn, 1, first, FTAG))) {
dmu_buf_will_dirty(&db->db, tx);
dbuf_rele(db, FTAG);
}
dnode_dirty_l1(dn, first, tx);
if (trunc)
last = dn->dn_maxblkid >> epbs;
else
last = (blkid + nblks - 1) >> epbs;
if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) {
dmu_buf_will_dirty(&db->db, tx);
dbuf_rele(db, FTAG);
if (last != first)
dnode_dirty_l1(dn, last, tx);
shift = dn->dn_datablkshift + dn->dn_indblkshift -
SPA_BLKPTRSHIFT;
for (i = first + 1; i < last; i++) {
/*
* Set i to the blockid of the next non-hole
* level-1 indirect block at or after i. Note
* that dnode_next_offset() operates in terms of
* level-0-equivalent bytes.
*/
ibyte = i << shift;
err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK,
&ibyte, 2, 1, 0);
i = ibyte >> shift;
if (i >= last)
break;
/*
* Normally we should not see an error, either
* from dnode_next_offset() or dbuf_hold_level()
* (except for ESRCH from dnode_next_offset).
* If there is an i/o error, then when we read
* this block in syncing context, it will use
* ZIO_FLAG_MUSTSUCCEED, and thus hang/panic according
* to the "failmode" property. dnode_next_offset()
* doesn't have a flag to indicate MUSTSUCCEED.
*/
if (err != 0)
break;
dnode_dirty_l1(dn, i, tx);
}
}