mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-27 04:32:16 +03:00
Illumos 4390 - I/O errors can corrupt space map when deleting fs/vol
4390 i/o errors when deleting filesystem/zvol can lead to space map corruption Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Dan McDonald <danmcd@omniti.com> Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com> Approved by: Dan McDonald <danmcd@omniti.com> References: https://www.illumos.org/issues/4390 https://github.com/illumos/illumos-gate/commit/7fd05ac Porting notes: Previous stack-reduction efforts in traverse_visitb() caused a fair number of un-mergable pieces of code. This patch should reduce its stack footprint a bit more. The new local bptree_entry_phys_t in bptree_add() is dynamically-allocated using kmem_zalloc() for the purpose of stack reduction. The new global zfs_free_leak_on_eio has been defined as an integer rather than a boolean_t as was the case with the related zfs_recover global. Also, zfs_free_leak_on_eio's definition has been inserted into zfs_debug.c for consistency with the existing definition of zfs_recover. Illumos placed it in spa_misc.c. Ported by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2545
This commit is contained in:
committed by
Brian Behlendorf
parent
9b67f60560
commit
fbeddd60b7
+54
-73
@@ -58,12 +58,11 @@ typedef struct traverse_data {
|
||||
zbookmark_t *td_resume;
|
||||
int td_flags;
|
||||
prefetch_data_t *td_pfd;
|
||||
boolean_t td_paused;
|
||||
blkptr_cb_t *td_func;
|
||||
void *td_arg;
|
||||
} traverse_data_t;
|
||||
|
||||
#define TD_HARD(td) (td->td_flags & TRAVERSE_HARD)
|
||||
|
||||
static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
uint64_t objset, uint64_t object);
|
||||
static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
|
||||
@@ -165,7 +164,6 @@ resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
* If we found the block we're trying to resume from, zero
|
||||
* the bookmark out to indicate that we have resumed.
|
||||
*/
|
||||
ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
|
||||
if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
|
||||
bzero(td->td_resume, sizeof (*zb));
|
||||
if (td->td_flags & TRAVERSE_POST)
|
||||
@@ -175,14 +173,6 @@ resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
return (RESUME_SKIP_NONE);
|
||||
}
|
||||
|
||||
static void
|
||||
traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
|
||||
{
|
||||
ASSERT(td->td_resume != NULL);
|
||||
ASSERT0(zb->zb_level);
|
||||
bcopy(zb, td->td_resume, sizeof (*td->td_resume));
|
||||
}
|
||||
|
||||
static void
|
||||
traverse_prefetch_metadata(traverse_data_t *td,
|
||||
const blkptr_t *bp, const zbookmark_t *zb)
|
||||
@@ -211,9 +201,8 @@ static int
|
||||
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
const blkptr_t *bp, const zbookmark_t *zb)
|
||||
{
|
||||
int err = 0, lasterr = 0;
|
||||
int err = 0;
|
||||
arc_buf_t *buf = NULL;
|
||||
boolean_t pause = B_FALSE;
|
||||
|
||||
switch (resume_skip_check(td, dnp, zb)) {
|
||||
case RESUME_SKIP_ALL:
|
||||
@@ -252,7 +241,9 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
|
||||
return (err);
|
||||
if (err != 0)
|
||||
goto post;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (td->td_pfd && !td->td_pfd->pd_exited &&
|
||||
@@ -273,8 +264,6 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
td->td_arg);
|
||||
if (err == TRAVERSE_VISIT_NO_CHILDREN)
|
||||
return (0);
|
||||
if (err == ERESTART)
|
||||
pause = B_TRUE; /* handle pausing at a common point */
|
||||
if (err != 0)
|
||||
goto post;
|
||||
}
|
||||
@@ -288,7 +277,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
goto post;
|
||||
|
||||
czb = kmem_alloc(sizeof (zbookmark_t), KM_PUSHPAGE);
|
||||
|
||||
@@ -307,11 +296,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
zb->zb_blkid * epb + i);
|
||||
err = traverse_visitbp(td, dnp,
|
||||
&((blkptr_t *)buf->b_data)[i], czb);
|
||||
if (err != 0) {
|
||||
if (!TD_HARD(td))
|
||||
break;
|
||||
lasterr = err;
|
||||
}
|
||||
if (err != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
kmem_free(czb, sizeof (zbookmark_t));
|
||||
@@ -324,7 +310,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
goto post;
|
||||
dnp = buf->b_data;
|
||||
|
||||
for (i = 0; i < epb; i++) {
|
||||
@@ -336,11 +322,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
for (i = 0; i < epb; i++) {
|
||||
err = traverse_dnode(td, &dnp[i], zb->zb_objset,
|
||||
zb->zb_blkid * epb + i);
|
||||
if (err != 0) {
|
||||
if (!TD_HARD(td))
|
||||
break;
|
||||
lasterr = err;
|
||||
}
|
||||
if (err != 0)
|
||||
break;
|
||||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
@@ -350,7 +333,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
goto post;
|
||||
|
||||
osp = buf->b_data;
|
||||
dnp = &osp->os_meta_dnode;
|
||||
@@ -365,19 +348,11 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
|
||||
err = traverse_dnode(td, dnp, zb->zb_objset,
|
||||
DMU_META_DNODE_OBJECT);
|
||||
if (err && TD_HARD(td)) {
|
||||
lasterr = err;
|
||||
err = 0;
|
||||
}
|
||||
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
||||
dnp = &osp->os_groupused_dnode;
|
||||
err = traverse_dnode(td, dnp, zb->zb_objset,
|
||||
DMU_GROUPUSED_OBJECT);
|
||||
}
|
||||
if (err && TD_HARD(td)) {
|
||||
lasterr = err;
|
||||
err = 0;
|
||||
}
|
||||
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
||||
dnp = &osp->os_userused_dnode;
|
||||
err = traverse_dnode(td, dnp, zb->zb_objset,
|
||||
@@ -389,19 +364,37 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
(void) arc_buf_remove_ref(buf, &buf);
|
||||
|
||||
post:
|
||||
if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
|
||||
if (err == 0 && (td->td_flags & TRAVERSE_POST))
|
||||
err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
|
||||
if (err == ERESTART)
|
||||
pause = B_TRUE;
|
||||
|
||||
if ((td->td_flags & TRAVERSE_HARD) && (err == EIO || err == ECKSUM)) {
|
||||
/*
|
||||
* Ignore this disk error as requested by the HARD flag,
|
||||
* and continue traversal.
|
||||
*/
|
||||
err = 0;
|
||||
}
|
||||
|
||||
if (pause && td->td_resume != NULL) {
|
||||
ASSERT3U(err, ==, ERESTART);
|
||||
ASSERT(!TD_HARD(td));
|
||||
traverse_pause(td, zb);
|
||||
/*
|
||||
* If we are stopping here, set td_resume.
|
||||
*/
|
||||
if (td->td_resume != NULL && err != 0 && !td->td_paused) {
|
||||
td->td_resume->zb_objset = zb->zb_objset;
|
||||
td->td_resume->zb_object = zb->zb_object;
|
||||
td->td_resume->zb_level = 0;
|
||||
/*
|
||||
* If we have stopped on an indirect block (e.g. due to
|
||||
* i/o error), we have not visited anything below it.
|
||||
* Set the bookmark to the first level-0 block that we need
|
||||
* to visit. This way, the resuming code does not need to
|
||||
* deal with resuming from indirect blocks.
|
||||
*/
|
||||
td->td_resume->zb_blkid = zb->zb_blkid <<
|
||||
(zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
|
||||
td->td_paused = B_TRUE;
|
||||
}
|
||||
|
||||
return (err != 0 ? err : lasterr);
|
||||
return (err);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -426,29 +419,21 @@ static int
|
||||
traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
uint64_t objset, uint64_t object)
|
||||
{
|
||||
int j, err = 0, lasterr = 0;
|
||||
int j, err = 0;
|
||||
zbookmark_t czb;
|
||||
|
||||
for (j = 0; j < dnp->dn_nblkptr; j++) {
|
||||
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
|
||||
err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
|
||||
if (err != 0) {
|
||||
if (!TD_HARD(td))
|
||||
break;
|
||||
lasterr = err;
|
||||
}
|
||||
if (err != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
|
||||
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
|
||||
err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
|
||||
if (err != 0) {
|
||||
if (!TD_HARD(td))
|
||||
return (err);
|
||||
lasterr = err;
|
||||
}
|
||||
}
|
||||
return (err != 0 ? err : lasterr);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
@@ -539,6 +524,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
||||
td->td_arg = arg;
|
||||
td->td_pfd = pd;
|
||||
td->td_flags = flags;
|
||||
td->td_paused = B_FALSE;
|
||||
|
||||
pd->pd_blks_max = zfs_pd_blks_max;
|
||||
pd->pd_flags = flags;
|
||||
@@ -617,7 +603,7 @@ int
|
||||
traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
|
||||
blkptr_cb_t func, void *arg)
|
||||
{
|
||||
int err, lasterr = 0;
|
||||
int err;
|
||||
uint64_t obj;
|
||||
dsl_pool_t *dp = spa_get_dsl(spa);
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
@@ -630,16 +616,15 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
|
||||
return (err);
|
||||
|
||||
/* visit each dataset */
|
||||
for (obj = 1; err == 0 || (err != ESRCH && hard);
|
||||
for (obj = 1; err == 0;
|
||||
err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
|
||||
dmu_object_info_t doi;
|
||||
|
||||
err = dmu_object_info(mos, obj, &doi);
|
||||
if (err != 0) {
|
||||
if (!hard)
|
||||
return (err);
|
||||
lasterr = err;
|
||||
continue;
|
||||
if (hard)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
|
||||
if (doi.doi_bonus_type == DMU_OT_DSL_DATASET) {
|
||||
@@ -650,25 +635,21 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
|
||||
err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
|
||||
dsl_pool_config_exit(dp, FTAG);
|
||||
if (err != 0) {
|
||||
if (!hard)
|
||||
return (err);
|
||||
lasterr = err;
|
||||
continue;
|
||||
if (hard)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
if (ds->ds_phys->ds_prev_snap_txg > txg)
|
||||
txg = ds->ds_phys->ds_prev_snap_txg;
|
||||
err = traverse_dataset(ds, txg, flags, func, arg);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
if (err != 0) {
|
||||
if (!hard)
|
||||
return (err);
|
||||
lasterr = err;
|
||||
}
|
||||
if (err != 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (err == ESRCH)
|
||||
err = 0;
|
||||
return (err != 0 ? err : lasterr);
|
||||
return (err);
|
||||
}
|
||||
|
||||
#if defined(_KERNEL) && defined(HAVE_SPL)
|
||||
|
||||
Reference in New Issue
Block a user