mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
OpenZFS 8961 - SPA load/import should tell us why it failed
Problem ======= When we fail to open or import a storage pool, we typically don't get any additional diagnostic information, just "no pool found" or "can not import". While there may be no additional user-consumable information, we should at least make this situation easier to debug/diagnose for developers and support. For example, we could start by using `zfs_dbgmsg()` to log each thing that we try when importing, and which things failed. E.g. "tried uberblock of txg X from label Y of device Z". Also, we could log each of the stages that we go through in `spa_load_impl()`. Solution ======== Following the cleanup to `spa_load_impl()`, debug messages have been added to every point of failure in that function. Additionally, debug messages have been added to strategic places, such as `vdev_disk_open()`. Authored by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Andrew Stormont <andyjstormont@gmail.com> Approved by: Dan McDonald <danmcd@joyent.com> Ported-by: Tim Chase <tim@chase2k.com> Signed-off-by: Tim Chase <tim@chase2k.com> OpenZFS-issue: https://illumos.org/issues/8961 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/418079e0 Closes #7459
This commit is contained in:
committed by
Brian Behlendorf
parent
ca0845d59e
commit
4a0ee12af8
+62
-8
@@ -74,6 +74,28 @@ unsigned int zfs_checksums_per_second = 20;
|
||||
*/
|
||||
int zfs_scan_ignore_errors = 0;
|
||||
|
||||
/*PRINTFLIKE2*/
|
||||
void
|
||||
vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
|
||||
{
|
||||
va_list adx;
|
||||
char buf[256];
|
||||
|
||||
va_start(adx, fmt);
|
||||
(void) vsnprintf(buf, sizeof (buf), fmt, adx);
|
||||
va_end(adx);
|
||||
|
||||
if (vd->vdev_path != NULL) {
|
||||
zfs_dbgmsg("%s vdev '%s': %s", vd->vdev_ops->vdev_op_type,
|
||||
vd->vdev_path, buf);
|
||||
} else {
|
||||
zfs_dbgmsg("%s-%llu vdev (guid %llu): %s",
|
||||
vd->vdev_ops->vdev_op_type,
|
||||
(u_longlong_t)vd->vdev_id,
|
||||
(u_longlong_t)vd->vdev_guid, buf);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Virtual device management.
|
||||
*/
|
||||
@@ -1056,14 +1078,20 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
|
||||
error = dmu_read(mos, vd->vdev_ms_array,
|
||||
m * sizeof (uint64_t), sizeof (uint64_t), &object,
|
||||
DMU_READ_PREFETCH);
|
||||
if (error)
|
||||
if (error != 0) {
|
||||
vdev_dbgmsg(vd, "unable to read the metaslab "
|
||||
"array [error=%d]", error);
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
error = metaslab_init(vd->vdev_mg, m, object, txg,
|
||||
&(vd->vdev_ms[m]));
|
||||
if (error)
|
||||
if (error != 0) {
|
||||
vdev_dbgmsg(vd, "metaslab_init failed [error=%d]",
|
||||
error);
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
if (txg == 0)
|
||||
@@ -1147,8 +1175,7 @@ vdev_probe_done(zio_t *zio)
|
||||
zio->io_error = 0;
|
||||
} else {
|
||||
ASSERT(zio->io_error != 0);
|
||||
zfs_dbgmsg("failed probe on vdev %llu",
|
||||
(longlong_t)vd->vdev_id);
|
||||
vdev_dbgmsg(vd, "failed probe");
|
||||
zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
|
||||
spa, vd, NULL, NULL, 0, 0);
|
||||
zio->io_error = SET_ERROR(ENXIO);
|
||||
@@ -1608,6 +1635,7 @@ vdev_validate(vdev_t *vd, boolean_t strict)
|
||||
if ((label = vdev_label_read_config(vd, txg)) == NULL) {
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_BAD_LABEL);
|
||||
vdev_dbgmsg(vd, "vdev_validate: failed reading config");
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -1620,6 +1648,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_SPLIT_POOL);
|
||||
nvlist_free(label);
|
||||
vdev_dbgmsg(vd, "vdev_validate: vdev split into other "
|
||||
"pool");
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -1629,6 +1659,10 @@ vdev_validate(vdev_t *vd, boolean_t strict)
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
nvlist_free(label);
|
||||
vdev_dbgmsg(vd, "vdev_validate: vdev label pool_guid "
|
||||
"doesn't match config (%llu != %llu)",
|
||||
(u_longlong_t)guid,
|
||||
(u_longlong_t)spa_guid(spa));
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -1658,6 +1692,9 @@ vdev_validate(vdev_t *vd, boolean_t strict)
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
nvlist_free(label);
|
||||
vdev_dbgmsg(vd, "vdev_validate: config guid doesn't "
|
||||
"match label guid (%llu != %llu)",
|
||||
(u_longlong_t)vd->vdev_guid, (u_longlong_t)guid);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -1666,6 +1703,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
nvlist_free(label);
|
||||
vdev_dbgmsg(vd, "vdev_validate: '%s' missing",
|
||||
ZPOOL_CONFIG_POOL_STATE);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -1677,8 +1716,12 @@ vdev_validate(vdev_t *vd, boolean_t strict)
|
||||
*/
|
||||
if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) &&
|
||||
spa_load_state(spa) == SPA_LOAD_OPEN &&
|
||||
state != POOL_STATE_ACTIVE)
|
||||
state != POOL_STATE_ACTIVE) {
|
||||
vdev_dbgmsg(vd, "vdev_validate: invalid pool state "
|
||||
"(%llu) for spa %s", (u_longlong_t)state,
|
||||
spa->spa_name);
|
||||
return (SET_ERROR(EBADF));
|
||||
}
|
||||
|
||||
/*
|
||||
* If we were able to open and validate a vdev that was
|
||||
@@ -2314,9 +2357,10 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
|
||||
* the top level so that we update the config.
|
||||
*/
|
||||
if (object != space_map_object(vd->vdev_dtl_sm)) {
|
||||
zfs_dbgmsg("txg %llu, spa %s, DTL old object %llu, "
|
||||
"new object %llu", txg, spa_name(spa), object,
|
||||
space_map_object(vd->vdev_dtl_sm));
|
||||
vdev_dbgmsg(vd, "txg %llu, spa %s, DTL old object %llu, "
|
||||
"new object %llu", (u_longlong_t)txg, spa_name(spa),
|
||||
(u_longlong_t)object,
|
||||
(u_longlong_t)space_map_object(vd->vdev_dtl_sm));
|
||||
vdev_config_dirty(vd->vdev_top);
|
||||
}
|
||||
|
||||
@@ -2425,8 +2469,13 @@ vdev_load(vdev_t *vd)
|
||||
if (vd->vdev_ashift == 0 || vd->vdev_asize == 0) {
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
vdev_dbgmsg(vd, "vdev_load: invalid size. ashift=%llu, "
|
||||
"asize=%llu", (u_longlong_t)vd->vdev_ashift,
|
||||
(u_longlong_t)vd->vdev_asize);
|
||||
return (SET_ERROR(ENXIO));
|
||||
} else if ((error = vdev_metaslab_init(vd, 0)) != 0) {
|
||||
vdev_dbgmsg(vd, "vdev_load: metaslab_init failed "
|
||||
"[error=%d]", error);
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
return (error);
|
||||
@@ -2439,6 +2488,8 @@ vdev_load(vdev_t *vd)
|
||||
if (vd->vdev_ops->vdev_op_leaf && (error = vdev_dtl_load(vd)) != 0) {
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
vdev_dbgmsg(vd, "vdev_load: vdev_dtl_load failed "
|
||||
"[error=%d]", error);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@@ -2452,6 +2503,9 @@ vdev_load(vdev_t *vd)
|
||||
obsolete_sm_object, 0, vd->vdev_asize, 0))) {
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
vdev_dbgmsg(vd, "vdev_load: space_map_open failed for "
|
||||
"obsolete spacemap (obj %llu) [error=%d]",
|
||||
(u_longlong_t)obsolete_sm_object, error);
|
||||
return (error);
|
||||
}
|
||||
space_map_update(vd->vdev_obsolete_sm);
|
||||
|
||||
Reference in New Issue
Block a user