zpool import progress kstat

When an import requires a long MMP activity check, or when the user
requests pool recovery, the import make take a long time.  The user may
not know why, or be able to tell whether the import is progressing or is
hung.

Add a kstat which lists all imports currently being processed by the
kernel (currently only one at a time is possible, but the kstat allows
for more than one).  The kstat is /proc/spl/kstat/zfs/import_progress.

The kstat contents are as follows:
pool_guid         load_state multihost_secs  max_txg pool_name
16667015954387398 3          15              0       tank3

load_state: the value of spa_load_state
multihost_secs:  seconds until the end of the multihost activity
                 check; if over, or none required, this is 0
max_txg: current spa_load_max_txg, if rewind is occurring

This could be used by outside tools, such as a pacemaker resource agent,
to report import progress, or as a part of manual troubleshooting.  The
zpool import subcommand could also be modified to report this
information.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #8696
This commit is contained in:
Olaf Faaland
2019-05-09 10:08:05 -07:00
committed by Brian Behlendorf
parent b689de85e8
commit ca95f70dff
3 changed files with 242 additions and 2 deletions
+24 -2
View File
@@ -1437,6 +1437,7 @@ spa_unload(spa_t *spa)
ASSERT(MUTEX_HELD(&spa_namespace_lock));
spa_import_progress_remove(spa_guid(spa));
spa_load_note(spa, "UNLOADING");
/*
@@ -2375,6 +2376,8 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
int error;
spa->spa_load_state = state;
(void) spa_import_progress_set_state(spa_guid(spa),
spa_load_state(spa));
gethrestime(&spa->spa_loaded_ts);
error = spa_load_impl(spa, type, &ereport);
@@ -2397,6 +2400,9 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
spa->spa_ena = 0;
(void) spa_import_progress_set_state(spa_guid(spa),
spa_load_state(spa));
return (error);
}
@@ -2469,6 +2475,7 @@ spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label,
*/
if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0)
return (B_FALSE);
/*
* If the tryconfig_ values are nonzero, they are the results of an
* earlier tryimport. If they all match the uberblock we just found,
@@ -2617,10 +2624,14 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
import_delay = spa_activity_check_duration(spa, ub);
/* Add a small random factor in case of simultaneous imports (0-25%) */
import_expire = gethrtime() + import_delay +
(import_delay * spa_get_random(250) / 1000);
import_delay += import_delay * spa_get_random(250) / 1000;
import_expire = gethrtime() + import_delay;
while (gethrtime() < import_expire) {
(void) spa_import_progress_set_mmp_check(spa_guid(spa),
NSEC2SEC(import_expire - gethrtime()));
vdev_uberblock_load(rvd, ub, &mmp_label);
if (txg != ub->ub_txg || timestamp != ub->ub_timestamp ||
@@ -2987,6 +2998,10 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
}
if (spa->spa_load_max_txg != UINT64_MAX) {
(void) spa_import_progress_set_max_txg(spa_guid(spa),
(u_longlong_t)spa->spa_load_max_txg);
}
spa_load_note(spa, "using uberblock with txg=%llu",
(u_longlong_t)ub->ub_txg);
@@ -3916,6 +3931,8 @@ spa_ld_mos_init(spa_t *spa, spa_import_type_t type)
if (error != 0)
return (error);
spa_import_progress_add(spa);
/*
* Now that we have the vdev tree, try to open each vdev. This involves
* opening the underlying physical device, retrieving its geometry and
@@ -4346,6 +4363,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
spa_config_exit(spa, SCL_CONFIG, FTAG);
}
spa_import_progress_remove(spa_guid(spa));
spa_load_note(spa, "LOADED");
return (0);
@@ -4406,6 +4424,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
* from previous txgs when spa_load fails.
*/
ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
spa_import_progress_remove(spa_guid(spa));
return (load_error);
}
@@ -4417,6 +4436,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
if (rewind_flags & ZPOOL_NEVER_REWIND) {
nvlist_free(config);
spa_import_progress_remove(spa_guid(spa));
return (load_error);
}
@@ -4459,6 +4479,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
if (state == SPA_LOAD_RECOVER) {
ASSERT3P(loadinfo, ==, NULL);
spa_import_progress_remove(spa_guid(spa));
return (rewind_error);
} else {
/* Store the rewind info as part of the initial load info */
@@ -4469,6 +4490,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
fnvlist_free(spa->spa_load_info);
spa->spa_load_info = loadinfo;
spa_import_progress_remove(spa_guid(spa));
return (load_error);
}
}