mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 11:47:43 +03:00
Parallel pool import
This commit allow spa_load() to drop the spa_namespace_lock so that imports can happen concurrently. Prior to dropping the spa_namespace_lock, the import logic will set the spa_load_thread value to track the thread which is doing the import. Consumers of spa_lookup() retain the same behavior by blocking when either a thread is holding the spa_namespace_lock or the spa_load_thread value is set. This will ensure that critical concurrent operations cannot take place while a pool is being imported. The zpool command is also enhanced to provide multi-threaded support when invoking zpool import -a. Lastly, zinject provides a mechanism to insert artificial delays when importing a pool and new zfs tests are added to verify parallel import functionality. Contributions-by: Don Brady <don.brady@klarasystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Wilson <gwilson@delphix.com> Closes #16093
This commit is contained in:
+40
-18
@@ -3273,8 +3273,6 @@ spa_spawn_aux_threads(spa_t *spa)
|
||||
{
|
||||
ASSERT(spa_writeable(spa));
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
|
||||
spa_start_raidz_expansion_thread(spa);
|
||||
spa_start_indirect_condensing_thread(spa);
|
||||
spa_start_livelist_destroy_thread(spa);
|
||||
@@ -4981,7 +4979,8 @@ spa_ld_read_checkpoint_txg(spa_t *spa)
|
||||
int error = 0;
|
||||
|
||||
ASSERT0(spa->spa_checkpoint_txg);
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_load_thread == curthread);
|
||||
|
||||
error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
|
||||
@@ -5228,6 +5227,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
boolean_t checkpoint_rewind =
|
||||
(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
|
||||
boolean_t update_config_cache = B_FALSE;
|
||||
hrtime_t load_start = gethrtime();
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
|
||||
@@ -5272,13 +5272,19 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the namespace lock for the rest of the function.
|
||||
*/
|
||||
spa->spa_load_thread = curthread;
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
/*
|
||||
* Retrieve the checkpoint txg if the pool has a checkpoint.
|
||||
*/
|
||||
spa_import_progress_set_notes(spa, "Loading checkpoint txg");
|
||||
error = spa_ld_read_checkpoint_txg(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Retrieve the mapping of indirect vdevs. Those vdevs were removed
|
||||
@@ -5291,7 +5297,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Loading indirect vdev metadata");
|
||||
error = spa_ld_open_indirect_vdev_metadata(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Retrieve the full list of active features from the MOS and check if
|
||||
@@ -5300,7 +5306,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Checking feature flags");
|
||||
error = spa_ld_check_features(spa, &missing_feat_write);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Load several special directories from the MOS needed by the dsl_pool
|
||||
@@ -5309,7 +5315,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Loading special MOS directories");
|
||||
error = spa_ld_load_special_directories(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Retrieve pool properties from the MOS.
|
||||
@@ -5317,7 +5323,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Loading properties");
|
||||
error = spa_ld_get_props(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Retrieve the list of auxiliary devices - cache devices and spares -
|
||||
@@ -5326,7 +5332,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Loading AUX vdevs");
|
||||
error = spa_ld_open_aux_vdevs(spa, type);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Load the metadata for all vdevs. Also check if unopenable devices
|
||||
@@ -5335,17 +5341,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Loading vdev metadata");
|
||||
error = spa_ld_load_vdev_metadata(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
spa_import_progress_set_notes(spa, "Loading dedup tables");
|
||||
error = spa_ld_load_dedup_tables(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
spa_import_progress_set_notes(spa, "Loading BRT");
|
||||
error = spa_ld_load_brt(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Verify the logs now to make sure we don't have any unexpected errors
|
||||
@@ -5354,7 +5360,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Verifying Log Devices");
|
||||
error = spa_ld_verify_logs(spa, type, ereport);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
if (missing_feat_write) {
|
||||
ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT);
|
||||
@@ -5364,8 +5370,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
* read-only mode but not read-write mode. We now have enough
|
||||
* information and can return to userland.
|
||||
*/
|
||||
return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
|
||||
ENOTSUP));
|
||||
error = spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
|
||||
ENOTSUP);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5376,7 +5383,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_import_progress_set_notes(spa, "Verifying pool data");
|
||||
error = spa_ld_verify_pool_data(spa);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Calculate the deflated space for the pool. This must be done before
|
||||
@@ -5501,13 +5508,19 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
|
||||
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||
spa_import_progress_set_notes(spa, "Finished importing");
|
||||
}
|
||||
zio_handle_import_delay(spa, gethrtime() - load_start);
|
||||
|
||||
spa_import_progress_remove(spa_guid(spa));
|
||||
spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
|
||||
|
||||
spa_load_note(spa, "LOADED");
|
||||
fail:
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa->spa_load_thread = NULL;
|
||||
cv_broadcast(&spa_namespace_cv);
|
||||
|
||||
return (error);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -6757,9 +6770,14 @@ spa_tryimport(nvlist_t *tryconfig)
|
||||
/*
|
||||
* Create and initialize the spa structure.
|
||||
*/
|
||||
char *name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
|
||||
(void) snprintf(name, MAXPATHLEN, "%s-%llx-%s",
|
||||
TRYIMPORT_NAME, (u_longlong_t)curthread, poolname);
|
||||
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
|
||||
spa = spa_add(name, tryconfig, NULL);
|
||||
spa_activate(spa, SPA_MODE_READ);
|
||||
kmem_free(name, MAXPATHLEN);
|
||||
|
||||
/*
|
||||
* Rewind pool if a max txg was provided.
|
||||
@@ -6874,6 +6892,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
|
||||
{
|
||||
int error;
|
||||
spa_t *spa;
|
||||
hrtime_t export_start = gethrtime();
|
||||
|
||||
if (oldconfig)
|
||||
*oldconfig = NULL;
|
||||
@@ -7018,6 +7037,9 @@ export_spa:
|
||||
spa->spa_is_exporting = B_FALSE;
|
||||
}
|
||||
|
||||
if (new_state == POOL_STATE_EXPORTED)
|
||||
zio_handle_export_delay(spa, gethrtime() - export_start);
|
||||
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
return (0);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user