mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Add support for parallel pool exports
Changed spa_export_common() such that it no longer holds the spa_namespace_lock for the entire duration and instead sets spa_export_thread to indicate an export is in progress on the spa. This allows for an export to a different pool to proceed in parallel while an export is still processing potentially long operations like spa_unload_log_sm_flush_all(). Calls like spa_lookup() and spa_vdev_enter() that rely on the spa_namespace_lock to serialize them against a concurrent export, now wait for any in-progress export thread to complete before proceeding. The 'zpool export -a' sub-command also provides multi-threaded support, using a thread pool to submit the exports in parallel. Sponsored-By: Klara Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: George Wilson <gwilson@delphix.com> Signed-off-by: Don Brady <don.brady@klarasystems.com> Closes #16153
This commit is contained in:
committed by
Brian Behlendorf
parent
abec7dcd30
commit
975a13259b
+2
-2
@@ -8143,11 +8143,11 @@ l2arc_dev_get_next(void)
|
||||
|
||||
ASSERT3P(next, !=, NULL);
|
||||
} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
|
||||
next->l2ad_trim_all);
|
||||
next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting);
|
||||
|
||||
/* if we were unable to find any usable vdevs, return NULL */
|
||||
if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
|
||||
next->l2ad_trim_all)
|
||||
next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting)
|
||||
next = NULL;
|
||||
|
||||
l2arc_dev_last = next;
|
||||
|
||||
+29
-7
@@ -34,6 +34,7 @@
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
|
||||
* Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
|
||||
* Copyright (c) 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -1991,7 +1992,8 @@ spa_destroy_aux_threads(spa_t *spa)
|
||||
static void
|
||||
spa_unload(spa_t *spa)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED);
|
||||
|
||||
spa_import_progress_remove(spa_guid(spa));
|
||||
@@ -6955,7 +6957,7 @@ static int
|
||||
spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
|
||||
boolean_t force, boolean_t hardforce)
|
||||
{
|
||||
int error;
|
||||
int error = 0;
|
||||
spa_t *spa;
|
||||
hrtime_t export_start = gethrtime();
|
||||
|
||||
@@ -6979,8 +6981,8 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
|
||||
spa->spa_is_exporting = B_TRUE;
|
||||
|
||||
/*
|
||||
* Put a hold on the pool, drop the namespace lock, stop async tasks,
|
||||
* reacquire the namespace lock, and see if we can export.
|
||||
* Put a hold on the pool, drop the namespace lock, stop async tasks
|
||||
* and see if we can export.
|
||||
*/
|
||||
spa_open_ref(spa, FTAG);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
@@ -6990,10 +6992,18 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
|
||||
taskq_wait(spa->spa_zvol_taskq);
|
||||
}
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa->spa_export_thread = curthread;
|
||||
spa_close(spa, FTAG);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
/*
|
||||
* At this point we no longer hold the spa_namespace_lock and
|
||||
* the spa_export_thread indicates that an export is in progress.
|
||||
*/
|
||||
|
||||
if (spa->spa_state == POOL_STATE_UNINITIALIZED)
|
||||
goto export_spa;
|
||||
|
||||
/*
|
||||
* The pool will be in core if it's openable, in which case we can
|
||||
* modify its state. Objsets may be open only because they're dirty,
|
||||
@@ -7089,6 +7099,10 @@ export_spa:
|
||||
if (oldconfig && spa->spa_config)
|
||||
*oldconfig = fnvlist_dup(spa->spa_config);
|
||||
|
||||
if (new_state == POOL_STATE_EXPORTED)
|
||||
zio_handle_export_delay(spa, gethrtime() - export_start);
|
||||
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
if (new_state != POOL_STATE_UNINITIALIZED) {
|
||||
if (!hardforce)
|
||||
spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
|
||||
@@ -7100,17 +7114,25 @@ export_spa:
|
||||
* we make sure to reset the exporting flag.
|
||||
*/
|
||||
spa->spa_is_exporting = B_FALSE;
|
||||
spa->spa_export_thread = NULL;
|
||||
}
|
||||
|
||||
if (new_state == POOL_STATE_EXPORTED)
|
||||
zio_handle_export_delay(spa, gethrtime() - export_start);
|
||||
|
||||
/*
|
||||
* Wake up any waiters on spa_namespace_lock
|
||||
* They need to re-attempt a spa_lookup()
|
||||
*/
|
||||
cv_broadcast(&spa_namespace_cv);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
return (0);
|
||||
|
||||
fail:
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa->spa_is_exporting = B_FALSE;
|
||||
spa->spa_export_thread = NULL;
|
||||
spa_async_resume(spa);
|
||||
|
||||
/* Wake up any waiters on spa_namespace_lock */
|
||||
cv_broadcast(&spa_namespace_cv);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
return (error);
|
||||
}
|
||||
|
||||
+40
-10
@@ -27,7 +27,7 @@
|
||||
* Copyright (c) 2017 Datto Inc.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
|
||||
* Copyright (c) 2023, Klara Inc.
|
||||
* Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -82,8 +82,8 @@
|
||||
* - Check if spa_refcount is zero
|
||||
* - Rename a spa_t
|
||||
* - add/remove/attach/detach devices
|
||||
* - Held for the duration of create/destroy/export
|
||||
* - Held at the start and end of import
|
||||
* - Held for the duration of create/destroy
|
||||
* - Held at the start and end of import and export
|
||||
*
|
||||
* It does not need to handle recursion. A create or destroy may
|
||||
* reference objects (files or zvols) in other pools, but by
|
||||
@@ -636,8 +636,14 @@ retry:
|
||||
if (spa == NULL)
|
||||
return (NULL);
|
||||
|
||||
if (spa->spa_load_thread != NULL &&
|
||||
spa->spa_load_thread != curthread) {
|
||||
/*
|
||||
* Avoid racing with import/export, which don't hold the namespace
|
||||
* lock for their entire duration.
|
||||
*/
|
||||
if ((spa->spa_load_thread != NULL &&
|
||||
spa->spa_load_thread != curthread) ||
|
||||
(spa->spa_export_thread != NULL &&
|
||||
spa->spa_export_thread != curthread)) {
|
||||
cv_wait(&spa_namespace_cv, &spa_namespace_lock);
|
||||
goto retry;
|
||||
}
|
||||
@@ -950,14 +956,15 @@ spa_open_ref(spa_t *spa, const void *tag)
|
||||
|
||||
/*
|
||||
* Remove a reference to the given spa_t. Must have at least one reference, or
|
||||
* have the namespace lock held.
|
||||
* have the namespace lock held or be part of a pool import/export.
|
||||
*/
|
||||
void
|
||||
spa_close(spa_t *spa, const void *tag)
|
||||
{
|
||||
ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref ||
|
||||
MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_load_thread == curthread);
|
||||
spa->spa_load_thread == curthread ||
|
||||
spa->spa_export_thread == curthread);
|
||||
(void) zfs_refcount_remove(&spa->spa_refcount, tag);
|
||||
}
|
||||
|
||||
@@ -977,13 +984,15 @@ spa_async_close(spa_t *spa, const void *tag)
|
||||
|
||||
/*
|
||||
* Check to see if the spa refcount is zero. Must be called with
|
||||
* spa_namespace_lock held. We really compare against spa_minref, which is the
|
||||
* number of references acquired when opening a pool
|
||||
* spa_namespace_lock held or be the spa export thread. We really
|
||||
* compare against spa_minref, which is the number of references
|
||||
* acquired when opening a pool
|
||||
*/
|
||||
boolean_t
|
||||
spa_refcount_zero(spa_t *spa)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
|
||||
return (zfs_refcount_count(&spa->spa_refcount) == spa->spa_minref);
|
||||
}
|
||||
@@ -1231,6 +1240,21 @@ spa_vdev_enter(spa_t *spa)
|
||||
mutex_enter(&spa->spa_vdev_top_lock);
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
|
||||
/*
|
||||
* We have a reference on the spa and a spa export could be
|
||||
* starting but no longer holding the spa_namespace_lock. So
|
||||
* check if there is an export and if so wait. It will fail
|
||||
* fast (EBUSY) since we are still holding a spa reference.
|
||||
*
|
||||
* Note that we can be woken by a different spa transitioning
|
||||
* through an import/export, so we must wait for our condition
|
||||
* to change before proceeding.
|
||||
*/
|
||||
while (spa->spa_export_thread != NULL &&
|
||||
spa->spa_export_thread != curthread) {
|
||||
cv_wait(&spa_namespace_cv, &spa_namespace_lock);
|
||||
}
|
||||
|
||||
vdev_autotrim_stop_all(spa);
|
||||
|
||||
return (spa_vdev_config_enter(spa));
|
||||
@@ -1248,6 +1272,12 @@ spa_vdev_detach_enter(spa_t *spa, uint64_t guid)
|
||||
mutex_enter(&spa->spa_vdev_top_lock);
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
|
||||
/* See comment in spa_vdev_enter() */
|
||||
while (spa->spa_export_thread != NULL &&
|
||||
spa->spa_export_thread != curthread) {
|
||||
cv_wait(&spa_namespace_cv, &spa_namespace_lock);
|
||||
}
|
||||
|
||||
vdev_autotrim_stop_all(spa);
|
||||
|
||||
if (guid != 0) {
|
||||
|
||||
@@ -682,7 +682,8 @@ vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list)
|
||||
(void) spa;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
|
||||
while ((vd = list_remove_head(vd_list)) != NULL) {
|
||||
mutex_enter(&vd->vdev_initialize_lock);
|
||||
@@ -724,7 +725,8 @@ vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state,
|
||||
if (vd_list == NULL) {
|
||||
vdev_initialize_stop_wait_impl(vd);
|
||||
} else {
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
vd->vdev_spa->spa_export_thread == curthread);
|
||||
list_insert_tail(vd_list, vd);
|
||||
}
|
||||
}
|
||||
@@ -756,7 +758,8 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
list_t vd_list;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
|
||||
list_create(&vd_list, sizeof (vdev_t),
|
||||
offsetof(vdev_t, vdev_initialize_node));
|
||||
|
||||
@@ -1087,7 +1087,8 @@ vdev_rebuild_stop_wait(vdev_t *vd)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
|
||||
if (vd == spa->spa_root_vdev) {
|
||||
for (uint64_t i = 0; i < vd->vdev_children; i++)
|
||||
|
||||
@@ -1040,7 +1040,8 @@ vdev_trim_stop_wait(spa_t *spa, list_t *vd_list)
|
||||
(void) spa;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
|
||||
while ((vd = list_remove_head(vd_list)) != NULL) {
|
||||
mutex_enter(&vd->vdev_trim_lock);
|
||||
@@ -1079,7 +1080,8 @@ vdev_trim_stop(vdev_t *vd, vdev_trim_state_t tgt_state, list_t *vd_list)
|
||||
if (vd_list == NULL) {
|
||||
vdev_trim_stop_wait_impl(vd);
|
||||
} else {
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
vd->vdev_spa->spa_export_thread == curthread);
|
||||
list_insert_tail(vd_list, vd);
|
||||
}
|
||||
}
|
||||
@@ -1115,7 +1117,8 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
|
||||
list_t vd_list;
|
||||
vdev_t *vd_l2cache;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
|
||||
list_create(&vd_list, sizeof (vdev_t),
|
||||
offsetof(vdev_t, vdev_trim_node));
|
||||
|
||||
Reference in New Issue
Block a user