mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
Parallel pool import
This commit allow spa_load() to drop the spa_namespace_lock so that imports can happen concurrently. Prior to dropping the spa_namespace_lock, the import logic will set the spa_load_thread value to track the thread which is doing the import. Consumers of spa_lookup() retain the same behavior by blocking when either a thread is holding the spa_namespace_lock or the spa_load_thread value is set. This will ensure that critical concurrent operations cannot take place while a pool is being imported. The zpool command is also enhanced to provide multi-threaded support when invoking zpool import -a. Lastly, zinject provides a mechanism to insert artificial delays when importing a pool and new zfs tests are added to verify parallel import functionality. Contributions-by: Don Brady <don.brady@klarasystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Wilson <gwilson@delphix.com> Closes #16093
This commit is contained in:
+106
-9
@@ -22,7 +22,7 @@
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2024, Klara Inc.
|
||||
* Copyright (c) 2023-2024, Klara Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -310,6 +310,11 @@ usage(void)
|
||||
"\t\tcreate 3 lanes on the device; one lane with a latency\n"
|
||||
"\t\tof 10 ms and two lanes with a 25 ms latency.\n"
|
||||
"\n"
|
||||
"\tzinject -P import|export -s <seconds> pool\n"
|
||||
"\t\tAdd an artificial delay to a future pool import or export,\n"
|
||||
"\t\tsuch that the operation takes a minimum of supplied seconds\n"
|
||||
"\t\tto complete.\n"
|
||||
"\n"
|
||||
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
|
||||
"\t\tCause the pool to stop writing blocks yet not\n"
|
||||
"\t\treport errors for a duration. Simulates buggy hardware\n"
|
||||
@@ -392,8 +397,10 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
|
||||
{
|
||||
int *count = data;
|
||||
|
||||
if (record->zi_guid != 0 || record->zi_func[0] != '\0')
|
||||
if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
|
||||
record->zi_duration != 0) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (*count == 0) {
|
||||
(void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s "
|
||||
@@ -507,6 +514,33 @@ print_panic_handler(int id, const char *pool, zinject_record_t *record,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
|
||||
void *data)
|
||||
{
|
||||
int *count = data;
|
||||
|
||||
if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
|
||||
record->zi_cmd != ZINJECT_DELAY_EXPORT) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (*count == 0) {
|
||||
(void) printf("%3s %-19s %-11s %s\n",
|
||||
"ID", "POOL", "DELAY (sec)", "COMMAND");
|
||||
(void) printf("--- ------------------- -----------"
|
||||
" -------\n");
|
||||
}
|
||||
|
||||
*count += 1;
|
||||
|
||||
(void) printf("%3d %-19s %-11llu %s\n",
|
||||
id, pool, (u_longlong_t)record->zi_duration,
|
||||
record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print all registered error handlers. Returns the number of handlers
|
||||
* registered.
|
||||
@@ -537,6 +571,13 @@ print_all_handlers(void)
|
||||
count = 0;
|
||||
}
|
||||
|
||||
(void) iter_handlers(print_pool_delay_handler, &count);
|
||||
if (count > 0) {
|
||||
total += count;
|
||||
(void) printf("\n");
|
||||
count = 0;
|
||||
}
|
||||
|
||||
(void) iter_handlers(print_panic_handler, &count);
|
||||
|
||||
return (count + total);
|
||||
@@ -609,9 +650,27 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
|
||||
zc.zc_guid = flags;
|
||||
|
||||
if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
|
||||
(void) fprintf(stderr, "failed to add handler: %s\n",
|
||||
errno == EDOM ? "block level exceeds max level of object" :
|
||||
strerror(errno));
|
||||
const char *errmsg = strerror(errno);
|
||||
|
||||
switch (errno) {
|
||||
case EDOM:
|
||||
errmsg = "block level exceeds max level of object";
|
||||
break;
|
||||
case EEXIST:
|
||||
if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
|
||||
errmsg = "pool already imported";
|
||||
if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
|
||||
errmsg = "a handler already exists";
|
||||
break;
|
||||
case ENOENT:
|
||||
/* import delay injector running on older zfs module */
|
||||
if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
|
||||
errmsg = "import delay injector not supported";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
|
||||
return (1);
|
||||
}
|
||||
|
||||
@@ -636,6 +695,9 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
|
||||
} else if (record->zi_duration < 0) {
|
||||
(void) printf(" txgs: %lld \n",
|
||||
(u_longlong_t)-record->zi_duration);
|
||||
} else if (record->zi_timer > 0) {
|
||||
(void) printf(" timer: %lld ms\n",
|
||||
(u_longlong_t)NSEC2MSEC(record->zi_timer));
|
||||
} else {
|
||||
(void) printf("objset: %llu\n",
|
||||
(u_longlong_t)record->zi_objset);
|
||||
@@ -834,7 +896,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
|
||||
while ((c = getopt(argc, argv,
|
||||
":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
|
||||
":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
flags |= ZINJECT_FLUSH_ARC;
|
||||
@@ -952,6 +1014,19 @@ main(int argc, char **argv)
|
||||
sizeof (record.zi_func));
|
||||
record.zi_cmd = ZINJECT_PANIC;
|
||||
break;
|
||||
case 'P':
|
||||
if (strcasecmp(optarg, "import") == 0) {
|
||||
record.zi_cmd = ZINJECT_DELAY_IMPORT;
|
||||
} else if (strcasecmp(optarg, "export") == 0) {
|
||||
record.zi_cmd = ZINJECT_DELAY_EXPORT;
|
||||
} else {
|
||||
(void) fprintf(stderr, "invalid command '%s': "
|
||||
"must be 'import' or 'export'\n", optarg);
|
||||
usage();
|
||||
libzfs_fini(g_zfs);
|
||||
return (1);
|
||||
}
|
||||
break;
|
||||
case 'q':
|
||||
quiet = 1;
|
||||
break;
|
||||
@@ -1033,7 +1108,7 @@ main(int argc, char **argv)
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (record.zi_duration != 0)
|
||||
if (record.zi_duration != 0 && record.zi_cmd == 0)
|
||||
record.zi_cmd = ZINJECT_IGNORED_WRITES;
|
||||
|
||||
if (cancel != NULL) {
|
||||
@@ -1179,8 +1254,8 @@ main(int argc, char **argv)
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0 || device != NULL || record.zi_freq > 0 ||
|
||||
dvas != 0) {
|
||||
(void) fprintf(stderr, "panic (-p) incompatible with "
|
||||
"other options\n");
|
||||
(void) fprintf(stderr, "%s incompatible with other "
|
||||
"options\n", "import|export delay (-P)");
|
||||
usage();
|
||||
libzfs_fini(g_zfs);
|
||||
return (2);
|
||||
@@ -1198,6 +1273,28 @@ main(int argc, char **argv)
|
||||
if (argv[1] != NULL)
|
||||
record.zi_type = atoi(argv[1]);
|
||||
dataset[0] = '\0';
|
||||
} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
|
||||
record.zi_cmd == ZINJECT_DELAY_EXPORT) {
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0 || device != NULL || record.zi_freq > 0 ||
|
||||
dvas != 0) {
|
||||
(void) fprintf(stderr, "%s incompatible with other "
|
||||
"options\n", "import|export delay (-P)");
|
||||
usage();
|
||||
libzfs_fini(g_zfs);
|
||||
return (2);
|
||||
}
|
||||
|
||||
if (argc != 1 || record.zi_duration <= 0) {
|
||||
(void) fprintf(stderr, "import|export delay (-P) "
|
||||
"injection requires a duration (-s) and a single "
|
||||
"pool name\n");
|
||||
usage();
|
||||
libzfs_fini(g_zfs);
|
||||
return (2);
|
||||
}
|
||||
|
||||
(void) strlcpy(pool, argv[0], sizeof (pool));
|
||||
} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0 || record.zi_freq > 0 || dvas != 0) {
|
||||
|
||||
+55
-17
@@ -50,6 +50,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <thread_pool.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <pwd.h>
|
||||
@@ -3455,15 +3456,40 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
|
||||
return (ret);
|
||||
}
|
||||
|
||||
typedef struct import_parameters {
|
||||
nvlist_t *ip_config;
|
||||
const char *ip_mntopts;
|
||||
nvlist_t *ip_props;
|
||||
int ip_flags;
|
||||
int *ip_err;
|
||||
} import_parameters_t;
|
||||
|
||||
static void
|
||||
do_import_task(void *arg)
|
||||
{
|
||||
import_parameters_t *ip = arg;
|
||||
*ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts,
|
||||
ip->ip_props, ip->ip_flags);
|
||||
free(ip);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
|
||||
char *orig_name, char *new_name,
|
||||
boolean_t do_destroyed, boolean_t pool_specified, boolean_t do_all,
|
||||
importargs_t *import)
|
||||
char *orig_name, char *new_name, importargs_t *import)
|
||||
{
|
||||
nvlist_t *config = NULL;
|
||||
nvlist_t *found_config = NULL;
|
||||
uint64_t pool_state;
|
||||
boolean_t pool_specified = (import->poolname != NULL ||
|
||||
import->guid != 0);
|
||||
|
||||
|
||||
tpool_t *tp = NULL;
|
||||
if (import->do_all) {
|
||||
tp = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN),
|
||||
0, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point we have a list of import candidate configs. Even if
|
||||
@@ -3480,9 +3506,11 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
|
||||
|
||||
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
|
||||
&pool_state) == 0);
|
||||
if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
|
||||
if (!import->do_destroyed &&
|
||||
pool_state == POOL_STATE_DESTROYED)
|
||||
continue;
|
||||
if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
|
||||
if (import->do_destroyed &&
|
||||
pool_state != POOL_STATE_DESTROYED)
|
||||
continue;
|
||||
|
||||
verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
|
||||
@@ -3491,12 +3519,21 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
|
||||
if (!pool_specified) {
|
||||
if (first)
|
||||
first = B_FALSE;
|
||||
else if (!do_all)
|
||||
else if (!import->do_all)
|
||||
(void) fputc('\n', stdout);
|
||||
|
||||
if (do_all) {
|
||||
err |= do_import(config, NULL, mntopts,
|
||||
props, flags);
|
||||
if (import->do_all) {
|
||||
import_parameters_t *ip = safe_malloc(
|
||||
sizeof (import_parameters_t));
|
||||
|
||||
ip->ip_config = config;
|
||||
ip->ip_mntopts = mntopts;
|
||||
ip->ip_props = props;
|
||||
ip->ip_flags = flags;
|
||||
ip->ip_err = &err;
|
||||
|
||||
(void) tpool_dispatch(tp, do_import_task,
|
||||
(void *)ip);
|
||||
} else {
|
||||
/*
|
||||
* If we're importing from cachefile, then
|
||||
@@ -3544,6 +3581,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
|
||||
found_config = config;
|
||||
}
|
||||
}
|
||||
if (import->do_all) {
|
||||
tpool_wait(tp);
|
||||
tpool_destroy(tp);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we were searching for a specific pool, verify that we found a
|
||||
@@ -3773,7 +3814,6 @@ zpool_do_import(int argc, char **argv)
|
||||
boolean_t xtreme_rewind = B_FALSE;
|
||||
boolean_t do_scan = B_FALSE;
|
||||
boolean_t pool_exists = B_FALSE;
|
||||
boolean_t pool_specified = B_FALSE;
|
||||
uint64_t txg = -1ULL;
|
||||
char *cachefile = NULL;
|
||||
importargs_t idata = { 0 };
|
||||
@@ -3972,7 +4012,6 @@ zpool_do_import(int argc, char **argv)
|
||||
searchname = argv[0];
|
||||
searchguid = 0;
|
||||
}
|
||||
pool_specified = B_TRUE;
|
||||
|
||||
/*
|
||||
* User specified a name or guid. Ensure it's unique.
|
||||
@@ -4005,6 +4044,8 @@ zpool_do_import(int argc, char **argv)
|
||||
idata.cachefile = cachefile;
|
||||
idata.scan = do_scan;
|
||||
idata.policy = policy;
|
||||
idata.do_destroyed = do_destroyed;
|
||||
idata.do_all = do_all;
|
||||
|
||||
libpc_handle_t lpch = {
|
||||
.lpc_lib_handle = g_zfs,
|
||||
@@ -4047,9 +4088,7 @@ zpool_do_import(int argc, char **argv)
|
||||
}
|
||||
|
||||
err = import_pools(pools, props, mntopts, flags,
|
||||
argc >= 1 ? argv[0] : NULL,
|
||||
argc >= 2 ? argv[1] : NULL,
|
||||
do_destroyed, pool_specified, do_all, &idata);
|
||||
argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL, &idata);
|
||||
|
||||
/*
|
||||
* If we're using the cachefile and we failed to import, then
|
||||
@@ -4070,9 +4109,8 @@ zpool_do_import(int argc, char **argv)
|
||||
pools = zpool_search_import(&lpch, &idata);
|
||||
|
||||
err = import_pools(pools, props, mntopts, flags,
|
||||
argc >= 1 ? argv[0] : NULL,
|
||||
argc >= 2 ? argv[1] : NULL,
|
||||
do_destroyed, pool_specified, do_all, &idata);
|
||||
argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL,
|
||||
&idata);
|
||||
}
|
||||
|
||||
error:
|
||||
|
||||
Reference in New Issue
Block a user