Invoke zdb by guid to avoid import errors

The problem was that ztest removed a device and replaced it with
another device, and the pool was then reguided. The import then
failed because there were two possible imports with the same name:
one with the new guid, and one with the old. This can happen because
the label writes from the device removal/replacement can be subject
to ztest's error injection.

Other ways to fix this would be to change the error injection so
that it does not trigger on removals (which may not be technically
feasible), or to change the import code so that it does not report
configurations that are missing so many devices (which could have
unpleasant end-user effects when trying to recover from data loss
or device configuration issues).

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15298
This commit is contained in:
Paul Dagnelie 2023-09-22 16:08:51 -07:00 committed by Brian Behlendorf
parent 0aabd6b482
commit 0ce1b2ca19

View File

@ -6378,6 +6378,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
spa_t *spa = ztest_spa;
uint64_t orig, load;
int error;
ztest_shared_t *zs = ztest_shared;
if (ztest_opts.zo_mmp_test)
return;
@ -6387,6 +6388,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
(void) pthread_rwlock_wrlock(&ztest_name_lock);
error = spa_change_guid(spa);
zs->zs_guid = spa_guid(spa);
(void) pthread_rwlock_unlock(&ztest_name_lock);
if (error != 0)
@ -6916,7 +6918,7 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
* Verify pool integrity by running zdb.
*/
static void
ztest_run_zdb(const char *pool)
ztest_run_zdb(uint64_t guid)
{
int status;
char *bin;
@ -6940,13 +6942,13 @@ ztest_run_zdb(const char *pool)
free(set_gvars_args);
size_t would = snprintf(zdb, len,
"%s -bcc%s%s -G -d -Y -e -y %s -p %s %s",
"%s -bcc%s%s -G -d -Y -e -y %s -p %s %"PRIu64,
bin,
ztest_opts.zo_verbose >= 3 ? "s" : "",
ztest_opts.zo_verbose >= 4 ? "v" : "",
set_gvars_args_joined,
ztest_opts.zo_dir,
pool);
guid);
ASSERT3U(would, <, len);
umem_free(set_gvars_args_joined, strlen(set_gvars_args_joined) + 1);
@ -7524,14 +7526,15 @@ ztest_import(ztest_shared_t *zs)
VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
zs->zs_metaslab_sz =
1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
zs->zs_guid = spa_guid(spa);
spa_close(spa, FTAG);
kernel_fini();
if (!ztest_opts.zo_mmp_test) {
ztest_run_zdb(ztest_opts.zo_pool);
ztest_run_zdb(zs->zs_guid);
ztest_freeze();
ztest_run_zdb(ztest_opts.zo_pool);
ztest_run_zdb(zs->zs_guid);
}
(void) pthread_rwlock_destroy(&ztest_name_lock);
@ -7602,7 +7605,6 @@ ztest_run(ztest_shared_t *zs)
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
zs->zs_guid = dds.dds_guid;
dmu_objset_disown(os, B_TRUE, FTAG);
/*
@ -7873,14 +7875,15 @@ ztest_init(ztest_shared_t *zs)
VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
zs->zs_metaslab_sz =
1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
zs->zs_guid = spa_guid(spa);
spa_close(spa, FTAG);
kernel_fini();
if (!ztest_opts.zo_mmp_test) {
ztest_run_zdb(ztest_opts.zo_pool);
ztest_run_zdb(zs->zs_guid);
ztest_freeze();
ztest_run_zdb(ztest_opts.zo_pool);
ztest_run_zdb(zs->zs_guid);
}
(void) pthread_rwlock_destroy(&ztest_name_lock);
@ -8303,7 +8306,7 @@ main(int argc, char **argv)
}
if (!ztest_opts.zo_mmp_test)
ztest_run_zdb(ztest_opts.zo_pool);
ztest_run_zdb(zs->zs_guid);
}
if (ztest_opts.zo_verbose >= 1) {