Add ability to scrub from last scrubbed txg

Some users might want to scrub only new data because they would like
to know whether newly written data has been corrupted.  This PR adds
the ability to scrub only newly written data.

This introduces a new `last_scrubbed_txg` property, indicating the
transaction group (TXG) up to which the most recent scrub operation
has checked and repaired the pool, so users can run a scrub only
from the last saved point.  We use scn_max_txg and scn_min_txg,
which are already built into scrub, to accomplish that.

Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
Sponsored-By: Wasabi Technology, Inc.
Sponsored-By: Klara Inc.
Closes #16301
This commit is contained in:
Mariusz Zaborski
2024-12-04 20:21:45 +01:00
committed by Brian Behlendorf
parent 5988de77b0
commit 3b0c1131ef
19 changed files with 264 additions and 35 deletions
+3
View File
@@ -128,6 +128,9 @@ zpool_prop_init(void)
zprop_register_number(ZPOOL_PROP_DEDUP_TABLE_SIZE, "dedup_table_size",
0, PROP_READONLY, ZFS_TYPE_POOL, "<size>", "DDTSIZE", B_FALSE,
sfeatures);
zprop_register_number(ZPOOL_PROP_LAST_SCRUBBED_TXG,
"last_scrubbed_txg", 0, PROP_READONLY, ZFS_TYPE_POOL, "<txg>",
"LAST_SCRUBBED_TXG", B_FALSE, sfeatures);
/* default number properties */
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
+48 -17
View File
@@ -231,6 +231,9 @@ static uint_t zfs_resilver_defer_percent = 10;
((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \
(scn)->scn_phys.scn_func == POOL_SCAN_RESILVER)
#define DSL_SCAN_IS_SCRUB(scn) \
((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB)
/*
* Enable/disable the processing of the free_bpobj object.
*/
@@ -855,15 +858,15 @@ dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
void
dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
(void) arg;
setup_sync_arg_t *setup_sync_arg = (setup_sync_arg_t *)arg;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
pool_scan_func_t *funcp = arg;
dmu_object_type_t ot = 0;
dsl_pool_t *dp = scn->scn_dp;
spa_t *spa = dp->dp_spa;
ASSERT(!dsl_scan_is_running(scn));
ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
ASSERT3U(setup_sync_arg->func, >, POOL_SCAN_NONE);
ASSERT3U(setup_sync_arg->func, <, POOL_SCAN_FUNCS);
memset(&scn->scn_phys, 0, sizeof (scn->scn_phys));
/*
@@ -873,10 +876,14 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
memset(&scn->errorscrub_phys, 0, sizeof (scn->errorscrub_phys));
dsl_errorscrub_sync_state(scn, tx);
scn->scn_phys.scn_func = *funcp;
scn->scn_phys.scn_func = setup_sync_arg->func;
scn->scn_phys.scn_state = DSS_SCANNING;
scn->scn_phys.scn_min_txg = 0;
scn->scn_phys.scn_max_txg = tx->tx_txg;
scn->scn_phys.scn_min_txg = setup_sync_arg->txgstart;
if (setup_sync_arg->txgend == 0) {
scn->scn_phys.scn_max_txg = tx->tx_txg;
} else {
scn->scn_phys.scn_max_txg = setup_sync_arg->txgend;
}
scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
scn->scn_phys.scn_start_time = gethrestime_sec();
scn->scn_phys.scn_errors = 0;
@@ -963,7 +970,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
spa_history_log_internal(spa, "scan setup", tx,
"func=%u mintxg=%llu maxtxg=%llu",
*funcp, (u_longlong_t)scn->scn_phys.scn_min_txg,
setup_sync_arg->func, (u_longlong_t)scn->scn_phys.scn_min_txg,
(u_longlong_t)scn->scn_phys.scn_max_txg);
}
@@ -973,10 +980,16 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
* error scrub.
*/
int
dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan(dsl_pool_t *dp, pool_scan_func_t func, uint64_t txgstart,
uint64_t txgend)
{
spa_t *spa = dp->dp_spa;
dsl_scan_t *scn = dp->dp_scan;
setup_sync_arg_t setup_sync_arg;
if (func != POOL_SCAN_SCRUB && (txgstart != 0 || txgend != 0)) {
return (EINVAL);
}
/*
* Purge all vdev caches and probe all devices. We do this here
@@ -1027,8 +1040,13 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
return (SET_ERROR(err));
}
setup_sync_arg.func = func;
setup_sync_arg.txgstart = txgstart;
setup_sync_arg.txgend = txgend;
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
dsl_scan_setup_sync, &setup_sync_arg, 0,
ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
static void
@@ -1116,15 +1134,24 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
spa_notify_waiters(spa);
if (dsl_scan_restarting(scn, tx))
if (dsl_scan_restarting(scn, tx)) {
spa_history_log_internal(spa, "scan aborted, restarting", tx,
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
else if (!complete)
} else if (!complete) {
spa_history_log_internal(spa, "scan cancelled", tx,
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
else
} else {
spa_history_log_internal(spa, "scan done", tx,
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB(scn)) {
VERIFY0(zap_update(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_LAST_SCRUBBED_TXG,
sizeof (uint64_t), 1,
&scn->scn_phys.scn_max_txg, tx));
spa->spa_scrubbed_last_txg = scn->scn_phys.scn_max_txg;
}
}
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
spa->spa_scrub_active = B_FALSE;
@@ -4330,14 +4357,18 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* current scan progress is below zfs_resilver_defer_percent.
*/
if (dsl_scan_restarting(scn, tx) || restart_early) {
pool_scan_func_t func = POOL_SCAN_SCRUB;
setup_sync_arg_t setup_sync_arg = {
.func = POOL_SCAN_SCRUB,
.txgstart = 0,
.txgend = 0,
};
dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
func = POOL_SCAN_RESILVER;
setup_sync_arg.func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d",
func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg,
restart_early);
dsl_scan_setup_sync(&func, tx);
setup_sync_arg.func, dp->dp_spa->spa_name,
(longlong_t)tx->tx_txg, restart_early);
dsl_scan_setup_sync(&setup_sync_arg, tx);
}
/*
+20 -2
View File
@@ -451,9 +451,10 @@ spa_prop_get_config(spa_t *spa, nvlist_t *nv)
spa_prop_add_list(nv, ZPOOL_PROP_DEDUP_TABLE_SIZE, NULL,
ddt_get_ddt_dsize(spa), src);
spa_prop_add_list(nv, ZPOOL_PROP_HEALTH, NULL,
rvd->vdev_state, src);
spa_prop_add_list(nv, ZPOOL_PROP_LAST_SCRUBBED_TXG, NULL,
spa_get_last_scrubbed_txg(spa), src);
version = spa_version(spa);
if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) {
@@ -4727,6 +4728,12 @@ spa_ld_get_props(spa_t *spa)
if (error != 0 && error != ENOENT)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
/* Load the last scrubbed txg. */
error = spa_dir_prop(spa, DMU_POOL_LAST_SCRUBBED_TXG,
&spa->spa_scrubbed_last_txg, B_FALSE);
if (error != 0 && error != ENOENT)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
/*
* Load the livelist deletion field. If a livelist is queued for
* deletion, indicate that in the spa
@@ -8869,6 +8876,13 @@ spa_scan_stop(spa_t *spa)
/*
 * Start a scan of type func without a caller-supplied txg range.
 * Thin wrapper: forwards to spa_scan_range() with txgstart and txgend
 * both 0 and returns its result unchanged.
 */
int
spa_scan(spa_t *spa, pool_scan_func_t func)
{
return (spa_scan_range(spa, func, 0, 0));
}
int
spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart,
uint64_t txgend)
{
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
@@ -8879,6 +8893,9 @@ spa_scan(spa_t *spa, pool_scan_func_t func)
!spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
return (SET_ERROR(ENOTSUP));
if (func != POOL_SCAN_SCRUB && (txgstart != 0 || txgend != 0))
return (SET_ERROR(ENOTSUP));
/*
* If a resilver was requested, but there is no DTL on a
* writeable leaf device, we have nothing to do.
@@ -8893,7 +8910,7 @@ spa_scan(spa_t *spa, pool_scan_func_t func)
!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG))
return (SET_ERROR(ENOTSUP));
return (dsl_scan(spa->spa_dsl_pool, func));
return (dsl_scan(spa->spa_dsl_pool, func, txgstart, txgend));
}
/*
@@ -10976,6 +10993,7 @@ EXPORT_SYMBOL(spa_l2cache_drop);
/* scanning */
EXPORT_SYMBOL(spa_scan);
EXPORT_SYMBOL(spa_scan_range);
EXPORT_SYMBOL(spa_scan_stop);
/* spa syncing */
+6
View File
@@ -2676,6 +2676,12 @@ spa_mode(spa_t *spa)
return (spa->spa_mode);
}
/*
 * Return the in-core spa_scrubbed_last_txg value for this pool.
 * Plain field read; no locking is done here.
 */
uint64_t
spa_get_last_scrubbed_txg(spa_t *spa)
{
return (spa->spa_scrubbed_last_txg);
}
uint64_t
spa_bootfs(spa_t *spa)
{
+9 -3
View File
@@ -3811,9 +3811,15 @@ raidz_reflow_complete_sync(void *arg, dmu_tx_t *tx)
* setup a scrub. All the data has been successfully copied
* but we have not validated any checksums.
*/
pool_scan_func_t func = POOL_SCAN_SCRUB;
if (zfs_scrub_after_expand && dsl_scan_setup_check(&func, tx) == 0)
dsl_scan_setup_sync(&func, tx);
setup_sync_arg_t setup_sync_arg = {
.func = POOL_SCAN_SCRUB,
.txgstart = 0,
.txgend = 0,
};
if (zfs_scrub_after_expand &&
dsl_scan_setup_check(&setup_sync_arg.func, tx) == 0) {
dsl_scan_setup_sync(&setup_sync_arg, tx);
}
}
/*
+7 -3
View File
@@ -345,10 +345,14 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
* While we're in syncing context take the opportunity to
* setup the scrub when there are no more active rebuilds.
*/
pool_scan_func_t func = POOL_SCAN_SCRUB;
if (dsl_scan_setup_check(&func, tx) == 0 &&
setup_sync_arg_t setup_sync_arg = {
.func = POOL_SCAN_SCRUB,
.txgstart = 0,
.txgend = 0,
};
if (dsl_scan_setup_check(&setup_sync_arg.func, tx) == 0 &&
zfs_rebuild_scrub_enabled) {
dsl_scan_setup_sync(&func, tx);
dsl_scan_setup_sync(&setup_sync_arg, tx);
}
cv_broadcast(&vd->vdev_rebuild_cv);
+3
View File
@@ -1718,6 +1718,9 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
} else if (scan_type == POOL_SCAN_NONE) {
error = spa_scan_stop(spa);
} else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) {
error = spa_scan_range(spa, scan_type,
spa_get_last_scrubbed_txg(spa), 0);
} else {
error = spa_scan(spa, scan_type);
}