Teach zpool scrub to scrub only blocks in error log

Added a flag '-e' to zpool scrub to scrub only the blocks in the error
log. A user can pause, resume, and cancel the error scrub by passing the
additional command line arguments -p and -s, just like a regular scrub.
This involves adding a new flag, creating new libzfs interfaces, a new
ioctl, and the actual iteration and read-issuing logic. Error scrubbing
is executed across multiple txgs to make sure pool performance is not
affected.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Co-authored-by: TulsiJain <tulsi.jain@delphix.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #8995
Closes #12355
This commit is contained in:
George Amanakis
2021-12-17 21:35:28 +01:00
committed by Brian Behlendorf
parent e34e15ed6d
commit 482eeef804
29 changed files with 1602 additions and 71 deletions
+2 -1
View File
@@ -5717,7 +5717,8 @@
<enumerator name='POOL_SCAN_NONE' value='0'/>
<enumerator name='POOL_SCAN_SCRUB' value='1'/>
<enumerator name='POOL_SCAN_RESILVER' value='2'/>
<enumerator name='POOL_SCAN_FUNCS' value='3'/>
<enumerator name='POOL_SCAN_ERRORSCRUB' value='3'/>
<enumerator name='POOL_SCAN_FUNCS' value='4'/>
</enum-decl>
<typedef-decl name='pool_scan_func_t' type-id='1b092565' id='7313fbe2'/>
<enum-decl name='pool_scrub_cmd' id='a1474cbd'>
+85 -20
View File
@@ -2648,50 +2648,84 @@ out:
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{
zfs_cmd_t zc = {"\0"};
char errbuf[ERRBUFLEN];
int err;
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_cookie = func;
zc.zc_flags = cmd;
nvlist_t *args = fnvlist_alloc();
fnvlist_add_uint64(args, "scan_type", (uint64_t)func);
fnvlist_add_uint64(args, "scan_command", (uint64_t)cmd);
if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
err = lzc_scrub(ZFS_IOC_POOL_SCRUB, zhp->zpool_name, args, NULL);
fnvlist_free(args);
if (err == 0) {
return (0);
} else if (err == ZFS_ERR_IOC_CMD_UNAVAIL) {
zfs_cmd_t zc = {"\0"};
(void) strlcpy(zc.zc_name, zhp->zpool_name,
sizeof (zc.zc_name));
zc.zc_cookie = func;
zc.zc_flags = cmd;
err = errno;
if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
return (0);
}
/* ECANCELED on a scrub means we resumed a paused scrub */
if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
cmd == POOL_SCRUB_NORMAL)
/*
* An ECANCELED on a scrub means one of the following:
* 1. we resumed a paused scrub.
* 2. we resumed a paused error scrub.
* 3. Error scrub is not run because of no error log.
*/
if (err == ECANCELED && (func == POOL_SCAN_SCRUB ||
func == POOL_SCAN_ERRORSCRUB) && cmd == POOL_SCRUB_NORMAL)
return (0);
if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
/*
* The following case is handled here:
* 1. Pausing a scrub/error scrub when there is none in progress.
*/
if (err == ENOENT && func != POOL_SCAN_NONE && cmd ==
POOL_SCRUB_PAUSE) {
return (0);
}
if (func == POOL_SCAN_SCRUB) {
ASSERT3U(func, >=, POOL_SCAN_NONE);
ASSERT3U(func, <, POOL_SCAN_FUNCS);
if (func == POOL_SCAN_SCRUB || func == POOL_SCAN_ERRORSCRUB) {
if (cmd == POOL_SCRUB_PAUSE) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot pause scrubbing %s"),
zc.zc_name);
zhp->zpool_name);
} else {
assert(cmd == POOL_SCRUB_NORMAL);
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot scrub %s"),
zc.zc_name);
zhp->zpool_name);
}
} else if (func == POOL_SCAN_RESILVER) {
assert(cmd == POOL_SCRUB_NORMAL);
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot restart resilver on %s"), zc.zc_name);
"cannot restart resilver on %s"), zhp->zpool_name);
} else if (func == POOL_SCAN_NONE) {
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot cancel scrubbing %s"), zc.zc_name);
"cannot cancel scrubbing %s"), zhp->zpool_name);
} else {
assert(!"unexpected result");
}
/*
* With EBUSY, six cases are possible:
*
* Current state Requested
* 1. Normal Scrub Running Normal Scrub or Error Scrub
* 2. Normal Scrub Paused Error Scrub
* 3. Normal Scrub Paused Pause Normal Scrub
* 4. Error Scrub Running Normal Scrub or Error Scrub
* 5. Error Scrub Paused Pause Error Scrub
* 6. Resilvering Anything else
*/
if (err == EBUSY) {
nvlist_t *nvroot;
pool_scan_stat_t *ps = NULL;
@@ -2703,12 +2737,43 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
ps->pss_state == DSS_SCANNING) {
if (cmd == POOL_SCRUB_PAUSE)
return (zfs_error(hdl, EZFS_SCRUB_PAUSED,
if (ps->pss_pass_scrub_pause == 0) {
/* handles case 1 */
assert(cmd == POOL_SCRUB_NORMAL);
return (zfs_error(hdl, EZFS_SCRUBBING,
errbuf));
else
return (zfs_error(hdl, EZFS_SCRUBBING, errbuf));
} else {
if (func == POOL_SCAN_ERRORSCRUB) {
/* handles case 2 */
ASSERT3U(cmd, ==, POOL_SCRUB_NORMAL);
return (zfs_error(hdl,
EZFS_SCRUB_PAUSED_TO_CANCEL,
errbuf));
} else {
/* handles case 3 */
ASSERT3U(func, ==, POOL_SCAN_SCRUB);
ASSERT3U(cmd, ==, POOL_SCRUB_PAUSE);
return (zfs_error(hdl,
EZFS_SCRUB_PAUSED, errbuf));
}
}
} else if (ps &&
ps->pss_error_scrub_func == POOL_SCAN_ERRORSCRUB &&
ps->pss_error_scrub_state == DSS_ERRORSCRUBBING) {
if (ps->pss_pass_error_scrub_pause == 0) {
/* handles case 4 */
ASSERT3U(cmd, ==, POOL_SCRUB_NORMAL);
return (zfs_error(hdl, EZFS_ERRORSCRUBBING,
errbuf));
} else {
/* handles case 5 */
ASSERT3U(func, ==, POOL_SCAN_ERRORSCRUB);
ASSERT3U(cmd, ==, POOL_SCRUB_PAUSE);
return (zfs_error(hdl, EZFS_ERRORSCRUB_PAUSED,
errbuf));
}
} else {
/* handles case 6 */
return (zfs_error(hdl, EZFS_RESILVERING, errbuf));
}
} else if (err == ENOENT) {
+12 -2
View File
@@ -243,10 +243,20 @@ libzfs_error_description(libzfs_handle_t *hdl)
"into a new one"));
case EZFS_SCRUB_PAUSED:
return (dgettext(TEXT_DOMAIN, "scrub is paused; "
"use 'zpool scrub' to resume"));
"use 'zpool scrub' to resume scrub"));
case EZFS_SCRUB_PAUSED_TO_CANCEL:
return (dgettext(TEXT_DOMAIN, "scrub is paused; "
"use 'zpool scrub' to resume or 'zpool scrub -s' to "
"cancel scrub"));
case EZFS_SCRUBBING:
return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
"use 'zpool scrub -s' to cancel current scrub"));
"use 'zpool scrub -s' to cancel scrub"));
case EZFS_ERRORSCRUBBING:
return (dgettext(TEXT_DOMAIN, "currently error scrubbing; "
"use 'zpool scrub -s' to cancel error scrub"));
case EZFS_ERRORSCRUB_PAUSED:
return (dgettext(TEXT_DOMAIN, "error scrub is paused; "
"use 'zpool scrub -e' to resume error scrub"));
case EZFS_NO_SCRUB:
return (dgettext(TEXT_DOMAIN, "there is no active scrub"));
case EZFS_DIFF:
+105
View File
@@ -187,6 +187,7 @@
<elf-symbol name='lzc_reopen' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_rollback' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_rollback_to' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_scrub' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_send' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_send_redacted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_send_resume' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -1261,6 +1262,110 @@
<enumerator name='POOL_TRIM_FUNCS' value='3'/>
</enum-decl>
<typedef-decl name='pool_trim_func_t' type-id='54ed608a' id='b1146b8d'/>
<enum-decl name='zfs_ioc' id='12033f13'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='ZFS_IOC_FIRST' value='23040'/>
<enumerator name='ZFS_IOC' value='23040'/>
<enumerator name='ZFS_IOC_POOL_CREATE' value='23040'/>
<enumerator name='ZFS_IOC_POOL_DESTROY' value='23041'/>
<enumerator name='ZFS_IOC_POOL_IMPORT' value='23042'/>
<enumerator name='ZFS_IOC_POOL_EXPORT' value='23043'/>
<enumerator name='ZFS_IOC_POOL_CONFIGS' value='23044'/>
<enumerator name='ZFS_IOC_POOL_STATS' value='23045'/>
<enumerator name='ZFS_IOC_POOL_TRYIMPORT' value='23046'/>
<enumerator name='ZFS_IOC_POOL_SCAN' value='23047'/>
<enumerator name='ZFS_IOC_POOL_FREEZE' value='23048'/>
<enumerator name='ZFS_IOC_POOL_UPGRADE' value='23049'/>
<enumerator name='ZFS_IOC_POOL_GET_HISTORY' value='23050'/>
<enumerator name='ZFS_IOC_VDEV_ADD' value='23051'/>
<enumerator name='ZFS_IOC_VDEV_REMOVE' value='23052'/>
<enumerator name='ZFS_IOC_VDEV_SET_STATE' value='23053'/>
<enumerator name='ZFS_IOC_VDEV_ATTACH' value='23054'/>
<enumerator name='ZFS_IOC_VDEV_DETACH' value='23055'/>
<enumerator name='ZFS_IOC_VDEV_SETPATH' value='23056'/>
<enumerator name='ZFS_IOC_VDEV_SETFRU' value='23057'/>
<enumerator name='ZFS_IOC_OBJSET_STATS' value='23058'/>
<enumerator name='ZFS_IOC_OBJSET_ZPLPROPS' value='23059'/>
<enumerator name='ZFS_IOC_DATASET_LIST_NEXT' value='23060'/>
<enumerator name='ZFS_IOC_SNAPSHOT_LIST_NEXT' value='23061'/>
<enumerator name='ZFS_IOC_SET_PROP' value='23062'/>
<enumerator name='ZFS_IOC_CREATE' value='23063'/>
<enumerator name='ZFS_IOC_DESTROY' value='23064'/>
<enumerator name='ZFS_IOC_ROLLBACK' value='23065'/>
<enumerator name='ZFS_IOC_RENAME' value='23066'/>
<enumerator name='ZFS_IOC_RECV' value='23067'/>
<enumerator name='ZFS_IOC_SEND' value='23068'/>
<enumerator name='ZFS_IOC_INJECT_FAULT' value='23069'/>
<enumerator name='ZFS_IOC_CLEAR_FAULT' value='23070'/>
<enumerator name='ZFS_IOC_INJECT_LIST_NEXT' value='23071'/>
<enumerator name='ZFS_IOC_ERROR_LOG' value='23072'/>
<enumerator name='ZFS_IOC_CLEAR' value='23073'/>
<enumerator name='ZFS_IOC_PROMOTE' value='23074'/>
<enumerator name='ZFS_IOC_SNAPSHOT' value='23075'/>
<enumerator name='ZFS_IOC_DSOBJ_TO_DSNAME' value='23076'/>
<enumerator name='ZFS_IOC_OBJ_TO_PATH' value='23077'/>
<enumerator name='ZFS_IOC_POOL_SET_PROPS' value='23078'/>
<enumerator name='ZFS_IOC_POOL_GET_PROPS' value='23079'/>
<enumerator name='ZFS_IOC_SET_FSACL' value='23080'/>
<enumerator name='ZFS_IOC_GET_FSACL' value='23081'/>
<enumerator name='ZFS_IOC_SHARE' value='23082'/>
<enumerator name='ZFS_IOC_INHERIT_PROP' value='23083'/>
<enumerator name='ZFS_IOC_SMB_ACL' value='23084'/>
<enumerator name='ZFS_IOC_USERSPACE_ONE' value='23085'/>
<enumerator name='ZFS_IOC_USERSPACE_MANY' value='23086'/>
<enumerator name='ZFS_IOC_USERSPACE_UPGRADE' value='23087'/>
<enumerator name='ZFS_IOC_HOLD' value='23088'/>
<enumerator name='ZFS_IOC_RELEASE' value='23089'/>
<enumerator name='ZFS_IOC_GET_HOLDS' value='23090'/>
<enumerator name='ZFS_IOC_OBJSET_RECVD_PROPS' value='23091'/>
<enumerator name='ZFS_IOC_VDEV_SPLIT' value='23092'/>
<enumerator name='ZFS_IOC_NEXT_OBJ' value='23093'/>
<enumerator name='ZFS_IOC_DIFF' value='23094'/>
<enumerator name='ZFS_IOC_TMP_SNAPSHOT' value='23095'/>
<enumerator name='ZFS_IOC_OBJ_TO_STATS' value='23096'/>
<enumerator name='ZFS_IOC_SPACE_WRITTEN' value='23097'/>
<enumerator name='ZFS_IOC_SPACE_SNAPS' value='23098'/>
<enumerator name='ZFS_IOC_DESTROY_SNAPS' value='23099'/>
<enumerator name='ZFS_IOC_POOL_REGUID' value='23100'/>
<enumerator name='ZFS_IOC_POOL_REOPEN' value='23101'/>
<enumerator name='ZFS_IOC_SEND_PROGRESS' value='23102'/>
<enumerator name='ZFS_IOC_LOG_HISTORY' value='23103'/>
<enumerator name='ZFS_IOC_SEND_NEW' value='23104'/>
<enumerator name='ZFS_IOC_SEND_SPACE' value='23105'/>
<enumerator name='ZFS_IOC_CLONE' value='23106'/>
<enumerator name='ZFS_IOC_BOOKMARK' value='23107'/>
<enumerator name='ZFS_IOC_GET_BOOKMARKS' value='23108'/>
<enumerator name='ZFS_IOC_DESTROY_BOOKMARKS' value='23109'/>
<enumerator name='ZFS_IOC_RECV_NEW' value='23110'/>
<enumerator name='ZFS_IOC_POOL_SYNC' value='23111'/>
<enumerator name='ZFS_IOC_CHANNEL_PROGRAM' value='23112'/>
<enumerator name='ZFS_IOC_LOAD_KEY' value='23113'/>
<enumerator name='ZFS_IOC_UNLOAD_KEY' value='23114'/>
<enumerator name='ZFS_IOC_CHANGE_KEY' value='23115'/>
<enumerator name='ZFS_IOC_REMAP' value='23116'/>
<enumerator name='ZFS_IOC_POOL_CHECKPOINT' value='23117'/>
<enumerator name='ZFS_IOC_POOL_DISCARD_CHECKPOINT' value='23118'/>
<enumerator name='ZFS_IOC_POOL_INITIALIZE' value='23119'/>
<enumerator name='ZFS_IOC_POOL_TRIM' value='23120'/>
<enumerator name='ZFS_IOC_REDACT' value='23121'/>
<enumerator name='ZFS_IOC_GET_BOOKMARK_PROPS' value='23122'/>
<enumerator name='ZFS_IOC_WAIT' value='23123'/>
<enumerator name='ZFS_IOC_WAIT_FS' value='23124'/>
<enumerator name='ZFS_IOC_VDEV_GET_PROPS' value='23125'/>
<enumerator name='ZFS_IOC_VDEV_SET_PROPS' value='23126'/>
<enumerator name='ZFS_IOC_POOL_SCRUB' value='23127'/>
<enumerator name='ZFS_IOC_PLATFORM' value='23168'/>
<enumerator name='ZFS_IOC_EVENTS_NEXT' value='23169'/>
<enumerator name='ZFS_IOC_EVENTS_CLEAR' value='23170'/>
<enumerator name='ZFS_IOC_EVENTS_SEEK' value='23171'/>
<enumerator name='ZFS_IOC_NEXTBOOT' value='23172'/>
<enumerator name='ZFS_IOC_JAIL' value='23173'/>
<enumerator name='ZFS_IOC_UNJAIL' value='23174'/>
<enumerator name='ZFS_IOC_SET_BOOTENV' value='23175'/>
<enumerator name='ZFS_IOC_GET_BOOTENV' value='23176'/>
<enumerator name='ZFS_IOC_LAST' value='23177'/>
</enum-decl>
<typedef-decl name='zfs_ioc_t' type-id='12033f13' id='5b35941c'/>
<enum-decl name='zpool_wait_activity_t' naming-typedef-id='73446457' id='849338e3'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='ZPOOL_WAIT_CKPT_DISCARD' value='0'/>
+7
View File
@@ -247,6 +247,13 @@ out:
return (error);
}
/*
 * Forward a scrub request to lzc_ioctl().
 *
 * The ioctl number `ioc`, the target `name`, the input nvlist `source`
 * and the output pointer `resultp` are handed to lzc_ioctl() unchanged,
 * and whatever lzc_ioctl() returns is returned to the caller as-is.
 */
int
lzc_scrub(zfs_ioc_t ioc, const char *name,
    nvlist_t *source, nvlist_t **resultp)
{
	int error = lzc_ioctl(ioc, name, source, resultp);

	return (error);
}
int
lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
uint8_t *wkeydata, uint_t wkeylen)