diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 5e5bbc972..ae71cdc88 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -122,6 +122,7 @@ static int zfs_do_change_key(int argc, char **argv); static int zfs_do_project(int argc, char **argv); static int zfs_do_version(int argc, char **argv); static int zfs_do_redact(int argc, char **argv); +static int zfs_do_wait(int argc, char **argv); #ifdef __FreeBSD__ static int zfs_do_jail(int argc, char **argv); @@ -183,7 +184,8 @@ typedef enum { HELP_VERSION, HELP_REDACT, HELP_JAIL, - HELP_UNJAIL + HELP_UNJAIL, + HELP_WAIT, } zfs_help_t; typedef struct zfs_command { @@ -248,6 +250,7 @@ static zfs_command_t command_table[] = { { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY }, { "change-key", zfs_do_change_key, HELP_CHANGE_KEY }, { "redact", zfs_do_redact, HELP_REDACT }, + { "wait", zfs_do_wait, HELP_WAIT }, #ifdef __FreeBSD__ { "jail", zfs_do_jail, HELP_JAIL }, @@ -410,6 +413,8 @@ get_usage(zfs_help_t idx) return (gettext("\tjail \n")); case HELP_UNJAIL: return (gettext("\tunjail \n")); + case HELP_WAIT: + return (gettext("\twait [-t <activity>] <filesystem>\n")); } abort(); @@ -8317,6 +8322,97 @@ zfs_do_project(int argc, char **argv) return (ret); } +/* + * zfs wait [-t <activity>[,<activity>]...] <filesystem> + * + * Waits until the selected background activities (all of them by default) + * have ceased in the given filesystem. Returns 0 on success and non-zero if + * the filesystem cannot be opened or a wait fails. + */ +static int +zfs_do_wait(int argc, char **argv) +{ + boolean_t enabled[ZFS_WAIT_NUM_ACTIVITIES]; + int error = 0, i; + int c; + + /* By default, wait for all types of activity. 
*/ + for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) + enabled[i] = B_TRUE; + + while ((c = getopt(argc, argv, "t:")) != -1) { + switch (c) { + case 't': + { + static char *col_subopts[] = { "deleteq", NULL }; + char *value; + + /* Reset activities array */ + bzero(&enabled, sizeof (enabled)); + while (*optarg != '\0') { + int activity = getsubopt(&optarg, col_subopts, + &value); + + if (activity < 0) { + (void) fprintf(stderr, + gettext("invalid activity '%s'\n"), + value); + usage(B_FALSE); + } + + enabled[activity] = B_TRUE; + } + break; + } + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argv += optind; + argc -= optind; + if (argc < 1) { + (void) fprintf(stderr, gettext("missing 'filesystem' " + "argument\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM); + if (zhp == NULL) + return (1); + + for (;;) { + boolean_t missing = B_FALSE; + boolean_t any_waited = B_FALSE; + + for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) { + boolean_t waited; + + if (!enabled[i]) + continue; + + error = zfs_wait_status(zhp, i, &missing, &waited); + if (error != 0 || missing) + break; + + any_waited = (any_waited || waited); + } + + if (error != 0 || missing || !any_waited) + break; + } + + zfs_close(zhp); + + return (error); +} + /* * Display version message */ diff --git a/configure.ac b/configure.ac index eeb0a3843..370a1970f 100644 --- a/configure.ac +++ b/configure.ac @@ -264,6 +264,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile + tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile tests/zfs-tests/tests/functional/cli_root/zpool/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile 
tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile diff --git a/include/libzfs.h b/include/libzfs.h index 236a73130..7633579d4 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -507,6 +507,9 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); +extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t, + boolean_t *, boolean_t *); + /* * zfs encryption management */ diff --git a/include/libzfs_core.h b/include/libzfs_core.h index c4b4f8e71..18ce6994a 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -133,6 +133,7 @@ int lzc_pool_checkpoint_discard(const char *); int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *); int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *); +int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *); #ifdef __cplusplus } diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h index bb6921027..88fd61035 100644 --- a/include/sys/dsl_dir.h +++ b/include/sys/dsl_dir.h @@ -121,6 +121,11 @@ struct dsl_dir { bplist_t dd_pending_frees; bplist_t dd_pending_allocs; + kmutex_t dd_activity_lock; + kcondvar_t dd_activity_cv; + boolean_t dd_activity_cancelled; + uint64_t dd_activity_waiters; + /* protected by dd_lock; keep at end of struct for better locality */ char dd_myname[ZFS_MAX_DATASET_NAME_LEN]; }; @@ -192,6 +197,9 @@ boolean_t dsl_dir_is_zapified(dsl_dir_t *dd); void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj); void dsl_dir_livelist_close(dsl_dir_t *dd); void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total); +int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, + boolean_t *waited); +void dsl_dir_cancel_waiters(dsl_dir_t *dd); /* internal reserved dir name */ #define MOS_DIR_NAME "$MOS" diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 3484b13e3..477356aa7 100644 --- 
a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1282,6 +1282,7 @@ typedef enum zfs_ioc { ZFS_IOC_REDACT, /* 0x5a51 */ ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */ ZFS_IOC_WAIT, /* 0x5a53 */ + ZFS_IOC_WAIT_FS, /* 0x5a54 */ /* * Per-platform (Optional) - 6/128 numbers reserved. @@ -1358,6 +1359,11 @@ typedef enum { ZPOOL_WAIT_NUM_ACTIVITIES } zpool_wait_activity_t; +typedef enum { + ZFS_WAIT_DELETEQ, + ZFS_WAIT_NUM_ACTIVITIES +} zfs_wait_activity_t; + /* * Bookmark name values. */ @@ -1415,6 +1421,12 @@ typedef enum { #define ZPOOL_WAIT_TAG "wait_tag" #define ZPOOL_WAIT_WAITED "wait_waited" +/* + * The following are names used when invoking ZFS_IOC_WAIT_FS. + */ +#define ZFS_WAIT_ACTIVITY "wait_activity" +#define ZFS_WAIT_WAITED "wait_waited" + /* * Flags for ZFS_IOC_VDEV_SET_STATE */ diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 48d656323..45e7a79fb 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -5599,3 +5599,31 @@ zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize, volsize += numdb; return (volsize); } + +/* + * Wait for the given activity and return the status of the wait (whether or not + * any waiting was done) in the 'waited' parameter. Non-existent fses are + * reported via the 'missing' parameter, rather than by printing an error + * message. This is convenient when this function is called in a loop over a + * long period of time (as it is, for example, by zfs's wait cmd). In that + * scenario, a fs being exported or destroyed should be considered a normal + * event, so we don't want to print an error when we find that the fs doesn't + * exist. 
+ */ +int +zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity, + boolean_t *missing, boolean_t *waited) +{ + int error = lzc_wait_fs(zhp->zfs_name, activity, waited); + *missing = (error == ENOENT); + if (*missing) + return (0); + + if (error != 0) { + (void) zfs_standard_error_fmt(zhp->zfs_hdl, error, + dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"), + zhp->zfs_name); + } + + return (error); +} diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index f65db4ff4..18143d364 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -1621,3 +1621,23 @@ lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag, { return (wait_common(pool, activity, B_TRUE, tag, waited)); } + +int +lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited) +{ + nvlist_t *args = fnvlist_alloc(); + nvlist_t *result = NULL; + + fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity); + + int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result); + + if (error == 0 && waited != NULL) + *waited = fnvlist_lookup_boolean_value(result, + ZFS_WAIT_WAITED); + + fnvlist_free(args); + fnvlist_free(result); + + return (error); +} diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am index f81a1f672..8239c2157 100644 --- a/man/man8/Makefile.am +++ b/man/man8/Makefile.am @@ -41,6 +41,7 @@ dist_man_MANS = \ zfs-unmount.8 \ zfs-upgrade.8 \ zfs-userspace.8 \ + zfs-wait.8 \ zgenhostid.8 \ zinject.8 \ zpool.8 \ diff --git a/man/man8/zfs-wait.8 b/man/man8/zfs-wait.8 new file mode 100644 index 000000000..dcc679bb0 --- /dev/null +++ b/man/man8/zfs-wait.8 @@ -0,0 +1,71 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. 
+.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. +.\" Copyright (c) 2017 Datto Inc. +.\" Copyright (c) 2018 George Melikov. All Rights Reserved. +.\" Copyright 2017 Nexenta Systems, Inc. +.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. +.\" +.Dd August 9, 2019 +.Dt ZFS-WAIT 8 +.Os Linux +.Sh NAME +.Nm zfs Ns Pf - Cm wait +.Nd Wait for background activity to stop in a ZFS filesystem +.Sh SYNOPSIS +.Nm +.Cm wait +.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ... +.Ar fs +.Sh DESCRIPTION +.Bl -tag -width Ds +.It Xo +.Nm +.Cm wait +.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ... +.Ar fs +.Xc +Waits until all background activity of the given types has ceased in the given +filesystem. +The activity could cease because it has completed or because the filesystem has +been destroyed or unmounted. +If no activities are specified, the command waits until background activity of +every type listed below has ceased. +If there is no activity of the given types in progress, the command returns +immediately. 
+.Pp +These are the possible values for +.Ar activity , +along with what each one waits for: +.Bd -literal + deleteq The filesystem's internal delete queue to empty +.Ed +.Pp +Note that the internal delete queue does not finish draining until +all large files have had time to be fully destroyed and all open file +handles to unlinked files are closed. +.El +.El +.Sh SEE ALSO +.Xr lsof 8 diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index eb6e0e33e..587f16c4e 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -281,6 +281,11 @@ Attaches a filesystem to a jail. .It Xr zfs-unjail 8 Detaches a filesystem from a jail. .El +.Ss Waiting +.Bl -tag -width "" +.It Xr zfs-wait 8 +Wait for background activity in a filesystem to complete. +.El .Sh EXIT STATUS The .Nm diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index 7ebf38ddb..591e35fd1 100644 --- a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -52,6 +52,8 @@ #include #include #include +#include +#include /* * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups @@ -739,6 +741,8 @@ zfs_rmnode(znode_t *zp) zfs_unlinked_add(xzp, tx); } + mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock); + /* * Remove this znode from the unlinked set. If a has rollback has * occurred while a file is open and unlinked. 
Then when the file @@ -749,6 +753,13 @@ zfs_rmnode(znode_t *zp) zp->z_id, tx); VERIFY(error == 0 || error == ENOENT); + uint64_t count; + if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) { + cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv); + } + + mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock); + dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1); zfs_znode_delete(zp, tx); diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 478e07862..b6757d1bc 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -872,6 +873,8 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) "num_entries in unlinked set: %llu", zs.zs_num_entries); zfs_unlinked_drain(zfsvfs); + dsl_dir_t *dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; + dd->dd_activity_cancelled = B_FALSE; } /* @@ -1423,6 +1426,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); } dmu_objset_evict_dbufs(zfsvfs->z_os); + dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; + dsl_dir_cancel_waiters(dd); return (0); } @@ -1813,6 +1818,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) if (err != 0) goto bail; + ds->ds_dir->dd_activity_cancelled = B_FALSE; VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); zfs_set_fuid_feature(zfsvfs); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 3e5a67bdb..2d6e95e31 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -3077,20 +3077,26 @@ dsl_dataset_rename_snapshot(const char *fsname, static int dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) { - boolean_t held; + boolean_t held = B_FALSE; if (!dmu_tx_is_syncing(tx)) return (0); - if (owner != NULL) { - VERIFY3P(ds->ds_owner, ==, owner); - dsl_dataset_long_rele(ds, owner); - } - - held = dsl_dataset_long_held(ds); - - if (owner != NULL) 
- dsl_dataset_long_hold(ds, owner); + dsl_dir_t *dd = ds->ds_dir; + mutex_enter(&dd->dd_activity_lock); + uint64_t holds = zfs_refcount_count(&ds->ds_longholds) - + (owner != NULL ? 1 : 0); + /* + * The value of dd_activity_waiters can change as soon as we drop the + * lock, but we're fine with that; new waiters coming in or old + * waiters leaving doesn't cause problems, since we're going to cancel + * waiters later anyway. The goal of this check is to verify that no + * non-waiters have long-holds, and all new long-holds will be + * prevented because we're holding the pool config as writer. + */ + if (holds != dd->dd_activity_waiters) + held = B_TRUE; + mutex_exit(&dd->dd_activity_lock); if (held) return (SET_ERROR(EBUSY)); @@ -4036,6 +4042,8 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, DMU_MAX_ACCESS * spa_asize_inflation); ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); + dsl_dir_cancel_waiters(origin_head->ds_dir); + /* * Swap per-dataset feature flags. */ diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 01b5f080d..883928f0e 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -766,6 +766,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) if (zfs_refcount_count(&ds->ds_longholds) != expected_holds) return (SET_ERROR(EBUSY)); + ASSERT0(ds->ds_dir->dd_activity_waiters); + mos = ds->ds_dir->dd_pool->dp_meta_objset; /* @@ -1002,6 +1004,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) /* We need to log before removing it from the namespace. 
*/ spa_history_log_internal_ds(ds, "destroy", tx, " "); + dsl_dir_cancel_waiters(ds->ds_dir); + rmorigin = (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 172ebc72c..63ecb1d39 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -51,6 +51,9 @@ #include #include "zfs_namecheck.h" #include "zfs_prop.h" +#ifdef _KERNEL +#include +#endif /* * Filesystem and Snapshot Limits @@ -160,6 +163,8 @@ dsl_dir_evict_async(void *dbu) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); + cv_destroy(&dd->dd_activity_cv); + mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); } @@ -207,6 +212,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, } mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL); dsl_prop_init(dd); dsl_dir_snap_cmtime_update(dd); @@ -280,6 +287,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); + cv_destroy(&dd->dd_activity_cv); + mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dd = winner; @@ -310,6 +319,8 @@ errout: if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); + cv_destroy(&dd->dd_activity_cv); + mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); @@ -2282,6 +2293,108 @@ dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total) } } +static int +dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds, + zfs_wait_activity_t activity, boolean_t *in_progress) +{ + int error = 0; + + ASSERT(MUTEX_HELD(&dd->dd_activity_lock)); + + switch (activity) { + case ZFS_WAIT_DELETEQ: { 
+#ifdef _KERNEL + objset_t *os; + error = dmu_objset_from_ds(ds, &os); + if (error != 0) + break; + + mutex_enter(&os->os_user_ptr_lock); + void *user = dmu_objset_get_user(os); + mutex_exit(&os->os_user_ptr_lock); + if (dmu_objset_type(os) != DMU_OST_ZFS || + user == NULL || zfs_get_vfs_flag_unmounted(os)) { + *in_progress = B_FALSE; + return (0); + } + + uint64_t readonly = B_FALSE; + error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly, + NULL); + + if (error != 0) + break; + + if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) { + *in_progress = B_FALSE; + return (0); + } + + uint64_t count, unlinked_obj; + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, + &unlinked_obj); + if (error != 0) { + /* The caller owns the hold on ds; don't drop it. */ + break; + } + error = zap_count(os, unlinked_obj, &count); + + if (error == 0) + *in_progress = (count != 0); + break; +#else + /* + * The delete queue is ZPL specific, and libzpool doesn't have + * it. It doesn't make sense to wait for it. 
+ */ + *in_progress = B_FALSE; + break; +#endif + } + default: + panic("unrecognized value for activity %d", activity); + } + + return (error); +} + +int +dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, + boolean_t *waited) +{ + int error = 0; + boolean_t in_progress; + dsl_pool_t *dp = dd->dd_pool; + for (;;) { + dsl_pool_config_enter(dp, FTAG); + error = dsl_dir_activity_in_progress(dd, ds, activity, + &in_progress); + dsl_pool_config_exit(dp, FTAG); + if (error != 0 || !in_progress) + break; + + *waited = B_TRUE; + + if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) == + 0 || dd->dd_activity_cancelled) { + error = SET_ERROR(EINTR); + break; + } + } + return (error); +} + +void +dsl_dir_cancel_waiters(dsl_dir_t *dd) +{ + mutex_enter(&dd->dd_activity_lock); + dd->dd_activity_cancelled = B_TRUE; + cv_broadcast(&dd->dd_activity_cv); + while (dd->dd_activity_waiters > 0) + cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock); + mutex_exit(&dd->dd_activity_lock); +} + #if defined(_KERNEL) EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index d57aef509..fb9435341 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -4072,6 +4072,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl) return (error); } +/* + * This ioctl waits for activity of a particular type to complete. If there is + * no activity of that type in progress, it returns immediately, and the + * returned value "waited" is false. If there is activity in progress, the + * ioctl blocks until all activity of that type is complete, and then returns + * with "waited" set to true. + * + * If a waiting thread receives a signal, or the wait is cancelled (see + * dsl_dir_cancel_waiters()), the ioctl returns immediately with EINTR. 
+ * + * innvl: { + * "wait_activity" -> int32_t + * } + * + * outnvl: "wait_waited" -> boolean_t + */ +static const zfs_ioc_key_t zfs_keys_fs_wait[] = { + {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0}, +}; + +static int +zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl) +{ + int32_t activity; + boolean_t waited = B_FALSE; + int error; + dsl_pool_t *dp; + dsl_dir_t *dd; + dsl_dataset_t *ds; + + if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0) + return (SET_ERROR(EINVAL)); + + if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0) + return (SET_ERROR(EINVAL)); + + if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0) + return (error); + + if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + + dd = ds->ds_dir; + mutex_enter(&dd->dd_activity_lock); + dd->dd_activity_waiters++; + + /* + * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t + * aren't evicted while we're waiting. Normally this is prevented by + * holding the pool, but we can't do that while we're waiting since + * that would prevent TXGs from syncing out. Some of the functionality + * of long-holds (e.g. preventing deletion) is unnecessary for this + * case, since we would cancel the waiters before proceeding with a + * deletion. An alternative mechanism for keeping the dataset around + * could be developed but this is simpler. 
+ */ + dsl_dataset_long_hold(ds, FTAG); + dsl_pool_rele(dp, FTAG); + + error = dsl_dir_wait(dd, ds, activity, &waited); + + dsl_dataset_long_rele(ds, FTAG); + dd->dd_activity_waiters--; + if (dd->dd_activity_waiters == 0) + cv_signal(&dd->dd_activity_cv); + mutex_exit(&dd->dd_activity_lock); + + dsl_dataset_rele(ds, FTAG); + + if (error == 0) + fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited); + + return (error); +} + /* * fsname is name of dataset to rollback (to most recent snapshot) * @@ -6915,6 +6992,11 @@ zfs_ioctl_init(void) POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait)); + zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS, + zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, + zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 84ea70f07..af720ad9b 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -288,6 +288,10 @@ tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos', 'zfs_upgrade_007_neg'] tags = ['functional', 'cli_root', 'zfs_upgrade'] +[tests/functional/cli_root/zfs_wait] +tests = ['zfs_wait_deleteq'] +tags = ['functional', 'cli_root', 'zfs_wait'] + [tests/functional/cli_root/zpool] tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors'] tags = ['functional', 'cli_root', 'zpool'] diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c index 47e8ff5e2..3f6147509 100644 --- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c +++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c @@ -739,6 +739,18 @@ test_wait(const char *pool) nvlist_free(optional); } +static void 
+test_wait_fs(const char *dataset) +{ + nvlist_t *required = fnvlist_alloc(); + + fnvlist_add_int32(required, "wait_activity", 2); + + IOC_INPUT_TEST(ZFS_IOC_WAIT_FS, dataset, required, NULL, EINVAL); + + nvlist_free(required); +} + static void zfs_ioc_input_tests(const char *pool) { @@ -826,6 +838,7 @@ zfs_ioc_input_tests(const char *pool) test_vdev_trim(pool); test_wait(pool); + test_wait_fs(dataset); /* * cleanup @@ -980,6 +993,7 @@ validate_ioc_values(void) CHECK(ZFS_IOC_BASE + 81 == ZFS_IOC_REDACT); CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS); CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT); + CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS); CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT); CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR); CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK); diff --git a/tests/zfs-tests/tests/functional/cli_root/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/Makefile.am index 01af9d6b9..8d99df09f 100644 --- a/tests/zfs-tests/tests/functional/cli_root/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/Makefile.am @@ -32,6 +32,7 @@ SUBDIRS = \ zfs_unmount \ zfs_unshare \ zfs_upgrade \ + zfs_wait \ zpool \ zpool_add \ zpool_attach \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am new file mode 100644 index 000000000..d401fe68b --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_wait +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + zfs_wait_deleteq.ksh + +dist_pkgdata_DATA = \ + zfs_wait.kshlib diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh new file mode 100755 index 000000000..456d2d0c2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh @@ -0,0 +1,20 
@@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh new file mode 100755 index 000000000..cca05fee7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh @@ -0,0 +1,21 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib new file mode 100644 index 000000000..9f62a7c92 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib @@ -0,0 +1,80 @@ +#!/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2018, 2019 by Delphix. All rights reserved. +# + +typeset -a disk_array=($(find_disks $DISKS)) + +typeset -r DISK1=${disk_array[0]} +typeset -r DISK2=${disk_array[1]} +typeset -r DISK3=${disk_array[2]} + +# +# When the condition it is waiting for becomes true, 'zfs wait' should return +# promptly. We want to enforce this, but any check will be racy because it will +# take some small but indeterminate amount of time for the waiting thread to be +# woken up and for the process to exit. +# +# To deal with this, we provide a grace period after the condition becomes true +# during which 'zfs wait' can exit. If it hasn't exited by the time the grace +# period expires we assume something is wrong and fail the test. While there is +# no value that can really be correct, the idea is we choose something large +# enough that it shouldn't cause issues in practice. +# +typeset -r WAIT_EXIT_GRACE=2.0 + +function proc_exists # pid +{ + ps -p $1 >/dev/null +} + +function proc_must_exist # pid +{ + proc_exists $1 || log_fail "zfs process exited too soon" +} + +function proc_must_not_exist # pid +{ + proc_exists $1 && log_fail "zfs process took too long to exit" +} + +function get_time +{ + date +'%H:%M:%S' +} + +function kill_if_running +{ + typeset pid=$1 + [[ $pid ]] && proc_exists $pid && log_must kill -s TERM $pid +} + +# Log a command and then start it running in the background +function log_bkgrnd +{ + log_note "$(get_time) Starting cmd in background '$@'" + "$@" & +} + +# Check that a background process has completed and exited with a status of 0 +function bkgrnd_proc_succeeded +{ + typeset pid=$1 + + log_must sleep $WAIT_EXIT_GRACE + + proc_must_not_exist $pid + wait $pid || log_fail "process exited with status $?" 
+ log_note "$(get_time) wait completed successfully" +} diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh new file mode 100755 index 000000000..00c5a109c --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh @@ -0,0 +1,57 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib + +# +# DESCRIPTION: +# 'zfs wait' works when waiting for the delete queue to empty. +# +# STRATEGY: +# 1. Create a file +# 2. Open a file descriptor pointing to that file. +# 3. Delete the file. +# 4. Start a background process waiting for the delete queue to empty. +# 5. Verify that the command doesn't return immediately. +# 6. Close the open file descriptor. +# 7. Verify that the command returns soon after the descriptor is closed. +# + +function cleanup +{ + kill_if_running $pid + exec 3<&- +} + + +typeset -r TESTFILE="/$TESTPOOL/testfile" +typeset pid + +log_onexit cleanup + +log_must touch $TESTFILE +exec 3<> $TESTFILE +log_must rm $TESTFILE +log_bkgrnd zfs wait -t deleteq $TESTPOOL +pid=$! +proc_must_exist $pid + +exec 3<&- +log_must sleep 0.5 +bkgrnd_proc_succeeded $pid + +log_pass "'zfs wait -t deleteq' works."