From 36f36610c3f9a847d30f1366f0e75b956170a55e Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Thu, 3 Sep 2020 20:04:09 -0700 Subject: [PATCH] Replace cv_{timed}wait_sig with cv_{timed}wait_idle where appropriate There are a number of places where cv_?_sig is used simply for accounting purposes but the surrounding code has no ability to cope with actually receiving a signal. On FreeBSD it is possible to send signals to individual kernel threads so this could enable undesirable behavior. This patch adds routines on Linux that will do the same idle accounting as _sig without making the task interruptible. On FreeBSD cv_*_idle are all aliases for cv_* Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Matt Macy Closes #10843 --- include/os/freebsd/spl/sys/condvar.h | 10 ++++- include/os/freebsd/zfs/sys/zfs_context_os.h | 4 -- include/os/linux/spl/sys/condvar.h | 7 ++++ include/sys/zfs_context.h | 4 ++ module/os/linux/spl/spl-condvar.c | 44 +++++++++++++++++++++ module/zfs/arc.c | 2 +- module/zfs/dbuf.c | 2 +- module/zfs/mmp.c | 2 +- module/zfs/txg.c | 12 ++---- module/zfs/vdev_trim.c | 2 +- module/zfs/zthr.c | 9 +---- 11 files changed, 73 insertions(+), 25 deletions(-) diff --git a/include/os/freebsd/spl/sys/condvar.h b/include/os/freebsd/spl/sys/condvar.h index a42995793..a634ab6b6 100644 --- a/include/os/freebsd/spl/sys/condvar.h +++ b/include/os/freebsd/spl/sys/condvar.h @@ -142,8 +142,14 @@ cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t timo) return (1); } -#define cv_timedwait_io cv_timedwait -#define cv_timedwait_sig_io cv_timedwait_sig +#define cv_timedwait_io cv_timedwait +#define cv_timedwait_idle cv_timedwait +#define cv_timedwait_sig_io cv_timedwait_sig +#define cv_wait_io cv_wait +#define cv_wait_io_sig cv_wait_sig +#define cv_wait_idle cv_wait +#define cv_timedwait_io_hires cv_timedwait_hires +#define cv_timedwait_idle_hires cv_timedwait_hires static inline int cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, diff --git a/include/os/freebsd/zfs/sys/zfs_context_os.h b/include/os/freebsd/zfs/sys/zfs_context_os.h index 0a2f0bfaa..b46c4aa7c 100644 --- a/include/os/freebsd/zfs/sys/zfs_context_os.h +++ b/include/os/freebsd/zfs/sys/zfs_context_os.h @@ -41,9 +41,6 @@ #include #include -#define cv_wait_io(cv, mp) cv_wait(cv, mp) -#define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp) - #define cond_resched() kern_yield(PRI_USER) #define taskq_create_sysdc(a, b, d, e, p, dc, f) \ @@ -84,7 +81,6 @@ typedef int fstrans_cookie_t; #define signal_pending(x) SIGPENDING(x) #define current curthread #define thread_join(x) -#define cv_wait_io(cv, mp) cv_wait(cv, mp) typedef struct opensolaris_utsname utsname_t; extern utsname_t *utsname(void); extern int spa_import_rootpool(const char *name, bool checkpointrewind); diff --git a/include/os/linux/spl/sys/condvar.h b/include/os/linux/spl/sys/condvar.h index 22408824f..fa321403b 100644 --- a/include/os/linux/spl/sys/condvar.h +++ b/include/os/linux/spl/sys/condvar.h @@ -80,15 +80,19 @@ extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *); extern void __cv_destroy(kcondvar_t *); extern void __cv_wait(kcondvar_t *, kmutex_t *); extern void __cv_wait_io(kcondvar_t *, kmutex_t *); +extern void __cv_wait_idle(kcondvar_t *, kmutex_t *); extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *); extern int __cv_wait_sig(kcondvar_t *, kmutex_t *); extern int __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t); extern int __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t); extern int __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t); +extern int __cv_timedwait_idle(kcondvar_t *, kmutex_t *, clock_t); extern int cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t, hrtime_t res, int flag); extern int cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t, hrtime_t res, int flag); +extern int cv_timedwait_idle_hires(kcondvar_t *, kmutex_t *, hrtime_t, + hrtime_t res, int flag); extern void __cv_signal(kcondvar_t *); extern void __cv_broadcast(kcondvar_t *c); @@ -96,6 +100,7 @@ extern void __cv_broadcast(kcondvar_t *c); #define cv_destroy(cvp) __cv_destroy(cvp) #define cv_wait(cvp, mp) __cv_wait(cvp, mp) #define cv_wait_io(cvp, mp) __cv_wait_io(cvp, mp) +#define cv_wait_idle(cvp, mp) __cv_wait_idle(cvp, mp) #define cv_wait_io_sig(cvp, mp) __cv_wait_io_sig(cvp, mp) #define cv_wait_sig(cvp, mp) __cv_wait_sig(cvp, mp) #define cv_signal(cvp) __cv_signal(cvp) @@ -109,5 +114,7 @@ extern void __cv_broadcast(kcondvar_t *c); #define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t) #define cv_timedwait_io(cvp, mp, t) __cv_timedwait_io(cvp, mp, t) #define cv_timedwait_sig(cvp, mp, t) __cv_timedwait_sig(cvp, mp, t) +#define cv_timedwait_idle(cvp, mp, t) __cv_timedwait_idle(cvp, mp, t) + #endif /* _SPL_CONDVAR_H */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 16df302c8..8e16399e8 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -325,11 +325,15 @@ extern void cv_signal(kcondvar_t *cv); extern void cv_broadcast(kcondvar_t *cv); #define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at) +#define cv_timedwait_idle(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_wait_io(cv, mp) cv_wait(cv, mp) +#define cv_wait_idle(cv, mp) cv_wait(cv, mp) #define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp) #define cv_timedwait_sig_hires(cv, mp, t, r, f) \ cv_timedwait_hires(cv, mp, t, r, f) +#define cv_timedwait_idle_hires(cv, mp, t, r, f) \ + cv_timedwait_hires(cv, mp, t, r, f) /* * Thread-specific data diff --git a/module/os/linux/spl/spl-condvar.c b/module/os/linux/spl/spl-condvar.c index 9d045e3e8..49f486645 100644 --- a/module/os/linux/spl/spl-condvar.c +++ b/module/os/linux/spl/spl-condvar.c @@ -198,6 +198,18 @@ __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp) } EXPORT_SYMBOL(__cv_wait_sig); +void +__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp) +{ + sigset_t blocked, saved; + + sigfillset(&blocked); + (void) sigprocmask(SIG_BLOCK, &blocked, &saved); + cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0); + (void) sigprocmask(SIG_SETMASK, &saved, NULL); +} +EXPORT_SYMBOL(__cv_wait_idle); + #if defined(HAVE_IO_SCHEDULE_TIMEOUT) #define spl_io_schedule_timeout(t) io_schedule_timeout(t) #else @@ -330,6 +342,21 @@ __cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) } EXPORT_SYMBOL(__cv_timedwait_sig); +int +__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) +{ + sigset_t blocked, saved; + int rc; + + sigfillset(&blocked); + (void) sigprocmask(SIG_BLOCK, &blocked, &saved); + rc = __cv_timedwait_common(cvp, mp, exp_time, + TASK_INTERRUPTIBLE, 0); + (void) sigprocmask(SIG_SETMASK, &saved, NULL); + + return (rc); +} +EXPORT_SYMBOL(__cv_timedwait_idle); /* * 'expire_time' argument is an absolute clock time in nanoseconds. * Return value is time left (expire_time - now) or -1 if timeout occurred. @@ -427,6 +454,23 @@ cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, } EXPORT_SYMBOL(cv_timedwait_sig_hires); +int +cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, + hrtime_t res, int flag) +{ + sigset_t blocked, saved; + int rc; + + sigfillset(&blocked); + (void) sigprocmask(SIG_BLOCK, &blocked, &saved); + rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, + TASK_INTERRUPTIBLE); + (void) sigprocmask(SIG_SETMASK, &saved, NULL); + + return (rc); +} +EXPORT_SYMBOL(cv_timedwait_idle_hires); + void __cv_signal(kcondvar_t *cvp) { diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 904c325f3..0c33a4535 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -9174,7 +9174,7 @@ l2arc_feed_thread(void *unused) cookie = spl_fstrans_mark(); while (l2arc_thread_exit == 0) { CALLB_CPR_SAFE_BEGIN(&cpr); - (void) cv_timedwait_sig(&l2arc_feed_thr_cv, + (void) cv_timedwait_idle(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock, next); CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); next = ddi_get_lbolt() + hz; diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 2de1f4e4c..7d817320a 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -718,7 +718,7 @@ dbuf_evict_thread(void *unused) while (!dbuf_evict_thread_exit) { while (!dbuf_cache_above_lowater() && !dbuf_evict_thread_exit) { CALLB_CPR_SAFE_BEGIN(&cpr); - (void) cv_timedwait_sig_hires(&dbuf_evict_cv, + (void) cv_timedwait_idle_hires(&dbuf_evict_cv, &dbuf_evict_lock, SEC2NSEC(1), MSEC2NSEC(1), 0); CALLB_CPR_SAFE_END(&cpr, &dbuf_evict_lock); } diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index 4170d7e03..1b97de468 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -671,7 +671,7 @@ mmp_thread(void *arg) } CALLB_CPR_SAFE_BEGIN(&cpr); - (void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv, + (void) cv_timedwait_idle_hires(&mmp->mmp_thread_cv, &mmp->mmp_thread_lock, next_time, USEC2NSEC(100), CALLOUT_FLAG_ABSOLUTE); CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock); diff --git a/module/zfs/txg.c b/module/zfs/txg.c index a5f2b0417..65375b579 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -242,16 +242,11 @@ txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, clock_t time) { CALLB_CPR_SAFE_BEGIN(cpr); - /* - * cv_wait_sig() is used instead of cv_wait() in order to prevent - * this process from incorrectly contributing to the system load - * average when idle. - */ if (time) { - (void) cv_timedwait_sig(cv, &tx->tx_sync_lock, + (void) cv_timedwait_idle(cv, &tx->tx_sync_lock, ddi_get_lbolt() + time); } else { - cv_wait_sig(cv, &tx->tx_sync_lock); + cv_wait_idle(cv, &tx->tx_sync_lock); } CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock); @@ -760,7 +755,8 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce) if (should_quiesce == B_TRUE) { cv_wait_io(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock); } else { - cv_wait_sig(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock); + cv_wait_idle(&tx->tx_quiesce_done_cv, + &tx->tx_sync_lock); } } mutex_exit(&tx->tx_sync_lock); diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c index 3f8c34806..7383e1493 100644 --- a/module/zfs/vdev_trim.c +++ b/module/zfs/vdev_trim.c @@ -481,7 +481,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size) if (ta->trim_type == TRIM_TYPE_MANUAL) { while (vd->vdev_trim_rate != 0 && !vdev_trim_should_stop(vd) && vdev_trim_calculate_rate(ta) > vd->vdev_trim_rate) { - cv_timedwait_sig(&vd->vdev_trim_io_cv, + cv_timedwait_idle(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock, ddi_get_lbolt() + MSEC_TO_TICK(10)); } diff --git a/module/zfs/zthr.c b/module/zfs/zthr.c index fdc4b8633..e230cd2cb 100644 --- a/module/zfs/zthr.c +++ b/module/zfs/zthr.c @@ -237,15 +237,10 @@ zthr_procedure(void *arg) t->zthr_func(t->zthr_arg, t); mutex_enter(&t->zthr_state_lock); } else { - /* - * cv_wait_sig() is used instead of cv_wait() in - * order to prevent this process from incorrectly - * contributing to the system load average when idle. - */ if (t->zthr_sleep_timeout == 0) { - cv_wait_sig(&t->zthr_cv, &t->zthr_state_lock); + cv_wait_idle(&t->zthr_cv, &t->zthr_state_lock); } else { - (void) cv_timedwait_sig_hires(&t->zthr_cv, + (void) cv_timedwait_idle_hires(&t->zthr_cv, &t->zthr_state_lock, t->zthr_sleep_timeout, MSEC2NSEC(1), 0); }