From 61c3391acc988573aaf9e59550f863de4affcb68 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Sun, 13 Jan 2019 10:09:46 -0800 Subject: [PATCH] Serialize ZTHR operations to eliminate races Adds a new lock for serializing operations on zthrs. The commit also includes some code cleanup and refactoring. Reviewed by: Matt Ahrens Reviewed by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: Serapheim Dimitropoulos Closes #8229 --- include/sys/spa_checkpoint.h | 2 +- include/sys/zthr.h | 22 +-- module/zfs/arc.c | 8 +- module/zfs/spa.c | 14 +- module/zfs/spa_checkpoint.c | 6 +- module/zfs/vdev_indirect.c | 6 +- module/zfs/zthr.c | 264 +++++++++++++++++++++++------------ 7 files changed, 192 insertions(+), 130 deletions(-) diff --git a/include/sys/spa_checkpoint.h b/include/sys/spa_checkpoint.h index a5c856014..9be2b6eea 100644 --- a/include/sys/spa_checkpoint.h +++ b/include/sys/spa_checkpoint.h @@ -37,7 +37,7 @@ int spa_checkpoint(const char *); int spa_checkpoint_discard(const char *); boolean_t spa_checkpoint_discard_thread_check(void *, zthr_t *); -int spa_checkpoint_discard_thread(void *, zthr_t *); +void spa_checkpoint_discard_thread(void *, zthr_t *); int spa_checkpoint_get_stats(spa_t *, pool_checkpoint_stat_t *); diff --git a/include/sys/zthr.h b/include/sys/zthr.h index ce6033ecb..33c218ec4 100644 --- a/include/sys/zthr.h +++ b/include/sys/zthr.h @@ -14,42 +14,26 @@ */ /* - * Copyright (c) 2017 by Delphix. All rights reserved. + * Copyright (c) 2017, 2018 by Delphix. All rights reserved. */ #ifndef _SYS_ZTHR_H #define _SYS_ZTHR_H typedef struct zthr zthr_t; -typedef int (zthr_func_t)(void *, zthr_t *); +typedef void (zthr_func_t)(void *, zthr_t *); typedef boolean_t (zthr_checkfunc_t)(void *, zthr_t *); -struct zthr { - kthread_t *zthr_thread; - kmutex_t zthr_lock; - kcondvar_t zthr_cv; - boolean_t zthr_cancel; - hrtime_t zthr_wait_time; - - zthr_checkfunc_t *zthr_checkfunc; - zthr_func_t *zthr_func; - void *zthr_arg; - int zthr_rc; -}; - extern zthr_t *zthr_create(zthr_checkfunc_t checkfunc, zthr_func_t *func, void *arg); extern zthr_t *zthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg, hrtime_t nano_wait); - -extern void zthr_exit(zthr_t *t, int rc); extern void zthr_destroy(zthr_t *t); extern void zthr_wakeup(zthr_t *t); -extern int zthr_cancel(zthr_t *t); +extern void zthr_cancel(zthr_t *t); extern void zthr_resume(zthr_t *t); extern boolean_t zthr_iscancelled(zthr_t *t); -extern boolean_t zthr_isrunning(zthr_t *t); #endif /* _SYS_ZTHR_H */ diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 9e0ffd06d..7e0963334 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5113,7 +5113,7 @@ arc_adjust_cb_check(void *arg, zthr_t *zthr) * from the ARC. */ /* ARGSUSED */ -static int +static void arc_adjust_cb(void *arg, zthr_t *zthr) { uint64_t evicted = 0; @@ -5147,8 +5147,6 @@ arc_adjust_cb(void *arg, zthr_t *zthr) } mutex_exit(&arc_adjust_lock); spl_fstrans_unmark(cookie); - - return (0); } /* ARGSUSED */ @@ -5190,7 +5188,7 @@ arc_reap_cb_check(void *arg, zthr_t *zthr) * to free more buffers. 
*/ /* ARGSUSED */ -static int +static void arc_reap_cb(void *arg, zthr_t *zthr) { int64_t free_memory; @@ -5231,8 +5229,6 @@ arc_reap_cb(void *arg, zthr_t *zthr) arc_reduce_target_size(to_free); } spl_fstrans_unmark(cookie); - - return (0); } #ifdef _KERNEL diff --git a/module/zfs/spa.c b/module/zfs/spa.c index c4ff5002b..c98daab49 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1486,13 +1486,11 @@ spa_unload(spa_t *spa) } if (spa->spa_condense_zthr != NULL) { - ASSERT(!zthr_isrunning(spa->spa_condense_zthr)); zthr_destroy(spa->spa_condense_zthr); spa->spa_condense_zthr = NULL; } if (spa->spa_checkpoint_discard_zthr != NULL) { - ASSERT(!zthr_isrunning(spa->spa_checkpoint_discard_zthr)); zthr_destroy(spa->spa_checkpoint_discard_zthr); spa->spa_checkpoint_discard_zthr = NULL; } @@ -7214,12 +7212,12 @@ spa_async_suspend(spa_t *spa) spa_vdev_remove_suspend(spa); zthr_t *condense_thread = spa->spa_condense_zthr; - if (condense_thread != NULL && zthr_isrunning(condense_thread)) - VERIFY0(zthr_cancel(condense_thread)); + if (condense_thread != NULL) + zthr_cancel(condense_thread); zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; - if (discard_thread != NULL && zthr_isrunning(discard_thread)) - VERIFY0(zthr_cancel(discard_thread)); + if (discard_thread != NULL) + zthr_cancel(discard_thread); } void @@ -7232,11 +7230,11 @@ spa_async_resume(spa_t *spa) spa_restart_removal(spa); zthr_t *condense_thread = spa->spa_condense_zthr; - if (condense_thread != NULL && !zthr_isrunning(condense_thread)) + if (condense_thread != NULL) zthr_resume(condense_thread); zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; - if (discard_thread != NULL && !zthr_isrunning(discard_thread)) + if (discard_thread != NULL) zthr_resume(discard_thread); } diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c index 863ec46b1..230ae5785 100644 --- a/module/zfs/spa_checkpoint.c +++ b/module/zfs/spa_checkpoint.c @@ -393,7 +393,7 @@ spa_checkpoint_discard_thread_check(void *arg, zthr_t *zthr) return (B_TRUE); } -int +void spa_checkpoint_discard_thread(void *arg, zthr_t *zthr) { spa_t *spa = arg; @@ -408,7 +408,7 @@ spa_checkpoint_discard_thread(void *arg, zthr_t *zthr) dmu_buf_t **dbp; if (zthr_iscancelled(zthr)) - return (0); + return; ASSERT3P(vd->vdev_ops, !=, &vdev_indirect_ops); @@ -445,8 +445,6 @@ spa_checkpoint_discard_thread(void *arg, zthr_t *zthr) VERIFY0(dsl_sync_task(spa->spa_name, NULL, spa_checkpoint_discard_complete_sync, spa, 0, ZFS_SPACE_CHECK_NONE)); - - return (0); } diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 070d1b8d9..d0725add8 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -647,7 +647,7 @@ spa_condense_indirect_thread_check(void *arg, zthr_t *zthr) } /* ARGSUSED */ -static int +static void spa_condense_indirect_thread(void *arg, zthr_t *zthr) { spa_t *spa = arg; @@ -744,13 +744,11 @@ spa_condense_indirect_thread(void *arg, zthr_t *zthr) * shutting down. */ if (zthr_iscancelled(zthr)) - return (0); + return; VERIFY0(dsl_sync_task(spa_name(spa), NULL, spa_condense_indirect_complete_sync, sci, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); - - return (0); } /* diff --git a/module/zfs/zthr.c b/module/zfs/zthr.c index c5b11dafc..64372f338 100644 --- a/module/zfs/zthr.c +++ b/module/zfs/zthr.c @@ -14,7 +14,7 @@ */ /* - * Copyright (c) 2017 by Delphix. All rights reserved. + * Copyright (c) 2017, 2019 by Delphix. All rights reserved. */ /* @@ -28,7 +28,7 @@ * * 1] The operation needs to run over multiple txgs. 
* 2] There is be a single point of reference in memory or on disk that - * indicates whether the operation should run/is running or is + * indicates whether the operation should run/is running or has * stopped. * * If the operation satisfies the above then the following rules guarantee @@ -51,6 +51,9 @@ * during creation to wakeup on its own after a specified interval * [see zthr_create_timer()]. * + * Note: ZTHR threads are NOT a replacement for generic threads! Please + * ensure that they fit your use-case well before using them. + * * == ZTHR creation * * Every zthr needs three inputs to start running: @@ -64,17 +67,17 @@ * 2] A user-defined ZTHR function (func) which the zthr executes when * it is not sleeping. The function should adhere to the following * signature type: - * int func_name(void *args, zthr_t *t); + * void func_name(void *args, zthr_t *t); * * 3] A void args pointer that will be passed to checkfunc and func * implicitly by the infrastructure. * * The reason why the above API needs two different functions, * instead of one that both checks and does the work, has to do with - * the zthr's internal lock (zthr_lock) and the allowed cancellation - * windows. We want to hold the zthr_lock while running checkfunc - * but not while running func. This way the zthr can be cancelled - * while doing work and not while checking for work. + * the zthr's internal state lock (zthr_state_lock) and the allowed + * cancellation windows. We want to hold the zthr_state_lock while + * running checkfunc but not while running func. This way the zthr + * can be cancelled while doing work and not while checking for work. * * To start a zthr: * zthr_t *zthr_pointer = zthr_create(checkfunc, func, args); @@ -83,7 +86,7 @@ * args, max_sleep); * * After that you should be able to wakeup, cancel, and resume the - * zthr from another thread using zthr_pointer. + * zthr from another thread using the zthr_pointer. * * NOTE: ZTHR threads could potentially wake up spuriously and the * user should take this into account when writing a checkfunc. @@ -102,8 +105,8 @@ * zthr_resume(zthr_pointer); * * A zthr will implicitly check if it has received a cancellation - * signal every time func returns and everytime it wakes up [see ZTHR - * state transitions below]. + * signal every time func returns and every time it wakes up [see + * ZTHR state transitions below]. * * At times, waiting for the zthr's func to finish its job may take * time. This may be very time-consuming for some operations that @@ -119,17 +122,8 @@ * while (!work_done && !zthr_iscancelled(t)) { * ... ... * } - * return (0); * } * - * == ZTHR exit - * - * For the rare cases where the zthr wants to stop running voluntarily - * while running its ZTHR function (func), we provide zthr_exit(). - * When a zthr has voluntarily stopped running, it can be resumed with - * zthr_resume(), just like it would if it was cancelled by some other - * thread. - * * == ZTHR cleanup * * Cancelling a zthr doesn't clean up its metadata (internal locks, @@ -165,49 +159,86 @@ * v * zthr stopped running * + * == Implementation of ZTHR requests + * + * ZTHR wakeup, cancel, and resume are requests on a zthr to + * change its internal state. Requests on a zthr are serialized + * using the zthr_request_lock, while changes in its internal + * state are protected by the zthr_state_lock. A request will + * first acquire the zthr_request_lock and then immediately + * acquire the zthr_state_lock. 
We do this so that incoming
+ * requests are serialized using the request lock, while still
+ * allowing us to use the state lock for thread communication
+ * via zthr_cv.
 */
 
 #include <sys/zfs_context.h>
 #include <sys/zthr.h>
 
-void
-zthr_exit(zthr_t *t, int rc)
-{
-	ASSERT3P(t->zthr_thread, ==, curthread);
-	mutex_enter(&t->zthr_lock);
-	t->zthr_thread = NULL;
-	t->zthr_rc = rc;
-	cv_broadcast(&t->zthr_cv);
-	mutex_exit(&t->zthr_lock);
-	thread_exit();
-}
+struct zthr {
+	/* running thread doing the work */
+	kthread_t	*zthr_thread;
+
+	/* lock protecting internal data & invariants */
+	kmutex_t	zthr_state_lock;
+
+	/* mutex that serializes external requests */
+	kmutex_t	zthr_request_lock;
+
+	/* notification mechanism for requests */
+	kcondvar_t	zthr_cv;
+
+	/* flag set to true if we are canceling the zthr */
+	boolean_t	zthr_cancel;
+
+	/*
+	 * maximum amount of time that the zthr spends sleeping;
+	 * if this is 0, the thread doesn't wake up until it gets
+	 * signaled.
+	 */
+	hrtime_t	zthr_wait_time;
+
+	/* consumer-provided callbacks & data */
+	zthr_checkfunc_t	*zthr_checkfunc;
+	zthr_func_t	*zthr_func;
+	void		*zthr_arg;
+};
 
 static void
 zthr_procedure(void *arg)
 {
 	zthr_t *t = arg;
-	int rc = 0;
 
-	mutex_enter(&t->zthr_lock);
+	mutex_enter(&t->zthr_state_lock);
+	ASSERT3P(t->zthr_thread, ==, curthread);
+
 	while (!t->zthr_cancel) {
 		if (t->zthr_checkfunc(t->zthr_arg, t)) {
-			mutex_exit(&t->zthr_lock);
-			rc = t->zthr_func(t->zthr_arg, t);
-			mutex_enter(&t->zthr_lock);
+			mutex_exit(&t->zthr_state_lock);
+			t->zthr_func(t->zthr_arg, t);
+			mutex_enter(&t->zthr_state_lock);
 		} else {
 			/* go to sleep */
 			if (t->zthr_wait_time == 0) {
-				cv_wait_sig(&t->zthr_cv, &t->zthr_lock);
			} else {
 				(void) cv_timedwait_sig_hires(&t->zthr_cv,
-				    &t->zthr_lock, t->zthr_wait_time,
+				    &t->zthr_state_lock, t->zthr_wait_time,
 				    MSEC2NSEC(1), 0);
 			}
 		}
 	}
-	mutex_exit(&t->zthr_lock);
-	zthr_exit(t, rc);
+
+	/*
+	 * Clear out the kernel thread metadata and notify the
+	 * zthr_cancel() thread that we've stopped running.
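+	 * Since zthr_thread is cleared and the broadcast is issued
+	 * while zthr_state_lock is still held, a concurrent
+	 * zthr_cancel() cannot miss the wakeup: it is either already
+	 * blocked in cv_wait() on zthr_cv, or it still holds the lock
+	 * and will observe zthr_thread == NULL before it ever waits.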
+ */ + t->zthr_thread = NULL; + t->zthr_cancel = B_FALSE; + cv_broadcast(&t->zthr_cv); + + mutex_exit(&t->zthr_state_lock); + thread_exit(); } zthr_t * @@ -226,10 +257,11 @@ zthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg, hrtime_t max_sleep) { zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP); - mutex_init(&t->zthr_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&t->zthr_state_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&t->zthr_request_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL); - mutex_enter(&t->zthr_lock); + mutex_enter(&t->zthr_state_lock); t->zthr_checkfunc = checkfunc; t->zthr_func = func; t->zthr_arg = arg; @@ -237,7 +269,7 @@ zthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func, t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, 0, &p0, TS_RUN, minclsyspri); - mutex_exit(&t->zthr_lock); + mutex_exit(&t->zthr_state_lock); return (t); } @@ -245,71 +277,130 @@ zthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void zthr_destroy(zthr_t *t) { + ASSERT(!MUTEX_HELD(&t->zthr_state_lock)); + ASSERT(!MUTEX_HELD(&t->zthr_request_lock)); VERIFY3P(t->zthr_thread, ==, NULL); - mutex_destroy(&t->zthr_lock); + mutex_destroy(&t->zthr_request_lock); + mutex_destroy(&t->zthr_state_lock); cv_destroy(&t->zthr_cv); kmem_free(t, sizeof (*t)); } /* - * Note: If the zthr is not sleeping and misses the wakeup - * (e.g it is running its ZTHR function), it will check if - * there is work to do before going to sleep using its checker - * function [see ZTHR state transition in ZTHR block comment]. - * Thus, missing the wakeup still yields the expected behavior. + * Wake up the zthr if it is sleeping. If the thread has been + * cancelled that does nothing. */ void zthr_wakeup(zthr_t *t) { - mutex_enter(&t->zthr_lock); + mutex_enter(&t->zthr_request_lock); + mutex_enter(&t->zthr_state_lock); + + /* + * There are 4 states that we can find the zthr when issuing + * this broadcast: + * + * [1] The common case of the thread being asleep, at which + * point the broadcast will wake it up. + * [2] The thread has been cancelled. Waking up a cancelled + * thread is a no-op. Any work that is still left to be + * done should be handled the next time the thread is + * resumed. + * [3] The thread is doing work and is already up, so this + * is basically a no-op. + * [4] The thread was just created/resumed, in which case the + * behavior is similar to [3]. + */ cv_broadcast(&t->zthr_cv); - mutex_exit(&t->zthr_lock); + + mutex_exit(&t->zthr_state_lock); + mutex_exit(&t->zthr_request_lock); } /* - * Note: If the zthr is not running (e.g. has been cancelled + * Sends a cancel request to the zthr and blocks until the zthr is + * cancelled. If the zthr is not running (e.g. has been cancelled * already), this is a no-op. */ -int +void zthr_cancel(zthr_t *t) { - int rc = 0; + mutex_enter(&t->zthr_request_lock); + mutex_enter(&t->zthr_state_lock); - mutex_enter(&t->zthr_lock); + /* + * Since we are holding the zthr_state_lock at this point + * we can find the state in one of the following 4 states: + * + * [1] The thread has already been cancelled, therefore + * there is nothing for us to do. + * [2] The thread is sleeping, so we broadcast the CV first + * to wake it up and then we set the flag and we are + * waiting for it to exit. + * [3] The thread is doing work, in which case we just set + * the flag and wait for it to finish. + * [4] The thread was just created/resumed, in which case + * the behavior is similar to [3]. 
+ * + * Since requests are serialized, by the time that we get + * control back we expect that the zthr is cancelled and + * not running anymore. + */ + if (t->zthr_thread != NULL) { + t->zthr_cancel = B_TRUE; - /* broadcast in case the zthr is sleeping */ - cv_broadcast(&t->zthr_cv); + /* broadcast in case the zthr is sleeping */ + cv_broadcast(&t->zthr_cv); - t->zthr_cancel = B_TRUE; - while (t->zthr_thread != NULL) - cv_wait(&t->zthr_cv, &t->zthr_lock); - t->zthr_cancel = B_FALSE; - rc = t->zthr_rc; - mutex_exit(&t->zthr_lock); + while (t->zthr_thread != NULL) + cv_wait(&t->zthr_cv, &t->zthr_state_lock); - return (rc); + ASSERT(!t->zthr_cancel); + } + + mutex_exit(&t->zthr_state_lock); + mutex_exit(&t->zthr_request_lock); } +/* + * Sends a resume request to the supplied zthr. If the zthr is + * already running this is a no-op. + */ void zthr_resume(zthr_t *t) { - ASSERT3P(t->zthr_thread, ==, NULL); - - mutex_enter(&t->zthr_lock); + mutex_enter(&t->zthr_request_lock); + mutex_enter(&t->zthr_state_lock); ASSERT3P(&t->zthr_checkfunc, !=, NULL); ASSERT3P(&t->zthr_func, !=, NULL); ASSERT(!t->zthr_cancel); - t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, - 0, &p0, TS_RUN, minclsyspri); + /* + * There are 4 states that we find the zthr in at this point + * given the locks that we hold: + * + * [1] The zthr was cancelled, so we spawn a new thread for + * the zthr (common case). + * [2] The zthr is running at which point this is a no-op. + * [3] The zthr is sleeping at which point this is a no-op. + * [4] The zthr was just spawned at which point this is a + * no-op. + */ + if (t->zthr_thread == NULL) { + t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, + 0, &p0, TS_RUN, minclsyspri); + } - mutex_exit(&t->zthr_lock); + mutex_exit(&t->zthr_state_lock); + mutex_exit(&t->zthr_request_lock); } /* * This function is intended to be used by the zthr itself - * to check if another thread has signal it to stop running. + * (specifically the zthr_func callback provided) to check + * if another thread has signaled it to stop running before + * doing some expensive operation. * * returns TRUE if we are in the middle of trying to cancel * this thread. @@ -319,25 +410,22 @@ zthr_resume(zthr_t *t) boolean_t zthr_iscancelled(zthr_t *t) { - boolean_t cancelled; - ASSERT3P(t->zthr_thread, ==, curthread); - mutex_enter(&t->zthr_lock); - cancelled = t->zthr_cancel; - mutex_exit(&t->zthr_lock); - + /* + * The majority of the functions here grab zthr_request_lock + * first and then zthr_state_lock. This function only grabs + * the zthr_state_lock. That is because this function should + * only be called from the zthr_func to check if someone has + * issued a zthr_cancel() on the thread. If there is a zthr_cancel() + * happening concurrently, attempting to grab the request lock + * here would result in a deadlock. + * + * By grabbing only the zthr_state_lock this function is allowed + * to run concurrently with a zthr_cancel() request. + */ + mutex_enter(&t->zthr_state_lock); + boolean_t cancelled = t->zthr_cancel; + mutex_exit(&t->zthr_state_lock); return (cancelled); } - -boolean_t -zthr_isrunning(zthr_t *t) -{ - boolean_t running; - - mutex_enter(&t->zthr_lock); - running = (t->zthr_thread != NULL); - mutex_exit(&t->zthr_lock); - - return (running); -}