From 663dc86de2be3b7f43eacf8b2cc4ee45d11b3705 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Wed, 19 Nov 2025 21:21:10 +0500 Subject: [PATCH] Fix taskq NULL pointer dereference on timer race Remove unsafe timer_pending() check in taskq_cancel_id() that created a race where: - Timer expires and timer_pending() returns FALSE - task_done() frees task with tqent_func = NULL - Timer callback executes and queues freed task - Worker thread crashes executing NULL function Always call timer_delete_sync() unconditionally to ensure timer callback completes before task is freed. Reliably reproducible by injecting mdelay(10) after setting CANCEL flag to widen the race window, combined with frequent task cancellations (e.g., snapshot automount expiry). Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Ameer Hamza Closes #17942 --- module/os/linux/spl/spl-taskq.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 092f090d9..0eb16ae34 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -635,14 +635,31 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id) /* * The task_expire() function takes the tq->tq_lock so drop - * drop the lock before synchronously cancelling the timer. + * the lock before synchronously cancelling the timer. + * + * Always call timer_delete_sync() unconditionally. A + * timer_pending() check would be insufficient and unsafe. + * When a timer expires, it is immediately dequeued from the + * timer wheel (timer_pending() returns FALSE), but the + * callback (task_expire) may not run until later. + * + * The race window: + * 1) Timer expires and is dequeued - timer_pending() now + * returns FALSE + * 2) task_done() is called below, freeing the task, sets + * tqent_func = NULL and clears flags including CANCEL + * 3) Timer callback finally runs, sees no CANCEL flag, + * queues task to prio_list + * 4) Worker thread attempts to execute NULL tqent_func + * and panics + * + * timer_delete_sync() prevents this by ensuring the timer + * callback completes before the task is freed. */ - if (timer_pending(&t->tqent_timer)) { - spin_unlock_irqrestore(&tq->tq_lock, flags); - timer_delete_sync(&t->tqent_timer); - spin_lock_irqsave_nested(&tq->tq_lock, flags, - tq->tq_lock_class); - } + spin_unlock_irqrestore(&tq->tq_lock, flags); + timer_delete_sync(&t->tqent_timer); + spin_lock_irqsave_nested(&tq->tq_lock, flags, + tq->tq_lock_class); if (!(t->tqent_flags & TQENT_FLAG_PREALLOC)) task_done(tq, t);