From 15fc3d64c837d633e44053137ed03c2edcf0c5d0 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 5 Mar 2026 20:12:55 +0500 Subject: [PATCH] L2ARC: Lazy sublist reset flags for persistent markers Replace direct marker-to-tail manipulation with per-sublist boolean flags consumed lazily by feed threads. Each scanning thread resets its own marker when it sees the flag, rather than having another thread manipulate the marker directly. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #18289 --- include/sys/arc_impl.h | 6 ++- module/zfs/arc.c | 96 ++++++++++++++++++++++++++---------------- 2 files changed, 64 insertions(+), 38 deletions(-) diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index 14f797959..b1bea24ac 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -56,10 +56,12 @@ typedef struct l2arc_info { uint64_t l2arc_total_capacity; /* total L2ARC capacity */ uint64_t l2arc_smallest_capacity; /* smallest device capacity */ /* - * Per-device thread coordination for sublist processing + * Per-device thread coordination for sublist processing. + * reset: flags sublist marker for lazy reset to tail. */ boolean_t *l2arc_sublist_busy[L2ARC_FEED_TYPES]; - kmutex_t l2arc_sublist_lock; /* protects busy flags */ + boolean_t *l2arc_sublist_reset[L2ARC_FEED_TYPES]; + kmutex_t l2arc_sublist_lock; /* protects busy/reset flags */ int l2arc_next_sublist[L2ARC_FEED_TYPES]; /* round-robin */ } l2arc_info_t; diff --git a/module/zfs/arc.c b/module/zfs/arc.c index bd94a43ce..736faaf51 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -9073,6 +9073,8 @@ l2arc_pool_markers_init(spa_t *spa) arc_state_alloc_markers(num_sublists); spa->spa_l2arc_info.l2arc_sublist_busy[pass] = kmem_zalloc(num_sublists * sizeof (boolean_t), KM_SLEEP); + spa->spa_l2arc_info.l2arc_sublist_reset[pass] = + kmem_zalloc(num_sublists * sizeof (boolean_t), KM_SLEEP); for (int i = 0; i < num_sublists; i++) { multilist_sublist_t *mls = @@ -9117,12 +9119,18 @@ l2arc_pool_markers_fini(spa_t *spa) num_sublists); spa->spa_l2arc_info.l2arc_markers[pass] = NULL; - /* Free sublist busy flags for this pass */ + /* Free sublist busy and reset flags for this pass */ ASSERT3P(spa->spa_l2arc_info.l2arc_sublist_busy[pass], !=, NULL); kmem_free(spa->spa_l2arc_info.l2arc_sublist_busy[pass], num_sublists * sizeof (boolean_t)); spa->spa_l2arc_info.l2arc_sublist_busy[pass] = NULL; + + ASSERT3P(spa->spa_l2arc_info.l2arc_sublist_reset[pass], !=, + NULL); + kmem_free(spa->spa_l2arc_info.l2arc_sublist_reset[pass], + num_sublists * sizeof (boolean_t)); + spa->spa_l2arc_info.l2arc_sublist_reset[pass] = NULL; } mutex_destroy(&spa->spa_l2arc_info.l2arc_sublist_lock); @@ -9608,6 +9616,19 @@ l2arc_write_sublist(spa_t *spa, l2arc_dev_t *dev, int pass, int sublist_idx, persistent_marker = spa->spa_l2arc_info. l2arc_markers[pass][sublist_idx]; + /* + * Check if this sublist's marker was flagged for reset to tail. + * This handles depth cap resets and global resets without needing + * to coordinate with actively-scanning threads. + */ + if (save_position && + spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx]) { + multilist_sublist_remove(mls, persistent_marker); + multilist_sublist_insert_tail(mls, persistent_marker); + spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx] = + B_FALSE; + } + if (save_position && persistent_marker == multilist_sublist_head(mls)) { multilist_sublist_unlock(mls); return (B_FALSE); @@ -9798,14 +9819,24 @@ next: } /* - * Position persistent marker for next iteration. In case of - * save_position, validate that prev_hdr still belongs to the current - * sublist. The sublist lock is dropped during L2ARC write I/O, allowing - * ARC eviction to potentially free prev_hdr. If freed, we can't do much - * except to reset the marker. + * Position persistent marker for next iteration. + * + * If a reset was flagged during our scan (sublist lock was dropped + * for I/O, allowing another thread to set the flag), honor it by + * moving the marker to tail instead of advancing. + * + * Otherwise, validate that prev_hdr still belongs to the current + * sublist. The sublist lock is dropped during L2ARC write I/O, + * allowing ARC eviction to potentially free prev_hdr. If freed, + * we can't do much except to reset the marker. */ multilist_sublist_remove(mls, persistent_marker); if (save_position && + spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx]) { + multilist_sublist_insert_tail(mls, persistent_marker); + spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx] = + B_FALSE; + } else if (save_position && multilist_link_active(&prev_hdr->b_l1hdr.b_arc_node)) { if (hdr != NULL) { /* @@ -9845,40 +9876,33 @@ l2arc_blk_fetch_done(zio_t *zio) } /* - * Reset all L2ARC markers to tail position for the given spa. + * Flag all sublists for a single pass for lazy marker reset to tail. + * Each sublist's marker will be reset when next visited by a feed thread. + */ +static void +l2arc_flag_pass_reset(spa_t *spa, int pass) +{ + ASSERT(MUTEX_HELD(&spa->spa_l2arc_info.l2arc_sublist_lock)); + + multilist_t *ml = l2arc_get_list(pass); + int num_sublists = multilist_get_num_sublists(ml); + + for (int i = 0; i < num_sublists; i++) { + multilist_sublist_t *mls = multilist_sublist_lock_idx(ml, i); + spa->spa_l2arc_info.l2arc_sublist_reset[pass][i] = B_TRUE; + multilist_sublist_unlock(mls); + } +} + +/* + * Flag all L2ARC markers for lazy reset to tail for the given spa. + * Each sublist's marker will be reset when next visited by a feed thread. */ static void l2arc_reset_all_markers(spa_t *spa) { - ASSERT(spa->spa_l2arc_info.l2arc_markers != NULL); - ASSERT(MUTEX_HELD(&spa->spa_l2arc_info.l2arc_sublist_lock)); - - for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) { - if (spa->spa_l2arc_info.l2arc_markers[pass] == NULL) - continue; - - multilist_t *ml = l2arc_get_list(pass); - int num_sublists = multilist_get_num_sublists(ml); - - for (int i = 0; i < num_sublists; i++) { - ASSERT3P(spa->spa_l2arc_info.l2arc_markers[pass][i], - !=, NULL); - multilist_sublist_t *mls = - multilist_sublist_lock_idx(ml, i); - - /* Remove from current position */ - ASSERT(multilist_link_active(&spa->spa_l2arc_info. - l2arc_markers[pass][i]->b_l1hdr.b_arc_node)); - multilist_sublist_remove(mls, spa->spa_l2arc_info. - l2arc_markers[pass][i]); - - /* Insert at tail (like initialization) */ - multilist_sublist_insert_tail(mls, - spa->spa_l2arc_info.l2arc_markers[pass][i]); - - multilist_sublist_unlock(mls); - } - } + for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) + l2arc_flag_pass_reset(spa, pass); /* Reset write counter */ spa->spa_l2arc_info.l2arc_total_writes = 0;