L2ARC: Lazy sublist reset flags for persistent markers

Replace direct marker-to-tail manipulation with per-sublist boolean
flags consumed lazily by feed threads.  Each scanning thread resets its
own marker when it sees the flag, rather than having another thread
manipulate the marker directly.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #18289
This commit is contained in:
Ameer Hamza 2026-03-05 20:12:55 +05:00 committed by Brian Behlendorf
parent 22fdaf0b1f
commit 15fc3d64c8
2 changed files with 64 additions and 38 deletions

View File

@ -56,10 +56,12 @@ typedef struct l2arc_info {
uint64_t l2arc_total_capacity; /* total L2ARC capacity */
uint64_t l2arc_smallest_capacity; /* smallest device capacity */
/*
* Per-device thread coordination for sublist processing
* Per-device thread coordination for sublist processing.
* reset: flags sublist marker for lazy reset to tail.
*/
boolean_t *l2arc_sublist_busy[L2ARC_FEED_TYPES];
kmutex_t l2arc_sublist_lock; /* protects busy flags */
boolean_t *l2arc_sublist_reset[L2ARC_FEED_TYPES];
kmutex_t l2arc_sublist_lock; /* protects busy/reset flags */
int l2arc_next_sublist[L2ARC_FEED_TYPES]; /* round-robin */
} l2arc_info_t;

View File

@ -9073,6 +9073,8 @@ l2arc_pool_markers_init(spa_t *spa)
arc_state_alloc_markers(num_sublists);
spa->spa_l2arc_info.l2arc_sublist_busy[pass] =
kmem_zalloc(num_sublists * sizeof (boolean_t), KM_SLEEP);
spa->spa_l2arc_info.l2arc_sublist_reset[pass] =
kmem_zalloc(num_sublists * sizeof (boolean_t), KM_SLEEP);
for (int i = 0; i < num_sublists; i++) {
multilist_sublist_t *mls =
@ -9117,12 +9119,18 @@ l2arc_pool_markers_fini(spa_t *spa)
num_sublists);
spa->spa_l2arc_info.l2arc_markers[pass] = NULL;
/* Free sublist busy flags for this pass */
/* Free sublist busy and reset flags for this pass */
ASSERT3P(spa->spa_l2arc_info.l2arc_sublist_busy[pass], !=,
NULL);
kmem_free(spa->spa_l2arc_info.l2arc_sublist_busy[pass],
num_sublists * sizeof (boolean_t));
spa->spa_l2arc_info.l2arc_sublist_busy[pass] = NULL;
ASSERT3P(spa->spa_l2arc_info.l2arc_sublist_reset[pass], !=,
NULL);
kmem_free(spa->spa_l2arc_info.l2arc_sublist_reset[pass],
num_sublists * sizeof (boolean_t));
spa->spa_l2arc_info.l2arc_sublist_reset[pass] = NULL;
}
mutex_destroy(&spa->spa_l2arc_info.l2arc_sublist_lock);
@ -9608,6 +9616,19 @@ l2arc_write_sublist(spa_t *spa, l2arc_dev_t *dev, int pass, int sublist_idx,
persistent_marker = spa->spa_l2arc_info.
l2arc_markers[pass][sublist_idx];
/*
* Check if this sublist's marker was flagged for reset to tail.
* This handles depth cap resets and global resets without needing
* to coordinate with actively-scanning threads.
*/
if (save_position &&
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx]) {
multilist_sublist_remove(mls, persistent_marker);
multilist_sublist_insert_tail(mls, persistent_marker);
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx] =
B_FALSE;
}
if (save_position && persistent_marker == multilist_sublist_head(mls)) {
multilist_sublist_unlock(mls);
return (B_FALSE);
@ -9798,14 +9819,24 @@ next:
}
/*
* Position persistent marker for next iteration. In case of
* save_position, validate that prev_hdr still belongs to the current
* sublist. The sublist lock is dropped during L2ARC write I/O, allowing
* ARC eviction to potentially free prev_hdr. If freed, we can't do much
* except to reset the marker.
* Position persistent marker for next iteration.
*
* If a reset was flagged during our scan (sublist lock was dropped
* for I/O, allowing another thread to set the flag), honor it by
* moving the marker to tail instead of advancing.
*
* Otherwise, validate that prev_hdr still belongs to the current
* sublist. The sublist lock is dropped during L2ARC write I/O,
* allowing ARC eviction to potentially free prev_hdr. If freed,
* we can't do much except to reset the marker.
*/
multilist_sublist_remove(mls, persistent_marker);
if (save_position &&
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx]) {
multilist_sublist_insert_tail(mls, persistent_marker);
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx] =
B_FALSE;
} else if (save_position &&
multilist_link_active(&prev_hdr->b_l1hdr.b_arc_node)) {
if (hdr != NULL) {
/*
@ -9845,40 +9876,33 @@ l2arc_blk_fetch_done(zio_t *zio)
}
/*
* Reset all L2ARC markers to tail position for the given spa.
* Flag all sublists for a single pass for lazy marker reset to tail.
* Each sublist's marker will be reset when next visited by a feed thread.
*/
static void
l2arc_flag_pass_reset(spa_t *spa, int pass)
{
ASSERT(MUTEX_HELD(&spa->spa_l2arc_info.l2arc_sublist_lock));
multilist_t *ml = l2arc_get_list(pass);
int num_sublists = multilist_get_num_sublists(ml);
for (int i = 0; i < num_sublists; i++) {
multilist_sublist_t *mls = multilist_sublist_lock_idx(ml, i);
spa->spa_l2arc_info.l2arc_sublist_reset[pass][i] = B_TRUE;
multilist_sublist_unlock(mls);
}
}
/*
* Flag all L2ARC markers for lazy reset to tail for the given spa.
* Each sublist's marker will be reset when next visited by a feed thread.
*/
static void
l2arc_reset_all_markers(spa_t *spa)
{
ASSERT(spa->spa_l2arc_info.l2arc_markers != NULL);
ASSERT(MUTEX_HELD(&spa->spa_l2arc_info.l2arc_sublist_lock));
for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
if (spa->spa_l2arc_info.l2arc_markers[pass] == NULL)
continue;
multilist_t *ml = l2arc_get_list(pass);
int num_sublists = multilist_get_num_sublists(ml);
for (int i = 0; i < num_sublists; i++) {
ASSERT3P(spa->spa_l2arc_info.l2arc_markers[pass][i],
!=, NULL);
multilist_sublist_t *mls =
multilist_sublist_lock_idx(ml, i);
/* Remove from current position */
ASSERT(multilist_link_active(&spa->spa_l2arc_info.
l2arc_markers[pass][i]->b_l1hdr.b_arc_node));
multilist_sublist_remove(mls, spa->spa_l2arc_info.
l2arc_markers[pass][i]);
/* Insert at tail (like initialization) */
multilist_sublist_insert_tail(mls,
spa->spa_l2arc_info.l2arc_markers[pass][i]);
multilist_sublist_unlock(mls);
}
}
for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++)
l2arc_flag_pass_reset(spa, pass);
/* Reset write counter */
spa->spa_l2arc_info.l2arc_total_writes = 0;