L2ARC: Lazy sublist reset flags for persistent markers

Replace direct marker-to-tail manipulation with per-sublist boolean
flags consumed lazily by feed threads.  Each scanning thread resets its
own marker when it sees the flag, rather than having another thread
manipulate the marker directly.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #18289
This commit is contained in:
Ameer Hamza 2026-03-05 20:12:55 +05:00 committed by Brian Behlendorf
parent 22fdaf0b1f
commit 15fc3d64c8
2 changed files with 64 additions and 38 deletions

View File

@@ -56,10 +56,12 @@ typedef struct l2arc_info {
uint64_t l2arc_total_capacity; /* total L2ARC capacity */ uint64_t l2arc_total_capacity; /* total L2ARC capacity */
uint64_t l2arc_smallest_capacity; /* smallest device capacity */ uint64_t l2arc_smallest_capacity; /* smallest device capacity */
/* /*
* Per-device thread coordination for sublist processing * Per-device thread coordination for sublist processing.
* reset: flags sublist marker for lazy reset to tail.
*/ */
boolean_t *l2arc_sublist_busy[L2ARC_FEED_TYPES]; boolean_t *l2arc_sublist_busy[L2ARC_FEED_TYPES];
kmutex_t l2arc_sublist_lock; /* protects busy flags */ boolean_t *l2arc_sublist_reset[L2ARC_FEED_TYPES];
kmutex_t l2arc_sublist_lock; /* protects busy/reset flags */
int l2arc_next_sublist[L2ARC_FEED_TYPES]; /* round-robin */ int l2arc_next_sublist[L2ARC_FEED_TYPES]; /* round-robin */
} l2arc_info_t; } l2arc_info_t;

View File

@@ -9073,6 +9073,8 @@ l2arc_pool_markers_init(spa_t *spa)
arc_state_alloc_markers(num_sublists); arc_state_alloc_markers(num_sublists);
spa->spa_l2arc_info.l2arc_sublist_busy[pass] = spa->spa_l2arc_info.l2arc_sublist_busy[pass] =
kmem_zalloc(num_sublists * sizeof (boolean_t), KM_SLEEP); kmem_zalloc(num_sublists * sizeof (boolean_t), KM_SLEEP);
spa->spa_l2arc_info.l2arc_sublist_reset[pass] =
kmem_zalloc(num_sublists * sizeof (boolean_t), KM_SLEEP);
for (int i = 0; i < num_sublists; i++) { for (int i = 0; i < num_sublists; i++) {
multilist_sublist_t *mls = multilist_sublist_t *mls =
@@ -9117,12 +9119,18 @@ l2arc_pool_markers_fini(spa_t *spa)
num_sublists); num_sublists);
spa->spa_l2arc_info.l2arc_markers[pass] = NULL; spa->spa_l2arc_info.l2arc_markers[pass] = NULL;
/* Free sublist busy flags for this pass */ /* Free sublist busy and reset flags for this pass */
ASSERT3P(spa->spa_l2arc_info.l2arc_sublist_busy[pass], !=, ASSERT3P(spa->spa_l2arc_info.l2arc_sublist_busy[pass], !=,
NULL); NULL);
kmem_free(spa->spa_l2arc_info.l2arc_sublist_busy[pass], kmem_free(spa->spa_l2arc_info.l2arc_sublist_busy[pass],
num_sublists * sizeof (boolean_t)); num_sublists * sizeof (boolean_t));
spa->spa_l2arc_info.l2arc_sublist_busy[pass] = NULL; spa->spa_l2arc_info.l2arc_sublist_busy[pass] = NULL;
ASSERT3P(spa->spa_l2arc_info.l2arc_sublist_reset[pass], !=,
NULL);
kmem_free(spa->spa_l2arc_info.l2arc_sublist_reset[pass],
num_sublists * sizeof (boolean_t));
spa->spa_l2arc_info.l2arc_sublist_reset[pass] = NULL;
} }
mutex_destroy(&spa->spa_l2arc_info.l2arc_sublist_lock); mutex_destroy(&spa->spa_l2arc_info.l2arc_sublist_lock);
@@ -9608,6 +9616,19 @@ l2arc_write_sublist(spa_t *spa, l2arc_dev_t *dev, int pass, int sublist_idx,
persistent_marker = spa->spa_l2arc_info. persistent_marker = spa->spa_l2arc_info.
l2arc_markers[pass][sublist_idx]; l2arc_markers[pass][sublist_idx];
/*
* Check if this sublist's marker was flagged for reset to tail.
* This handles depth cap resets and global resets without needing
* to coordinate with actively-scanning threads.
*/
if (save_position &&
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx]) {
multilist_sublist_remove(mls, persistent_marker);
multilist_sublist_insert_tail(mls, persistent_marker);
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx] =
B_FALSE;
}
if (save_position && persistent_marker == multilist_sublist_head(mls)) { if (save_position && persistent_marker == multilist_sublist_head(mls)) {
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
return (B_FALSE); return (B_FALSE);
@@ -9798,14 +9819,24 @@ next:
} }
/* /*
* Position persistent marker for next iteration. In case of * Position persistent marker for next iteration.
* save_position, validate that prev_hdr still belongs to the current *
* sublist. The sublist lock is dropped during L2ARC write I/O, allowing * If a reset was flagged during our scan (sublist lock was dropped
* ARC eviction to potentially free prev_hdr. If freed, we can't do much * for I/O, allowing another thread to set the flag), honor it by
* except to reset the marker. * moving the marker to tail instead of advancing.
*
* Otherwise, validate that prev_hdr still belongs to the current
* sublist. The sublist lock is dropped during L2ARC write I/O,
* allowing ARC eviction to potentially free prev_hdr. If freed,
* we can't do much except to reset the marker.
*/ */
multilist_sublist_remove(mls, persistent_marker); multilist_sublist_remove(mls, persistent_marker);
if (save_position && if (save_position &&
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx]) {
multilist_sublist_insert_tail(mls, persistent_marker);
spa->spa_l2arc_info.l2arc_sublist_reset[pass][sublist_idx] =
B_FALSE;
} else if (save_position &&
multilist_link_active(&prev_hdr->b_l1hdr.b_arc_node)) { multilist_link_active(&prev_hdr->b_l1hdr.b_arc_node)) {
if (hdr != NULL) { if (hdr != NULL) {
/* /*
@@ -9845,40 +9876,33 @@ l2arc_blk_fetch_done(zio_t *zio)
} }
/* /*
* Reset all L2ARC markers to tail position for the given spa. * Flag all sublists for a single pass for lazy marker reset to tail.
* Each sublist's marker will be reset when next visited by a feed thread.
*/
/*
 * Mark every sublist of the given feed pass so that the feed thread
 * which next visits each sublist lazily moves its persistent marker
 * back to the tail.  No marker is touched here — only the per-sublist
 * reset flag is set.
 */
static void
l2arc_flag_pass_reset(spa_t *spa, int pass)
{
/* Caller must already hold the lock protecting the busy/reset flags. */
ASSERT(MUTEX_HELD(&spa->spa_l2arc_info.l2arc_sublist_lock));
multilist_t *ml = l2arc_get_list(pass);
int num_sublists = multilist_get_num_sublists(ml);
for (int i = 0; i < num_sublists; i++) {
/*
 * NOTE(review): the sublist lock is taken around a plain flag
 * store — presumably to order the store against feed threads
 * that read the flag under the same sublist lock in
 * l2arc_write_sublist(); confirm that readers do not check the
 * flag outside that lock.
 */
multilist_sublist_t *mls = multilist_sublist_lock_idx(ml, i);
spa->spa_l2arc_info.l2arc_sublist_reset[pass][i] = B_TRUE;
multilist_sublist_unlock(mls);
}
}
/*
* Flag all L2ARC markers for lazy reset to tail for the given spa.
* Each sublist's marker will be reset when next visited by a feed thread.
*/ */
static void static void
l2arc_reset_all_markers(spa_t *spa) l2arc_reset_all_markers(spa_t *spa)
{ {
ASSERT(spa->spa_l2arc_info.l2arc_markers != NULL); for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++)
ASSERT(MUTEX_HELD(&spa->spa_l2arc_info.l2arc_sublist_lock)); l2arc_flag_pass_reset(spa, pass);
for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
if (spa->spa_l2arc_info.l2arc_markers[pass] == NULL)
continue;
multilist_t *ml = l2arc_get_list(pass);
int num_sublists = multilist_get_num_sublists(ml);
for (int i = 0; i < num_sublists; i++) {
ASSERT3P(spa->spa_l2arc_info.l2arc_markers[pass][i],
!=, NULL);
multilist_sublist_t *mls =
multilist_sublist_lock_idx(ml, i);
/* Remove from current position */
ASSERT(multilist_link_active(&spa->spa_l2arc_info.
l2arc_markers[pass][i]->b_l1hdr.b_arc_node));
multilist_sublist_remove(mls, spa->spa_l2arc_info.
l2arc_markers[pass][i]);
/* Insert at tail (like initialization) */
multilist_sublist_insert_tail(mls,
spa->spa_l2arc_info.l2arc_markers[pass][i]);
multilist_sublist_unlock(mls);
}
}
/* Reset write counter */ /* Reset write counter */
spa->spa_l2arc_info.l2arc_total_writes = 0; spa->spa_l2arc_info.l2arc_total_writes = 0;