mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-04-12 22:51:46 +03:00
L2ARC: Scan-based depth cap for persistent markers
With persistent markers and inclusive scanning, the marker traverses the entire ARC state across many feed cycles, writing buffers far from the tail that may no longer be relevant.

Track the cumulative bytes scanned per pass in l2arc_ext_scanned. When the bytes scanned in a pass reach l2arc_ext_headroom_pct (default 25%) of the ARC state size, reset that pass's markers to the tail via lazy reset flags. This keeps markers focused on the tail zone, where buffers about to be evicted have the most value for L2ARC.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #18289
This commit is contained in:
parent
15fc3d64c8
commit
62ca8f721b
@ -46,6 +46,10 @@ extern "C" {
|
||||
* and each of the states has two types: data and metadata.
|
||||
*/
|
||||
#define L2ARC_FEED_TYPES 4
|
||||
#define L2ARC_MFU_META 0
|
||||
#define L2ARC_MRU_META 1
|
||||
#define L2ARC_MFU_DATA 2
|
||||
#define L2ARC_MRU_DATA 3
|
||||
|
||||
/*
|
||||
* L2ARC state and statistics for persistent marker management.
|
||||
@ -62,6 +66,12 @@ typedef struct l2arc_info {
|
||||
boolean_t *l2arc_sublist_busy[L2ARC_FEED_TYPES];
|
||||
boolean_t *l2arc_sublist_reset[L2ARC_FEED_TYPES];
|
||||
kmutex_t l2arc_sublist_lock; /* protects busy/reset flags */
|
||||
/*
|
||||
* Cumulative bytes scanned per pass since marker reset.
|
||||
* Limits how far persistent markers advance from tail
|
||||
* before resetting, based on % of state size.
|
||||
*/
|
||||
uint64_t l2arc_ext_scanned[L2ARC_FEED_TYPES];
|
||||
int l2arc_next_sublist[L2ARC_FEED_TYPES]; /* round-robin */
|
||||
} l2arc_info_t;
|
||||
|
||||
|
||||
@ -227,6 +227,12 @@ to enable caching/reading prefetches to/from L2ARC.
|
||||
.It Sy l2arc_norw Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||
No reads during writes.
|
||||
.
|
||||
.It Sy l2arc_ext_headroom_pct Ns = Ns Sy 25 Pq u64
|
||||
Percentage of each ARC state's size that a pass may scan before
|
||||
resetting its markers to the tail.
|
||||
Lower values keep the marker closer to the tail under active workloads.
|
||||
Set to 0 to disable the depth cap.
|
||||
.
|
||||
.It Sy l2arc_write_max Ns = Ns Sy 33554432 Ns B Po 32 MiB Pc Pq u64
|
||||
Maximum write rate in bytes per second for each L2ARC device.
|
||||
Used directly during initial fill, when DWPD limiting is disabled,
|
||||
|
||||
@ -956,6 +956,13 @@ int l2arc_exclude_special = 0;
|
||||
*/
|
||||
static int l2arc_mfuonly = 0;
|
||||
|
||||
/*
|
||||
* Depth cap as percentage of state size. Each pass resets its markers
|
||||
* to tail after scanning this fraction of the state. Keeps markers
|
||||
* focused on the tail zone where L2ARC adds the most value.
|
||||
*/
|
||||
static uint64_t l2arc_ext_headroom_pct = 25;
|
||||
|
||||
/*
|
||||
* L2ARC TRIM
|
||||
* l2arc_trim_ahead : A ZFS module parameter that controls how much ahead of
|
||||
@ -9083,6 +9090,8 @@ l2arc_pool_markers_init(spa_t *spa)
|
||||
spa->spa_l2arc_info.l2arc_markers[pass][i]);
|
||||
multilist_sublist_unlock(mls);
|
||||
}
|
||||
|
||||
spa->spa_l2arc_info.l2arc_ext_scanned[pass] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -9875,6 +9884,31 @@ l2arc_blk_fetch_done(zio_t *zio)
|
||||
kmem_free(cb, sizeof (l2arc_read_callback_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the total size of the ARC state corresponding to the given
|
||||
* L2ARC pass number (0..3).
|
||||
*/
|
||||
static uint64_t
|
||||
l2arc_get_state_size(int pass)
|
||||
{
|
||||
switch (pass) {
|
||||
case L2ARC_MFU_META:
|
||||
return (zfs_refcount_count(
|
||||
&arc_mfu->arcs_size[ARC_BUFC_METADATA]));
|
||||
case L2ARC_MRU_META:
|
||||
return (zfs_refcount_count(
|
||||
&arc_mru->arcs_size[ARC_BUFC_METADATA]));
|
||||
case L2ARC_MFU_DATA:
|
||||
return (zfs_refcount_count(
|
||||
&arc_mfu->arcs_size[ARC_BUFC_DATA]));
|
||||
case L2ARC_MRU_DATA:
|
||||
return (zfs_refcount_count(
|
||||
&arc_mru->arcs_size[ARC_BUFC_DATA]));
|
||||
default:
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Flag all sublists for a single pass for lazy marker reset to tail.
|
||||
* Each sublist's marker will be reset when next visited by a feed thread.
|
||||
@ -9892,6 +9926,8 @@ l2arc_flag_pass_reset(spa_t *spa, int pass)
|
||||
spa->spa_l2arc_info.l2arc_sublist_reset[pass][i] = B_TRUE;
|
||||
multilist_sublist_unlock(mls);
|
||||
}
|
||||
|
||||
spa->spa_l2arc_info.l2arc_ext_scanned[pass] = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -10044,6 +10080,31 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
(spa->spa_l2arc_info.l2arc_next_sublist[pass] + 1) %
|
||||
num_sublists;
|
||||
|
||||
/*
|
||||
* Depth cap: track cumulative bytes scanned per pass
|
||||
* and reset markers when the scan cap is reached.
|
||||
* Keeps the marker near the tail where L2ARC adds
|
||||
* the most value.
|
||||
*/
|
||||
if (save_position) {
|
||||
mutex_enter(&spa->spa_l2arc_info.l2arc_sublist_lock);
|
||||
|
||||
spa->spa_l2arc_info.l2arc_ext_scanned[pass] +=
|
||||
consumed_headroom;
|
||||
|
||||
uint64_t state_sz = l2arc_get_state_size(pass);
|
||||
uint64_t scan_cap =
|
||||
state_sz * l2arc_ext_headroom_pct / 100;
|
||||
|
||||
if (scan_cap > 0 &&
|
||||
spa->spa_l2arc_info.l2arc_ext_scanned[pass] >=
|
||||
scan_cap) {
|
||||
l2arc_flag_pass_reset(spa, pass);
|
||||
}
|
||||
|
||||
mutex_exit(&spa->spa_l2arc_info.l2arc_sublist_lock);
|
||||
}
|
||||
|
||||
if (full == B_TRUE)
|
||||
break;
|
||||
}
|
||||
@ -11691,6 +11752,9 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
|
||||
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
|
||||
"Exclude dbufs on special vdevs from being cached to L2ARC if set.");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, ext_headroom_pct, U64, ZMOD_RW,
|
||||
"Depth cap as percentage of state size for marker reset");
|
||||
|
||||
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
|
||||
param_get_uint, ZMOD_RW, "System free memory I/O throttle in bytes");
|
||||
|
||||
|
||||
@ -47,6 +47,7 @@ INITIALIZE_VALUE initialize_value zfs_initialize_value
|
||||
KEEP_LOG_SPACEMAPS_AT_EXPORT keep_log_spacemaps_at_export zfs_keep_log_spacemaps_at_export
|
||||
LUA_MAX_MEMLIMIT lua.max_memlimit zfs_lua_max_memlimit
|
||||
L2ARC_DWPD_LIMIT l2arc.dwpd_limit l2arc_dwpd_limit
|
||||
L2ARC_EXT_HEADROOM_PCT l2arc.ext_headroom_pct l2arc_ext_headroom_pct
|
||||
L2ARC_MFUONLY l2arc.mfuonly l2arc_mfuonly
|
||||
L2ARC_NOPREFETCH l2arc.noprefetch l2arc_noprefetch
|
||||
L2ARC_REBUILD_BLOCKS_MIN_L2SIZE l2arc.rebuild_blocks_min_l2size l2arc_rebuild_blocks_min_l2size
|
||||
|
||||
Loading…
Reference in New Issue
Block a user