mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-04-13 07:01:46 +03:00
L2ARC: Write budget fairness for metadata monopolization
Under heavy metadata load, metadata passes can monopolize the write budget every cycle while data passes get nothing written. Track consecutive monopolized cycles per device in l2ad_meta_cycles. After l2arc_meta_cycles (default 2) consecutive cycles where metadata fills the write budget, skip metadata for one cycle to let data run. Reset the counter when nothing is written. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com> Signed-off-by: Ameer Hamza <ahamza@ixsystems.com> Closes #18289
This commit is contained in:
parent
62ca8f721b
commit
b27a87f399
@ -469,6 +469,12 @@ typedef struct l2arc_dev {
|
||||
boolean_t l2ad_thread_exit; /* signal thread exit */
|
||||
kmutex_t l2ad_feed_thr_lock; /* thread sleep/wake */
|
||||
kcondvar_t l2ad_feed_cv; /* thread wakeup cv */
|
||||
/*
|
||||
* Consecutive cycles where metadata filled the write budget
|
||||
* while data passes got nothing written. Used to detect
|
||||
* monopolization and skip metadata to give data a chance.
|
||||
*/
|
||||
uint64_t l2ad_meta_cycles;
|
||||
} l2arc_dev_t;
|
||||
|
||||
/*
|
||||
|
||||
@ -233,6 +233,13 @@ resetting its markers to the tail.
|
||||
Lower values keep the marker closer to the tail under active workloads.
|
||||
Set to 0 to disable the depth cap.
|
||||
.
|
||||
.It Sy l2arc_meta_cycles Ns = Ns Sy 2 Pq u64
|
||||
How many consecutive cycles metadata may monopolize the write budget
|
||||
before being skipped for one cycle to let data run.
|
||||
The default of 2 gives metadata roughly 67% and data 33% of L2ARC
|
||||
write bandwidth under sustained load.
|
||||
Higher values favor metadata; set to 0 to disable the limit so that metadata is never skipped.
|
||||
.
|
||||
.It Sy l2arc_write_max Ns = Ns Sy 33554432 Ns B Po 32 MiB Pc Pq u64
|
||||
Maximum write rate in bytes per second for each L2ARC device.
|
||||
Used directly during initial fill, when DWPD limiting is disabled,
|
||||
|
||||
@ -963,6 +963,15 @@ static int l2arc_mfuonly = 0;
|
||||
*/
|
||||
static uint64_t l2arc_ext_headroom_pct = 25;
|
||||
|
||||
/*
|
||||
* Metadata monopolization limit. When metadata fills the write budget
|
||||
* for this many consecutive cycles while data gets nothing, skip metadata
|
||||
* for one cycle to let data run, then reset the counter.
|
||||
* With N=2, the steady-state pattern under sustained monopolization is
|
||||
* 2 metadata cycles followed by 1 data cycle (67%/33% split).
* A value of 0 disables the limit; metadata is then never skipped.
|
||||
*/
|
||||
static uint64_t l2arc_meta_cycles = 2;
|
||||
|
||||
/*
|
||||
* L2ARC TRIM
|
||||
* l2arc_trim_ahead : A ZFS module parameter that controls how much ahead of
|
||||
@ -9998,6 +10007,12 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
/*
|
||||
* Copy buffers for L2ARC writing.
|
||||
*/
|
||||
boolean_t skip_meta = (save_position &&
|
||||
l2arc_meta_cycles > 0 &&
|
||||
dev->l2ad_meta_cycles >= l2arc_meta_cycles);
|
||||
if (skip_meta)
|
||||
dev->l2ad_meta_cycles = 0;
|
||||
|
||||
for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
|
||||
/*
|
||||
* pass == 0: MFU meta
|
||||
@ -10013,6 +10028,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (skip_meta && pass <= L2ARC_MRU_META)
|
||||
continue;
|
||||
|
||||
headroom = target_sz * l2arc_headroom;
|
||||
if (zfs_compressed_arc_enabled)
|
||||
headroom = (headroom * l2arc_headroom_boost) / 100;
|
||||
@ -10080,6 +10098,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
(spa->spa_l2arc_info.l2arc_next_sublist[pass] + 1) %
|
||||
num_sublists;
|
||||
|
||||
/*
|
||||
* Count consecutive metadata monopolization toward
|
||||
* l2arc_meta_cycles. Only count when metadata actually
|
||||
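* filled the write budget, starving data passes.
* NOTE(review): in the code shown here the counter is cleared only
* when metadata is skipped or when nothing is written, so monopolized
* cycles separated by cycles that did write data still accumulate.
* Confirm this is intended given the "consecutive" wording.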
|
||||
*/
|
||||
if (save_position && pass <= L2ARC_MRU_META && full)
|
||||
dev->l2ad_meta_cycles++;
|
||||
|
||||
/*
|
||||
* Depth cap: track cumulative bytes scanned per pass
|
||||
* and reset markers when the scan cap is reached.
|
||||
@ -10109,6 +10135,13 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If nothing was written at all, reset the monopolization counter.
|
||||
* No point skipping metadata if data has nothing either.
|
||||
*/
|
||||
if (write_asize == 0)
|
||||
dev->l2ad_meta_cycles = 0;
|
||||
|
||||
/* No buffers selected for writing? */
|
||||
if (pio == NULL) {
|
||||
ASSERT0(write_psize);
|
||||
@ -11752,6 +11785,9 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
|
||||
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
|
||||
"Exclude dbufs on special vdevs from being cached to L2ARC if set.");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_cycles, U64, ZMOD_RW,
|
||||
"Consecutive metadata cycles before skipping to let data run");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, ext_headroom_pct, U64, ZMOD_RW,
|
||||
"Depth cap as percentage of state size for marker reset");
|
||||
|
||||
|
||||
@ -53,6 +53,7 @@ L2ARC_NOPREFETCH l2arc.noprefetch l2arc_noprefetch
|
||||
L2ARC_REBUILD_BLOCKS_MIN_L2SIZE l2arc.rebuild_blocks_min_l2size l2arc_rebuild_blocks_min_l2size
|
||||
L2ARC_REBUILD_ENABLED l2arc.rebuild_enabled l2arc_rebuild_enabled
|
||||
L2ARC_TRIM_AHEAD l2arc.trim_ahead l2arc_trim_ahead
|
||||
L2ARC_META_CYCLES l2arc.meta_cycles l2arc_meta_cycles
|
||||
L2ARC_WRITE_MAX l2arc.write_max l2arc_write_max
|
||||
LIVELIST_CONDENSE_NEW_ALLOC livelist.condense.new_alloc zfs_livelist_condense_new_alloc
|
||||
LIVELIST_CONDENSE_SYNC_CANCEL livelist.condense.sync_cancel zfs_livelist_condense_sync_cancel
|
||||
|
||||
Loading…
Reference in New Issue
Block a user