mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
More adaptive ARC eviction
Traditionally ARC adaptation was limited to MRU/MFU distribution. But for years people with metadata-centric workloads demanded mechanisms to also manage data/metadata distribution, which in original ZFS was just a FIFO. As a result ZFS effectively got separate states for data and metadata, minimum and maximum metadata limits, etc., but it all required manual tuning, was not adaptive, and at its heart remained a bad FIFO. This change removes most of the existing eviction logic, rewriting it from scratch. This makes MRU/MFU adaptation individual for data and metadata, same as the distribution between data and metadata themselves. Since most of the required state separation was already done, it only required making the arcs_size state field specific per data/metadata. The adaptation logic is still based on the previous concept of ghost hits, just now it balances ARC capacity between 4 states: MRU data, MRU metadata, MFU data and MFU metadata. To simplify arc_c changes, instead of arc_p measured in bytes, this code uses 3 variables, arc_meta, arc_pd and arc_pm, representing the ARC balance between metadata and data, MRU and MFU for data, and MRU and MFU for metadata respectively, as 32-bit fixed-point fractions. Since we care about the math result only when we need to evict, this moves all the logic from arc_adapt() to arc_evict(), which reduces per-block overhead, since per-block operations are limited to stats collection, now moved from arc_adapt() to arc_access() and using cheaper wmsums. This also allows removing the ugly ARC_HDR_DO_ADAPT flag from many places. This change also removes a number of metadata-specific tunables, part of which were actually not functioning correctly, since not all metadata are equal and some (like L2ARC headers) are not really evictable. Instead it introduces a single opaque knob, zfs_arc_meta_balance, tuning ARC's reaction to ghost hits, allowing the administrator to give more or less preference to metadata without setting strict limits.
Some of the old code parts, like arc_evict_meta(), are just removed, because since the introduction of ABD ARC they really make no sense: only headers referenced by a small number of buffers are not evictable, and they are really not evictable no matter what this code does. Instead just call arc_prune_async() if too much metadata appears to be not evictable. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #14359
This commit is contained in:
@@ -200,7 +200,6 @@ struct arc_buf {
|
||||
};
|
||||
|
||||
typedef enum arc_buf_contents {
|
||||
ARC_BUFC_INVALID, /* invalid type */
|
||||
ARC_BUFC_DATA, /* buffer contains data */
|
||||
ARC_BUFC_METADATA, /* buffer contains metadata */
|
||||
ARC_BUFC_NUMTYPES
|
||||
|
||||
+30
-13
@@ -82,15 +82,18 @@ typedef struct arc_state {
|
||||
* supports the "dbufs" kstat
|
||||
*/
|
||||
arc_state_type_t arcs_state;
|
||||
/*
|
||||
* total amount of data in this state.
|
||||
*/
|
||||
zfs_refcount_t arcs_size[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
|
||||
/*
|
||||
* total amount of evictable data in this state
|
||||
*/
|
||||
zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
|
||||
zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
|
||||
/*
|
||||
* total amount of data in this state; this includes: evictable,
|
||||
* non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
|
||||
* amount of hit bytes for this state (counted only for ghost states)
|
||||
*/
|
||||
zfs_refcount_t arcs_size;
|
||||
wmsum_t arcs_hits[ARC_BUFC_NUMTYPES];
|
||||
} arc_state_t;
|
||||
|
||||
typedef struct arc_callback arc_callback_t;
|
||||
@@ -358,8 +361,9 @@ typedef struct l2arc_lb_ptr_buf {
|
||||
#define L2BLK_SET_PREFETCH(field, x) BF64_SET((field), 39, 1, x)
|
||||
#define L2BLK_GET_CHECKSUM(field) BF64_GET((field), 40, 8)
|
||||
#define L2BLK_SET_CHECKSUM(field, x) BF64_SET((field), 40, 8, x)
|
||||
#define L2BLK_GET_TYPE(field) BF64_GET((field), 48, 8)
|
||||
#define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, x)
|
||||
/* +/- 1 here are to keep compatibility after ARC_BUFC_INVALID removal. */
|
||||
#define L2BLK_GET_TYPE(field) (BF64_GET((field), 48, 8) - 1)
|
||||
#define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, (x) + 1)
|
||||
#define L2BLK_GET_PROTECTED(field) BF64_GET((field), 56, 1)
|
||||
#define L2BLK_SET_PROTECTED(field, x) BF64_SET((field), 56, 1, x)
|
||||
#define L2BLK_GET_STATE(field) BF64_GET((field), 57, 4)
|
||||
@@ -582,7 +586,9 @@ typedef struct arc_stats {
|
||||
kstat_named_t arcstat_hash_collisions;
|
||||
kstat_named_t arcstat_hash_chains;
|
||||
kstat_named_t arcstat_hash_chain_max;
|
||||
kstat_named_t arcstat_p;
|
||||
kstat_named_t arcstat_meta;
|
||||
kstat_named_t arcstat_pd;
|
||||
kstat_named_t arcstat_pm;
|
||||
kstat_named_t arcstat_c;
|
||||
kstat_named_t arcstat_c_min;
|
||||
kstat_named_t arcstat_c_max;
|
||||
@@ -655,6 +661,8 @@ typedef struct arc_stats {
|
||||
* are all included in this value.
|
||||
*/
|
||||
kstat_named_t arcstat_anon_size;
|
||||
kstat_named_t arcstat_anon_data;
|
||||
kstat_named_t arcstat_anon_metadata;
|
||||
/*
|
||||
* Number of bytes consumed by ARC buffers that meet the
|
||||
* following criteria: backing buffers of type ARC_BUFC_DATA,
|
||||
@@ -676,6 +684,8 @@ typedef struct arc_stats {
|
||||
* are all included in this value.
|
||||
*/
|
||||
kstat_named_t arcstat_mru_size;
|
||||
kstat_named_t arcstat_mru_data;
|
||||
kstat_named_t arcstat_mru_metadata;
|
||||
/*
|
||||
* Number of bytes consumed by ARC buffers that meet the
|
||||
* following criteria: backing buffers of type ARC_BUFC_DATA,
|
||||
@@ -700,6 +710,8 @@ typedef struct arc_stats {
|
||||
* buffers *would have* consumed this number of bytes.
|
||||
*/
|
||||
kstat_named_t arcstat_mru_ghost_size;
|
||||
kstat_named_t arcstat_mru_ghost_data;
|
||||
kstat_named_t arcstat_mru_ghost_metadata;
|
||||
/*
|
||||
* Number of bytes that *would have been* consumed by ARC
|
||||
* buffers that are eligible for eviction, of type
|
||||
@@ -719,6 +731,8 @@ typedef struct arc_stats {
|
||||
* are all included in this value.
|
||||
*/
|
||||
kstat_named_t arcstat_mfu_size;
|
||||
kstat_named_t arcstat_mfu_data;
|
||||
kstat_named_t arcstat_mfu_metadata;
|
||||
/*
|
||||
* Number of bytes consumed by ARC buffers that are eligible for
|
||||
* eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
|
||||
@@ -737,6 +751,8 @@ typedef struct arc_stats {
|
||||
* arcstat_mru_ghost_size for more details.
|
||||
*/
|
||||
kstat_named_t arcstat_mfu_ghost_size;
|
||||
kstat_named_t arcstat_mfu_ghost_data;
|
||||
kstat_named_t arcstat_mfu_ghost_metadata;
|
||||
/*
|
||||
* Number of bytes that *would have been* consumed by ARC
|
||||
* buffers that are eligible for eviction, of type
|
||||
@@ -754,6 +770,8 @@ typedef struct arc_stats {
|
||||
* ARC_FLAG_UNCACHED being set.
|
||||
*/
|
||||
kstat_named_t arcstat_uncached_size;
|
||||
kstat_named_t arcstat_uncached_data;
|
||||
kstat_named_t arcstat_uncached_metadata;
|
||||
/*
|
||||
* Number of data bytes that are going to be evicted from ARC due to
|
||||
* ARC_FLAG_UNCACHED being set.
|
||||
@@ -876,10 +894,7 @@ typedef struct arc_stats {
|
||||
kstat_named_t arcstat_loaned_bytes;
|
||||
kstat_named_t arcstat_prune;
|
||||
kstat_named_t arcstat_meta_used;
|
||||
kstat_named_t arcstat_meta_limit;
|
||||
kstat_named_t arcstat_dnode_limit;
|
||||
kstat_named_t arcstat_meta_max;
|
||||
kstat_named_t arcstat_meta_min;
|
||||
kstat_named_t arcstat_async_upgrade_sync;
|
||||
/* Number of predictive prefetch requests. */
|
||||
kstat_named_t arcstat_predictive_prefetch;
|
||||
@@ -942,7 +957,7 @@ typedef struct arc_sums {
|
||||
wmsum_t arcstat_data_size;
|
||||
wmsum_t arcstat_metadata_size;
|
||||
wmsum_t arcstat_dbuf_size;
|
||||
aggsum_t arcstat_dnode_size;
|
||||
wmsum_t arcstat_dnode_size;
|
||||
wmsum_t arcstat_bonus_size;
|
||||
wmsum_t arcstat_l2_hits;
|
||||
wmsum_t arcstat_l2_misses;
|
||||
@@ -987,7 +1002,7 @@ typedef struct arc_sums {
|
||||
wmsum_t arcstat_memory_direct_count;
|
||||
wmsum_t arcstat_memory_indirect_count;
|
||||
wmsum_t arcstat_prune;
|
||||
aggsum_t arcstat_meta_used;
|
||||
wmsum_t arcstat_meta_used;
|
||||
wmsum_t arcstat_async_upgrade_sync;
|
||||
wmsum_t arcstat_predictive_prefetch;
|
||||
wmsum_t arcstat_demand_hit_predictive_prefetch;
|
||||
@@ -1015,7 +1030,9 @@ typedef struct arc_evict_waiter {
|
||||
#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1)
|
||||
|
||||
#define arc_no_grow ARCSTAT(arcstat_no_grow) /* do not grow cache size */
|
||||
#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */
|
||||
#define arc_meta ARCSTAT(arcstat_meta) /* target frac of metadata */
|
||||
#define arc_pd ARCSTAT(arcstat_pd) /* target frac of data MRU */
|
||||
#define arc_pm ARCSTAT(arcstat_pm) /* target frac of meta MRU */
|
||||
#define arc_c ARCSTAT(arcstat_c) /* target size of cache */
|
||||
#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
|
||||
#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
|
||||
|
||||
Reference in New Issue
Block a user