Use wmsum for arc, abd, dbuf and zfetch statistics. (#12172)

wmsum was designed for exactly such cases, with many updates and
rare reads.  It makes it possible to completely avoid atomic operations
on congested global variables.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #12172
This commit is contained in:
Alexander Motin 2021-06-16 20:19:34 -04:00 committed by GitHub
parent 9ffcaa370a
commit c4c162c1e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 777 additions and 215 deletions

View File

@ -27,6 +27,7 @@
#define _ABD_IMPL_H #define _ABD_IMPL_H
#include <sys/abd.h> #include <sys/abd.h>
#include <sys/wmsum.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -82,9 +83,8 @@ void abd_iter_unmap(struct abd_iter *);
/* /*
* Helper macros * Helper macros
*/ */
#define ABDSTAT(stat) (abd_stats.stat.value.ui64)
#define ABDSTAT_INCR(stat, val) \ #define ABDSTAT_INCR(stat, val) \
atomic_add_64(&abd_stats.stat.value.ui64, (val)) wmsum_add(&abd_sums.stat, (val))
#define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1) #define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1)
#define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1) #define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1)

View File

@ -33,6 +33,7 @@
#include <sys/zio_crypt.h> #include <sys/zio_crypt.h>
#include <sys/zthr.h> #include <sys/zthr.h>
#include <sys/aggsum.h> #include <sys/aggsum.h>
#include <sys/wmsum.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -563,7 +564,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_c; kstat_named_t arcstat_c;
kstat_named_t arcstat_c_min; kstat_named_t arcstat_c_min;
kstat_named_t arcstat_c_max; kstat_named_t arcstat_c_max;
/* Not updated directly; only synced in arc_kstat_update. */
kstat_named_t arcstat_size; kstat_named_t arcstat_size;
/* /*
* Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
@ -592,14 +592,12 @@ typedef struct arc_stats {
* (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
* caches), and arc_buf_t structures (allocated via arc_buf_t * caches), and arc_buf_t structures (allocated via arc_buf_t
* cache). * cache).
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_hdr_size; kstat_named_t arcstat_hdr_size;
/* /*
* Number of bytes consumed by ARC buffers of type equal to * Number of bytes consumed by ARC buffers of type equal to
* ARC_BUFC_DATA. This is generally consumed by buffers backing * ARC_BUFC_DATA. This is generally consumed by buffers backing
* on disk user data (e.g. plain file contents). * on disk user data (e.g. plain file contents).
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_data_size; kstat_named_t arcstat_data_size;
/* /*
@ -607,22 +605,18 @@ typedef struct arc_stats {
* ARC_BUFC_METADATA. This is generally consumed by buffers * ARC_BUFC_METADATA. This is generally consumed by buffers
* backing on disk data that is used for internal ZFS * backing on disk data that is used for internal ZFS
* structures (e.g. ZAP, dnode, indirect blocks, etc). * structures (e.g. ZAP, dnode, indirect blocks, etc).
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_metadata_size; kstat_named_t arcstat_metadata_size;
/* /*
* Number of bytes consumed by dmu_buf_impl_t objects. * Number of bytes consumed by dmu_buf_impl_t objects.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_dbuf_size; kstat_named_t arcstat_dbuf_size;
/* /*
* Number of bytes consumed by dnode_t objects. * Number of bytes consumed by dnode_t objects.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_dnode_size; kstat_named_t arcstat_dnode_size;
/* /*
* Number of bytes consumed by bonus buffers. * Number of bytes consumed by bonus buffers.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_bonus_size; kstat_named_t arcstat_bonus_size;
#if defined(COMPAT_FREEBSD11) #if defined(COMPAT_FREEBSD11)
@ -637,7 +631,6 @@ typedef struct arc_stats {
* arc_anon state. This includes *all* buffers in the arc_anon * arc_anon state. This includes *all* buffers in the arc_anon
* state; e.g. data, metadata, evictable, and unevictable buffers * state; e.g. data, metadata, evictable, and unevictable buffers
* are all included in this value. * are all included in this value.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_anon_size; kstat_named_t arcstat_anon_size;
/* /*
@ -645,7 +638,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_DATA, * following criteria: backing buffers of type ARC_BUFC_DATA,
* residing in the arc_anon state, and are eligible for eviction * residing in the arc_anon state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer). * (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_anon_evictable_data; kstat_named_t arcstat_anon_evictable_data;
/* /*
@ -653,7 +645,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_METADATA, * following criteria: backing buffers of type ARC_BUFC_METADATA,
* residing in the arc_anon state, and are eligible for eviction * residing in the arc_anon state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer). * (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_anon_evictable_metadata; kstat_named_t arcstat_anon_evictable_metadata;
/* /*
@ -661,7 +652,6 @@ typedef struct arc_stats {
* arc_mru state. This includes *all* buffers in the arc_mru * arc_mru state. This includes *all* buffers in the arc_mru
* state; e.g. data, metadata, evictable, and unevictable buffers * state; e.g. data, metadata, evictable, and unevictable buffers
* are all included in this value. * are all included in this value.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mru_size; kstat_named_t arcstat_mru_size;
/* /*
@ -669,7 +659,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_DATA, * following criteria: backing buffers of type ARC_BUFC_DATA,
* residing in the arc_mru state, and are eligible for eviction * residing in the arc_mru state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer). * (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mru_evictable_data; kstat_named_t arcstat_mru_evictable_data;
/* /*
@ -677,7 +666,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_METADATA, * following criteria: backing buffers of type ARC_BUFC_METADATA,
* residing in the arc_mru state, and are eligible for eviction * residing in the arc_mru state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer). * (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mru_evictable_metadata; kstat_named_t arcstat_mru_evictable_metadata;
/* /*
@ -688,21 +676,18 @@ typedef struct arc_stats {
* don't actually have ARC buffers linked off of these headers. * don't actually have ARC buffers linked off of these headers.
* Thus, *if* the headers had associated ARC buffers, these * Thus, *if* the headers had associated ARC buffers, these
* buffers *would have* consumed this number of bytes. * buffers *would have* consumed this number of bytes.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mru_ghost_size; kstat_named_t arcstat_mru_ghost_size;
/* /*
* Number of bytes that *would have been* consumed by ARC * Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type * buffers that are eligible for eviction, of type
* ARC_BUFC_DATA, and linked off the arc_mru_ghost state. * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mru_ghost_evictable_data; kstat_named_t arcstat_mru_ghost_evictable_data;
/* /*
* Number of bytes that *would have been* consumed by ARC * Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type * buffers that are eligible for eviction, of type
* ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mru_ghost_evictable_metadata; kstat_named_t arcstat_mru_ghost_evictable_metadata;
/* /*
@ -710,42 +695,36 @@ typedef struct arc_stats {
* arc_mfu state. This includes *all* buffers in the arc_mfu * arc_mfu state. This includes *all* buffers in the arc_mfu
* state; e.g. data, metadata, evictable, and unevictable buffers * state; e.g. data, metadata, evictable, and unevictable buffers
* are all included in this value. * are all included in this value.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mfu_size; kstat_named_t arcstat_mfu_size;
/* /*
* Number of bytes consumed by ARC buffers that are eligible for * Number of bytes consumed by ARC buffers that are eligible for
* eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
* state. * state.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mfu_evictable_data; kstat_named_t arcstat_mfu_evictable_data;
/* /*
* Number of bytes consumed by ARC buffers that are eligible for * Number of bytes consumed by ARC buffers that are eligible for
* eviction, of type ARC_BUFC_METADATA, and reside in the * eviction, of type ARC_BUFC_METADATA, and reside in the
* arc_mfu state. * arc_mfu state.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mfu_evictable_metadata; kstat_named_t arcstat_mfu_evictable_metadata;
/* /*
* Total number of bytes that *would have been* consumed by ARC * Total number of bytes that *would have been* consumed by ARC
* buffers in the arc_mfu_ghost state. See the comment above * buffers in the arc_mfu_ghost state. See the comment above
* arcstat_mru_ghost_size for more details. * arcstat_mru_ghost_size for more details.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mfu_ghost_size; kstat_named_t arcstat_mfu_ghost_size;
/* /*
* Number of bytes that *would have been* consumed by ARC * Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type * buffers that are eligible for eviction, of type
* ARC_BUFC_DATA, and linked off the arc_mfu_ghost state. * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mfu_ghost_evictable_data; kstat_named_t arcstat_mfu_ghost_evictable_data;
/* /*
* Number of bytes that *would have been* consumed by ARC * Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type * buffers that are eligible for eviction, of type
* ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/ */
kstat_named_t arcstat_mfu_ghost_evictable_metadata; kstat_named_t arcstat_mfu_ghost_evictable_metadata;
kstat_named_t arcstat_l2_hits; kstat_named_t arcstat_l2_hits;
@ -779,7 +758,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_io_error; kstat_named_t arcstat_l2_io_error;
kstat_named_t arcstat_l2_lsize; kstat_named_t arcstat_l2_lsize;
kstat_named_t arcstat_l2_psize; kstat_named_t arcstat_l2_psize;
/* Not updated directly; only synced in arc_kstat_update. */
kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_l2_hdr_size;
/* /*
* Number of L2ARC log blocks written. These are used for restoring the * Number of L2ARC log blocks written. These are used for restoring the
@ -860,7 +838,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_tempreserve; kstat_named_t arcstat_tempreserve;
kstat_named_t arcstat_loaned_bytes; kstat_named_t arcstat_loaned_bytes;
kstat_named_t arcstat_prune; kstat_named_t arcstat_prune;
/* Not updated directly; only synced in arc_kstat_update. */
kstat_named_t arcstat_meta_used; kstat_named_t arcstat_meta_used;
kstat_named_t arcstat_meta_limit; kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_dnode_limit; kstat_named_t arcstat_dnode_limit;
@ -876,6 +853,96 @@ typedef struct arc_stats {
kstat_named_t arcstat_abd_chunk_waste_size; kstat_named_t arcstat_abd_chunk_waste_size;
} arc_stats_t; } arc_stats_t;
/*
 * Counters that back the ARC statistics.  Each member shares its name with
 * an arc_stats_t field.  ARCSTAT_INCR()/ARCSTAT_BUMP()/ARCSTAT_BUMPDOWN()
 * update the wmsum_t members through wmsum_add(&arc_sums.stat, val).
 *
 * Four members (arcstat_size, arcstat_dnode_size, arcstat_l2_hdr_size and
 * arcstat_meta_used) are aggsum_t rather than wmsum_t, so they must be
 * updated with aggsum_add() directly and not through ARCSTAT_INCR().
 * NOTE(review): presumably these stay aggsum_t because the ARC itself reads
 * or compares them (aggsum_value/aggsum_compare/aggsum_upper_bound) and not
 * only the kstat reader -- confirm against the callers.
 */
typedef struct arc_sums {
wmsum_t arcstat_hits;
wmsum_t arcstat_misses;
wmsum_t arcstat_demand_data_hits;
wmsum_t arcstat_demand_data_misses;
wmsum_t arcstat_demand_metadata_hits;
wmsum_t arcstat_demand_metadata_misses;
wmsum_t arcstat_prefetch_data_hits;
wmsum_t arcstat_prefetch_data_misses;
wmsum_t arcstat_prefetch_metadata_hits;
wmsum_t arcstat_prefetch_metadata_misses;
wmsum_t arcstat_mru_hits;
wmsum_t arcstat_mru_ghost_hits;
wmsum_t arcstat_mfu_hits;
wmsum_t arcstat_mfu_ghost_hits;
wmsum_t arcstat_deleted;
wmsum_t arcstat_mutex_miss;
wmsum_t arcstat_access_skip;
wmsum_t arcstat_evict_skip;
wmsum_t arcstat_evict_not_enough;
wmsum_t arcstat_evict_l2_cached;
wmsum_t arcstat_evict_l2_eligible;
wmsum_t arcstat_evict_l2_eligible_mfu;
wmsum_t arcstat_evict_l2_eligible_mru;
wmsum_t arcstat_evict_l2_ineligible;
wmsum_t arcstat_evict_l2_skip;
wmsum_t arcstat_hash_collisions;
wmsum_t arcstat_hash_chains;
/* aggsum_t: use aggsum_add(), not ARCSTAT_INCR(). */
aggsum_t arcstat_size;
wmsum_t arcstat_compressed_size;
wmsum_t arcstat_uncompressed_size;
wmsum_t arcstat_overhead_size;
wmsum_t arcstat_hdr_size;
wmsum_t arcstat_data_size;
wmsum_t arcstat_metadata_size;
wmsum_t arcstat_dbuf_size;
/* aggsum_t: use aggsum_add(), not ARCSTAT_INCR(). */
aggsum_t arcstat_dnode_size;
wmsum_t arcstat_bonus_size;
wmsum_t arcstat_l2_hits;
wmsum_t arcstat_l2_misses;
wmsum_t arcstat_l2_prefetch_asize;
wmsum_t arcstat_l2_mru_asize;
wmsum_t arcstat_l2_mfu_asize;
wmsum_t arcstat_l2_bufc_data_asize;
wmsum_t arcstat_l2_bufc_metadata_asize;
wmsum_t arcstat_l2_feeds;
wmsum_t arcstat_l2_rw_clash;
wmsum_t arcstat_l2_read_bytes;
wmsum_t arcstat_l2_write_bytes;
wmsum_t arcstat_l2_writes_sent;
wmsum_t arcstat_l2_writes_done;
wmsum_t arcstat_l2_writes_error;
wmsum_t arcstat_l2_writes_lock_retry;
wmsum_t arcstat_l2_evict_lock_retry;
wmsum_t arcstat_l2_evict_reading;
wmsum_t arcstat_l2_evict_l1cached;
wmsum_t arcstat_l2_free_on_write;
wmsum_t arcstat_l2_abort_lowmem;
wmsum_t arcstat_l2_cksum_bad;
wmsum_t arcstat_l2_io_error;
wmsum_t arcstat_l2_lsize;
wmsum_t arcstat_l2_psize;
/* aggsum_t: use aggsum_add(), not ARCSTAT_INCR(). */
aggsum_t arcstat_l2_hdr_size;
wmsum_t arcstat_l2_log_blk_writes;
wmsum_t arcstat_l2_log_blk_asize;
wmsum_t arcstat_l2_log_blk_count;
wmsum_t arcstat_l2_rebuild_success;
wmsum_t arcstat_l2_rebuild_abort_unsupported;
wmsum_t arcstat_l2_rebuild_abort_io_errors;
wmsum_t arcstat_l2_rebuild_abort_dh_errors;
wmsum_t arcstat_l2_rebuild_abort_cksum_lb_errors;
wmsum_t arcstat_l2_rebuild_abort_lowmem;
wmsum_t arcstat_l2_rebuild_size;
wmsum_t arcstat_l2_rebuild_asize;
wmsum_t arcstat_l2_rebuild_bufs;
wmsum_t arcstat_l2_rebuild_bufs_precached;
wmsum_t arcstat_l2_rebuild_log_blks;
wmsum_t arcstat_memory_throttle_count;
wmsum_t arcstat_memory_direct_count;
wmsum_t arcstat_memory_indirect_count;
wmsum_t arcstat_prune;
/* aggsum_t: use aggsum_add(), not ARCSTAT_INCR(). */
aggsum_t arcstat_meta_used;
wmsum_t arcstat_async_upgrade_sync;
wmsum_t arcstat_demand_hit_predictive_prefetch;
wmsum_t arcstat_demand_hit_prescient_prefetch;
wmsum_t arcstat_raw_size;
wmsum_t arcstat_cached_only_in_progress;
wmsum_t arcstat_abd_chunk_waste_size;
} arc_sums_t;
typedef struct arc_evict_waiter { typedef struct arc_evict_waiter {
list_node_t aew_node; list_node_t aew_node;
kcondvar_t aew_cv; kcondvar_t aew_cv;
@ -885,7 +952,7 @@ typedef struct arc_evict_waiter {
#define ARCSTAT(stat) (arc_stats.stat.value.ui64) #define ARCSTAT(stat) (arc_stats.stat.value.ui64)
#define ARCSTAT_INCR(stat, val) \ #define ARCSTAT_INCR(stat, val) \
atomic_add_64(&arc_stats.stat.value.ui64, (val)) wmsum_add(&arc_sums.stat, (val))
#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) #define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1)
#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1)
@ -899,6 +966,7 @@ typedef struct arc_evict_waiter {
extern taskq_t *arc_prune_taskq; extern taskq_t *arc_prune_taskq;
extern arc_stats_t arc_stats; extern arc_stats_t arc_stats;
extern arc_sums_t arc_sums;
extern hrtime_t arc_growtime; extern hrtime_t arc_growtime;
extern boolean_t arc_warm; extern boolean_t arc_warm;
extern int arc_grow_retry; extern int arc_grow_retry;
@ -906,7 +974,6 @@ extern int arc_no_grow_shift;
extern int arc_shrink_shift; extern int arc_shrink_shift;
extern kmutex_t arc_prune_mtx; extern kmutex_t arc_prune_mtx;
extern list_t arc_prune_list; extern list_t arc_prune_list;
extern aggsum_t arc_size;
extern arc_state_t *arc_mfu; extern arc_state_t *arc_mfu;
extern arc_state_t *arc_mru; extern arc_state_t *arc_mru;
extern uint_t zfs_arc_pc_percent; extern uint_t zfs_arc_pc_percent;

View File

@ -69,6 +69,15 @@ static abd_stats_t abd_stats = {
{ "linear_data_size", KSTAT_DATA_UINT64 }, { "linear_data_size", KSTAT_DATA_UINT64 },
}; };
/*
 * wmsum counters behind the "abdstats" kstat.  ABDSTAT_INCR() adds to these
 * via wmsum_add(); abd_kstats_update() copies their values into abd_stats
 * when the kstat is read.  Each member is set up in abd_init() and torn
 * down in abd_fini().
 */
struct {
wmsum_t abdstat_struct_size;
wmsum_t abdstat_scatter_cnt;
wmsum_t abdstat_scatter_data_size;
wmsum_t abdstat_scatter_chunk_waste;
wmsum_t abdstat_linear_cnt;
wmsum_t abdstat_linear_data_size;
} abd_sums;
/* /*
* The size of the chunks ABD allocates. Because the sizes allocated from the * The size of the chunks ABD allocates. Because the sizes allocated from the
* kmem_cache can't change, this tunable can only be modified at boot. Changing * kmem_cache can't change, this tunable can only be modified at boot. Changing
@ -271,16 +280,46 @@ abd_free_zero_scatter(void)
kmem_free(abd_zero_buf, zfs_abd_chunk_size); kmem_free(abd_zero_buf, zfs_abd_chunk_size);
} }
/*
 * ks_update callback for the "abdstats" kstat.
 *
 * Copies the current value of every abd_sums counter into the matching
 * field of the abd_stats_t that ksp->ks_data points to.
 *
 * Returns EACCES when rw is KSTAT_WRITE (the statistics cannot be written
 * through the kstat), otherwise 0.
 */
static int
abd_kstats_update(kstat_t *ksp, int rw)
{
abd_stats_t *as = ksp->ks_data;
/* Refuse writes before touching any exported value. */
if (rw == KSTAT_WRITE)
return (EACCES);
/* Snapshot each wmsum into its exported kstat slot. */
as->abdstat_struct_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_struct_size);
as->abdstat_scatter_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_cnt);
as->abdstat_scatter_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_data_size);
as->abdstat_scatter_chunk_waste.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
as->abdstat_linear_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_cnt);
as->abdstat_linear_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_data_size);
return (0);
}
void void
abd_init(void) abd_init(void)
{ {
abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
NULL, NULL, NULL, NULL, 0, KMC_NODEBUG); NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
wmsum_init(&abd_sums.abdstat_struct_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
if (abd_ksp != NULL) { if (abd_ksp != NULL) {
abd_ksp->ks_data = &abd_stats; abd_ksp->ks_data = &abd_stats;
abd_ksp->ks_update = abd_kstats_update;
kstat_install(abd_ksp); kstat_install(abd_ksp);
} }
@ -297,6 +336,13 @@ abd_fini(void)
abd_ksp = NULL; abd_ksp = NULL;
} }
wmsum_fini(&abd_sums.abdstat_struct_size);
wmsum_fini(&abd_sums.abdstat_scatter_cnt);
wmsum_fini(&abd_sums.abdstat_scatter_data_size);
wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
wmsum_fini(&abd_sums.abdstat_linear_cnt);
wmsum_fini(&abd_sums.abdstat_linear_data_size);
kmem_cache_destroy(abd_chunk_cache); kmem_cache_destroy(abd_chunk_cache);
abd_chunk_cache = NULL; abd_chunk_cache = NULL;
} }

View File

@ -132,6 +132,20 @@ static abd_stats_t abd_stats = {
{ "scatter_sg_table_retry", KSTAT_DATA_UINT64 }, { "scatter_sg_table_retry", KSTAT_DATA_UINT64 },
}; };
/*
 * wmsum counters behind the "abdstats" kstat.  ABDSTAT_INCR() adds to these
 * via wmsum_add(); abd_kstats_update() copies their values into abd_stats
 * when the kstat is read.  Each member is set up in abd_init() and torn
 * down in abd_fini().
 */
struct {
wmsum_t abdstat_struct_size;
wmsum_t abdstat_linear_cnt;
wmsum_t abdstat_linear_data_size;
wmsum_t abdstat_scatter_cnt;
wmsum_t abdstat_scatter_data_size;
wmsum_t abdstat_scatter_chunk_waste;
/*
 * One counter per index in [0, MAX_ORDER).
 * NOTE(review): presumably indexed by page allocation order -- confirm.
 */
wmsum_t abdstat_scatter_orders[MAX_ORDER];
wmsum_t abdstat_scatter_page_multi_chunk;
wmsum_t abdstat_scatter_page_multi_zone;
wmsum_t abdstat_scatter_page_alloc_retry;
wmsum_t abdstat_scatter_sg_table_retry;
} abd_sums;
#define abd_for_each_sg(abd, sg, n, i) \ #define abd_for_each_sg(abd, sg, n, i) \
for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i) for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
@ -687,6 +701,40 @@ abd_free_zero_scatter(void)
#endif /* _KERNEL */ #endif /* _KERNEL */
} }
/*
 * ks_update callback for the "abdstats" kstat.
 *
 * Copies the current value of every abd_sums counter, including each
 * element of the abdstat_scatter_orders[] array, into the matching field
 * of the abd_stats_t that ksp->ks_data points to.
 *
 * Returns EACCES when rw is KSTAT_WRITE (the statistics cannot be written
 * through the kstat), otherwise 0.
 */
static int
abd_kstats_update(kstat_t *ksp, int rw)
{
abd_stats_t *as = ksp->ks_data;
/* Refuse writes before touching any exported value. */
if (rw == KSTAT_WRITE)
return (EACCES);
/* Snapshot each wmsum into its exported kstat slot. */
as->abdstat_struct_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_struct_size);
as->abdstat_linear_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_cnt);
as->abdstat_linear_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_data_size);
as->abdstat_scatter_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_cnt);
as->abdstat_scatter_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_data_size);
as->abdstat_scatter_chunk_waste.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
/* The per-index counters are exported element by element. */
for (int i = 0; i < MAX_ORDER; i++) {
as->abdstat_scatter_orders[i].value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_orders[i]);
}
as->abdstat_scatter_page_multi_chunk.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_page_multi_chunk);
as->abdstat_scatter_page_multi_zone.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_page_multi_zone);
as->abdstat_scatter_page_alloc_retry.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_page_alloc_retry);
as->abdstat_scatter_sg_table_retry.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_sg_table_retry);
return (0);
}
void void
abd_init(void) abd_init(void)
{ {
@ -695,6 +743,19 @@ abd_init(void)
abd_cache = kmem_cache_create("abd_t", sizeof (abd_t), abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
0, NULL, NULL, NULL, NULL, NULL, 0); 0, NULL, NULL, NULL, NULL, NULL, 0);
wmsum_init(&abd_sums.abdstat_struct_size, 0);
wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
for (i = 0; i < MAX_ORDER; i++)
wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0);
wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0);
wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0);
wmsum_init(&abd_sums.abdstat_scatter_page_alloc_retry, 0);
wmsum_init(&abd_sums.abdstat_scatter_sg_table_retry, 0);
abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
if (abd_ksp != NULL) { if (abd_ksp != NULL) {
@ -705,6 +766,7 @@ abd_init(void)
KSTAT_DATA_UINT64; KSTAT_DATA_UINT64;
} }
abd_ksp->ks_data = &abd_stats; abd_ksp->ks_data = &abd_stats;
abd_ksp->ks_update = abd_kstats_update;
kstat_install(abd_ksp); kstat_install(abd_ksp);
} }
@ -721,6 +783,19 @@ abd_fini(void)
abd_ksp = NULL; abd_ksp = NULL;
} }
wmsum_fini(&abd_sums.abdstat_struct_size);
wmsum_fini(&abd_sums.abdstat_linear_cnt);
wmsum_fini(&abd_sums.abdstat_linear_data_size);
wmsum_fini(&abd_sums.abdstat_scatter_cnt);
wmsum_fini(&abd_sums.abdstat_scatter_data_size);
wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
for (int i = 0; i < MAX_ORDER; i++)
wmsum_fini(&abd_sums.abdstat_scatter_orders[i]);
wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk);
wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone);
wmsum_fini(&abd_sums.abdstat_scatter_page_alloc_retry);
wmsum_fini(&abd_sums.abdstat_scatter_sg_table_retry);
if (abd_cache) { if (abd_cache) {
kmem_cache_destroy(abd_cache); kmem_cache_destroy(abd_cache);
abd_cache = NULL; abd_cache = NULL;

View File

@ -135,7 +135,7 @@ arc_available_memory(void)
static uint64_t static uint64_t
arc_evictable_memory(void) arc_evictable_memory(void)
{ {
int64_t asize = aggsum_value(&arc_size); int64_t asize = aggsum_value(&arc_sums.arcstat_size);
uint64_t arc_clean = uint64_t arc_clean =
zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) + zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) +
zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) + zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) +

View File

@ -600,6 +600,8 @@ arc_stats_t arc_stats = {
{ "abd_chunk_waste_size", KSTAT_DATA_UINT64 }, { "abd_chunk_waste_size", KSTAT_DATA_UINT64 },
}; };
arc_sums_t arc_sums;
#define ARCSTAT_MAX(stat, val) { \ #define ARCSTAT_MAX(stat, val) { \
uint64_t m; \ uint64_t m; \
while ((val) > (m = arc_stats.stat.value.ui64) && \ while ((val) > (m = arc_stats.stat.value.ui64) && \
@ -607,9 +609,6 @@ arc_stats_t arc_stats = {
continue; \ continue; \
} }
#define ARCSTAT_MAXSTAT(stat) \
ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
/* /*
* We define a macro to allow ARC hits/misses to be easily broken down by * We define a macro to allow ARC hits/misses to be easily broken down by
* two separate conditions, giving a total of four different subtypes for * two separate conditions, giving a total of four different subtypes for
@ -671,37 +670,8 @@ arc_state_t *arc_mfu;
/* max size for dnodes */ /* max size for dnodes */
#define arc_dnode_size_limit ARCSTAT(arcstat_dnode_limit) #define arc_dnode_size_limit ARCSTAT(arcstat_dnode_limit)
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ #define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* waiting to be evicted */ #define arc_need_free ARCSTAT(arcstat_need_free) /* waiting to be evicted */
/* size of all b_rabd's in entire arc */
#define arc_raw_size ARCSTAT(arcstat_raw_size)
/* compressed size of entire arc */
#define arc_compressed_size ARCSTAT(arcstat_compressed_size)
/* uncompressed size of entire arc */
#define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size)
/* number of bytes in the arc from arc_buf_t's */
#define arc_overhead_size ARCSTAT(arcstat_overhead_size)
/*
* There are also some ARC variables that we want to export, but that are
* updated so often that having the canonical representation be the statistic
* variable causes a performance bottleneck. We want to use aggsum_t's for these
* instead, but still be able to export the kstat in the same way as before.
* The solution is to always use the aggsum version, except in the kstat update
* callback.
*/
aggsum_t arc_size;
aggsum_t arc_meta_used;
wmsum_t astat_data_size;
wmsum_t astat_metadata_size;
wmsum_t astat_dbuf_size;
aggsum_t astat_dnode_size;
wmsum_t astat_bonus_size;
wmsum_t astat_hdr_size;
aggsum_t astat_l2_hdr_size;
wmsum_t astat_abd_chunk_waste_size;
hrtime_t arc_growtime; hrtime_t arc_growtime;
list_t arc_prune_list; list_t arc_prune_list;
kmutex_t arc_prune_mtx; kmutex_t arc_prune_mtx;
@ -819,9 +789,6 @@ uint64_t zfs_crc64_table[256];
*/ */
#define L2ARC_FEED_TYPES 4 #define L2ARC_FEED_TYPES 4
#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
/* L2ARC Performance Tunables */ /* L2ARC Performance Tunables */
unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
@ -1085,9 +1052,9 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
ARCSTAT_MAX(arcstat_hash_chain_max, i); ARCSTAT_MAX(arcstat_hash_chain_max, i);
} }
uint64_t he = atomic_inc_64_nv(
ARCSTAT_BUMP(arcstat_hash_elements); &arc_stats.arcstat_hash_elements.value.ui64);
ARCSTAT_MAXSTAT(arcstat_hash_elements); ARCSTAT_MAX(arcstat_hash_elements_max, he);
return (NULL); return (NULL);
} }
@ -1111,7 +1078,7 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE); arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
/* collect some hash table performance data */ /* collect some hash table performance data */
ARCSTAT_BUMPDOWN(arcstat_hash_elements); atomic_dec_64(&arc_stats.arcstat_hash_elements.value.ui64);
if (buf_hash_table.ht_table[idx] && if (buf_hash_table.ht_table[idx] &&
buf_hash_table.ht_table[idx]->b_hash_next == NULL) buf_hash_table.ht_table[idx]->b_hash_next == NULL)
@ -2646,25 +2613,25 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
default: default:
break; break;
case ARC_SPACE_DATA: case ARC_SPACE_DATA:
wmsum_add(&astat_data_size, space); ARCSTAT_INCR(arcstat_data_size, space);
break; break;
case ARC_SPACE_META: case ARC_SPACE_META:
wmsum_add(&astat_metadata_size, space); ARCSTAT_INCR(arcstat_metadata_size, space);
break; break;
case ARC_SPACE_BONUS: case ARC_SPACE_BONUS:
wmsum_add(&astat_bonus_size, space); ARCSTAT_INCR(arcstat_bonus_size, space);
break; break;
case ARC_SPACE_DNODE: case ARC_SPACE_DNODE:
aggsum_add(&astat_dnode_size, space); aggsum_add(&arc_sums.arcstat_dnode_size, space);
break; break;
case ARC_SPACE_DBUF: case ARC_SPACE_DBUF:
wmsum_add(&astat_dbuf_size, space); ARCSTAT_INCR(arcstat_dbuf_size, space);
break; break;
case ARC_SPACE_HDRS: case ARC_SPACE_HDRS:
wmsum_add(&astat_hdr_size, space); ARCSTAT_INCR(arcstat_hdr_size, space);
break; break;
case ARC_SPACE_L2HDRS: case ARC_SPACE_L2HDRS:
aggsum_add(&astat_l2_hdr_size, space); aggsum_add(&arc_sums.arcstat_l2_hdr_size, space);
break; break;
case ARC_SPACE_ABD_CHUNK_WASTE: case ARC_SPACE_ABD_CHUNK_WASTE:
/* /*
@ -2673,14 +2640,14 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
* scatter ABD's come from the ARC, because other users are * scatter ABD's come from the ARC, because other users are
* very short-lived. * very short-lived.
*/ */
wmsum_add(&astat_abd_chunk_waste_size, space); ARCSTAT_INCR(arcstat_abd_chunk_waste_size, space);
break; break;
} }
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE)
aggsum_add(&arc_meta_used, space); aggsum_add(&arc_sums.arcstat_meta_used, space);
aggsum_add(&arc_size, space); aggsum_add(&arc_sums.arcstat_size, space);
} }
void void
@ -2692,45 +2659,41 @@ arc_space_return(uint64_t space, arc_space_type_t type)
default: default:
break; break;
case ARC_SPACE_DATA: case ARC_SPACE_DATA:
wmsum_add(&astat_data_size, -space); ARCSTAT_INCR(arcstat_data_size, -space);
break; break;
case ARC_SPACE_META: case ARC_SPACE_META:
wmsum_add(&astat_metadata_size, -space); ARCSTAT_INCR(arcstat_metadata_size, -space);
break; break;
case ARC_SPACE_BONUS: case ARC_SPACE_BONUS:
wmsum_add(&astat_bonus_size, -space); ARCSTAT_INCR(arcstat_bonus_size, -space);
break; break;
case ARC_SPACE_DNODE: case ARC_SPACE_DNODE:
aggsum_add(&astat_dnode_size, -space); aggsum_add(&arc_sums.arcstat_dnode_size, -space);
break; break;
case ARC_SPACE_DBUF: case ARC_SPACE_DBUF:
wmsum_add(&astat_dbuf_size, -space); ARCSTAT_INCR(arcstat_dbuf_size, -space);
break; break;
case ARC_SPACE_HDRS: case ARC_SPACE_HDRS:
wmsum_add(&astat_hdr_size, -space); ARCSTAT_INCR(arcstat_hdr_size, -space);
break; break;
case ARC_SPACE_L2HDRS: case ARC_SPACE_L2HDRS:
aggsum_add(&astat_l2_hdr_size, -space); aggsum_add(&arc_sums.arcstat_l2_hdr_size, -space);
break; break;
case ARC_SPACE_ABD_CHUNK_WASTE: case ARC_SPACE_ABD_CHUNK_WASTE:
wmsum_add(&astat_abd_chunk_waste_size, -space); ARCSTAT_INCR(arcstat_abd_chunk_waste_size, -space);
break; break;
} }
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) { if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) {
ASSERT(aggsum_compare(&arc_meta_used, space) >= 0); ASSERT(aggsum_compare(&arc_sums.arcstat_meta_used,
/* space) >= 0);
* We use the upper bound here rather than the precise value ARCSTAT_MAX(arcstat_meta_max,
* because the arc_meta_max value doesn't need to be aggsum_upper_bound(&arc_sums.arcstat_meta_used));
* precise. It's only consumed by humans via arcstats. aggsum_add(&arc_sums.arcstat_meta_used, -space);
*/
if (arc_meta_max < aggsum_upper_bound(&arc_meta_used))
arc_meta_max = aggsum_upper_bound(&arc_meta_used);
aggsum_add(&arc_meta_used, -space);
} }
ASSERT(aggsum_compare(&arc_size, space) >= 0); ASSERT(aggsum_compare(&arc_sums.arcstat_size, space) >= 0);
aggsum_add(&arc_size, -space); aggsum_add(&arc_sums.arcstat_size, -space);
} }
/* /*
@ -4246,9 +4209,10 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* Request that 10% of the LRUs be scanned by the superblock * Request that 10% of the LRUs be scanned by the superblock
* shrinker. * shrinker.
*/ */
if (type == ARC_BUFC_DATA && aggsum_compare(&astat_dnode_size, if (type == ARC_BUFC_DATA && aggsum_compare(
arc_dnode_size_limit) > 0) { &arc_sums.arcstat_dnode_size, arc_dnode_size_limit) > 0) {
arc_prune_async((aggsum_upper_bound(&astat_dnode_size) - arc_prune_async((aggsum_upper_bound(
&arc_sums.arcstat_dnode_size) -
arc_dnode_size_limit) / sizeof (dnode_t) / arc_dnode_size_limit) / sizeof (dnode_t) /
zfs_arc_dnode_reduce_percent); zfs_arc_dnode_reduce_percent);
} }
@ -4478,7 +4442,7 @@ restart:
} }
/* /*
* Evict metadata buffers from the cache, such that arc_meta_used is * Evict metadata buffers from the cache, such that arcstat_meta_used is
* capped by the arc_meta_limit tunable. * capped by the arc_meta_limit tunable.
*/ */
static uint64_t static uint64_t
@ -4599,7 +4563,7 @@ arc_evict_type(arc_state_t *state)
} }
/* /*
* Evict buffers from the cache, such that arc_size is capped by arc_c. * Evict buffers from the cache, such that arcstat_size is capped by arc_c.
*/ */
static uint64_t static uint64_t
arc_evict(void) arc_evict(void)
@ -4607,8 +4571,8 @@ arc_evict(void)
uint64_t total_evicted = 0; uint64_t total_evicted = 0;
uint64_t bytes; uint64_t bytes;
int64_t target; int64_t target;
uint64_t asize = aggsum_value(&arc_size); uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
uint64_t ameta = aggsum_value(&arc_meta_used); uint64_t ameta = aggsum_value(&arc_sums.arcstat_meta_used);
/* /*
* If we're over arc_meta_limit, we want to correct that before * If we're over arc_meta_limit, we want to correct that before
@ -4668,8 +4632,8 @@ arc_evict(void)
/* /*
* Re-sum ARC stats after the first round of evictions. * Re-sum ARC stats after the first round of evictions.
*/ */
asize = aggsum_value(&arc_size); asize = aggsum_value(&arc_sums.arcstat_size);
ameta = aggsum_value(&arc_meta_used); ameta = aggsum_value(&arc_sums.arcstat_meta_used);
/* /*
@ -4783,7 +4747,7 @@ arc_flush(spa_t *spa, boolean_t retry)
void void
arc_reduce_target_size(int64_t to_free) arc_reduce_target_size(int64_t to_free)
{ {
uint64_t asize = aggsum_value(&arc_size); uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
/* /*
* All callers want the ARC to actually evict (at least) this much * All callers want the ARC to actually evict (at least) this much
@ -4836,8 +4800,8 @@ arc_kmem_reap_soon(void)
extern kmem_cache_t *zio_data_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[];
#ifdef _KERNEL #ifdef _KERNEL
if ((aggsum_compare(&arc_meta_used, arc_meta_limit) >= 0) && if ((aggsum_compare(&arc_sums.arcstat_meta_used,
zfs_arc_meta_prune) { arc_meta_limit) >= 0) && zfs_arc_meta_prune) {
/* /*
* We are exceeding our meta-data cache limit. * We are exceeding our meta-data cache limit.
* Prune some entries to release holds on meta-data. * Prune some entries to release holds on meta-data.
@ -4940,7 +4904,7 @@ arc_evict_cb(void *arg, zthr_t *zthr)
*/ */
mutex_enter(&arc_evict_lock); mutex_enter(&arc_evict_lock);
arc_evict_needed = !zthr_iscancelled(arc_evict_zthr) && arc_evict_needed = !zthr_iscancelled(arc_evict_zthr) &&
evicted > 0 && aggsum_compare(&arc_size, arc_c) > 0; evicted > 0 && aggsum_compare(&arc_sums.arcstat_size, arc_c) > 0;
if (!arc_evict_needed) { if (!arc_evict_needed) {
/* /*
* We're either no longer overflowing, or we * We're either no longer overflowing, or we
@ -5153,7 +5117,7 @@ arc_adapt(int bytes, arc_state_t *state)
* cache size, increment the target cache size * cache size, increment the target cache size
*/ */
ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT); ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT);
if (aggsum_upper_bound(&arc_size) >= if (aggsum_upper_bound(&arc_sums.arcstat_size) >=
arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) { arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
atomic_add_64(&arc_c, (int64_t)bytes); atomic_add_64(&arc_c, (int64_t)bytes);
if (arc_c > arc_c_max) if (arc_c > arc_c_max)
@ -5186,7 +5150,8 @@ arc_is_overflowing(void)
* in the ARC. In practice, that's in the tens of MB, which is low * in the ARC. In practice, that's in the tens of MB, which is low
* enough to be safe. * enough to be safe.
*/ */
return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow); return (aggsum_lower_bound(&arc_sums.arcstat_size) >=
(int64_t)arc_c + overflow);
} }
static abd_t * static abd_t *
@ -5355,7 +5320,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
* If we are growing the cache, and we are adding anonymous * If we are growing the cache, and we are adding anonymous
* data, and we have outgrown arc_p, update arc_p * data, and we have outgrown arc_p, update arc_p
*/ */
if (aggsum_upper_bound(&arc_size) < arc_c && if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c &&
hdr->b_l1hdr.b_state == arc_anon && hdr->b_l1hdr.b_state == arc_anon &&
(zfs_refcount_count(&arc_anon->arcs_size) + (zfs_refcount_count(&arc_anon->arcs_size) +
zfs_refcount_count(&arc_mru->arcs_size) > arc_p)) zfs_refcount_count(&arc_mru->arcs_size) > arc_p))
@ -7240,55 +7205,219 @@ arc_kstat_update(kstat_t *ksp, int rw)
{ {
arc_stats_t *as = ksp->ks_data; arc_stats_t *as = ksp->ks_data;
if (rw == KSTAT_WRITE) { if (rw == KSTAT_WRITE)
return (SET_ERROR(EACCES)); return (SET_ERROR(EACCES));
} else {
arc_kstat_update_state(arc_anon,
&as->arcstat_anon_size,
&as->arcstat_anon_evictable_data,
&as->arcstat_anon_evictable_metadata);
arc_kstat_update_state(arc_mru,
&as->arcstat_mru_size,
&as->arcstat_mru_evictable_data,
&as->arcstat_mru_evictable_metadata);
arc_kstat_update_state(arc_mru_ghost,
&as->arcstat_mru_ghost_size,
&as->arcstat_mru_ghost_evictable_data,
&as->arcstat_mru_ghost_evictable_metadata);
arc_kstat_update_state(arc_mfu,
&as->arcstat_mfu_size,
&as->arcstat_mfu_evictable_data,
&as->arcstat_mfu_evictable_metadata);
arc_kstat_update_state(arc_mfu_ghost,
&as->arcstat_mfu_ghost_size,
&as->arcstat_mfu_ghost_evictable_data,
&as->arcstat_mfu_ghost_evictable_metadata);
ARCSTAT(arcstat_size) = aggsum_value(&arc_size); as->arcstat_hits.value.ui64 =
ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used); wmsum_value(&arc_sums.arcstat_hits);
ARCSTAT(arcstat_data_size) = wmsum_value(&astat_data_size); as->arcstat_misses.value.ui64 =
ARCSTAT(arcstat_metadata_size) = wmsum_value(&arc_sums.arcstat_misses);
wmsum_value(&astat_metadata_size); as->arcstat_demand_data_hits.value.ui64 =
ARCSTAT(arcstat_hdr_size) = wmsum_value(&astat_hdr_size); wmsum_value(&arc_sums.arcstat_demand_data_hits);
ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size); as->arcstat_demand_data_misses.value.ui64 =
ARCSTAT(arcstat_dbuf_size) = wmsum_value(&astat_dbuf_size); wmsum_value(&arc_sums.arcstat_demand_data_misses);
as->arcstat_demand_metadata_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_metadata_hits);
as->arcstat_demand_metadata_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_metadata_misses);
as->arcstat_prefetch_data_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_data_hits);
as->arcstat_prefetch_data_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_data_misses);
as->arcstat_prefetch_metadata_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_metadata_hits);
as->arcstat_prefetch_metadata_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_metadata_misses);
as->arcstat_mru_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mru_hits);
as->arcstat_mru_ghost_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mru_ghost_hits);
as->arcstat_mfu_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mfu_hits);
as->arcstat_mfu_ghost_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mfu_ghost_hits);
as->arcstat_deleted.value.ui64 =
wmsum_value(&arc_sums.arcstat_deleted);
as->arcstat_mutex_miss.value.ui64 =
wmsum_value(&arc_sums.arcstat_mutex_miss);
as->arcstat_access_skip.value.ui64 =
wmsum_value(&arc_sums.arcstat_access_skip);
as->arcstat_evict_skip.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_skip);
as->arcstat_evict_not_enough.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_not_enough);
as->arcstat_evict_l2_cached.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_cached);
as->arcstat_evict_l2_eligible.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_eligible);
as->arcstat_evict_l2_eligible_mfu.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mfu);
as->arcstat_evict_l2_eligible_mru.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mru);
as->arcstat_evict_l2_ineligible.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_ineligible);
as->arcstat_evict_l2_skip.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_skip);
as->arcstat_hash_collisions.value.ui64 =
wmsum_value(&arc_sums.arcstat_hash_collisions);
as->arcstat_hash_chains.value.ui64 =
wmsum_value(&arc_sums.arcstat_hash_chains);
as->arcstat_size.value.ui64 =
aggsum_value(&arc_sums.arcstat_size);
as->arcstat_compressed_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_compressed_size);
as->arcstat_uncompressed_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_uncompressed_size);
as->arcstat_overhead_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_overhead_size);
as->arcstat_hdr_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_hdr_size);
as->arcstat_data_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_data_size);
as->arcstat_metadata_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_metadata_size);
as->arcstat_dbuf_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_dbuf_size);
#if defined(COMPAT_FREEBSD11) #if defined(COMPAT_FREEBSD11)
ARCSTAT(arcstat_other_size) = wmsum_value(&astat_bonus_size) + as->arcstat_other_size.value.ui64 =
aggsum_value(&astat_dnode_size) + wmsum_value(&arc_sums.arcstat_bonus_size) +
wmsum_value(&astat_dbuf_size); aggsum_value(&arc_sums.arcstat_dnode_size) +
wmsum_value(&arc_sums.arcstat_dbuf_size);
#endif #endif
ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
ARCSTAT(arcstat_bonus_size) = wmsum_value(&astat_bonus_size);
ARCSTAT(arcstat_abd_chunk_waste_size) =
wmsum_value(&astat_abd_chunk_waste_size);
as->arcstat_memory_all_bytes.value.ui64 = arc_kstat_update_state(arc_anon,
arc_all_memory(); &as->arcstat_anon_size,
as->arcstat_memory_free_bytes.value.ui64 = &as->arcstat_anon_evictable_data,
arc_free_memory(); &as->arcstat_anon_evictable_metadata);
as->arcstat_memory_available_bytes.value.i64 = arc_kstat_update_state(arc_mru,
arc_available_memory(); &as->arcstat_mru_size,
} &as->arcstat_mru_evictable_data,
&as->arcstat_mru_evictable_metadata);
arc_kstat_update_state(arc_mru_ghost,
&as->arcstat_mru_ghost_size,
&as->arcstat_mru_ghost_evictable_data,
&as->arcstat_mru_ghost_evictable_metadata);
arc_kstat_update_state(arc_mfu,
&as->arcstat_mfu_size,
&as->arcstat_mfu_evictable_data,
&as->arcstat_mfu_evictable_metadata);
arc_kstat_update_state(arc_mfu_ghost,
&as->arcstat_mfu_ghost_size,
&as->arcstat_mfu_ghost_evictable_data,
&as->arcstat_mfu_ghost_evictable_metadata);
as->arcstat_dnode_size.value.ui64 =
aggsum_value(&arc_sums.arcstat_dnode_size);
as->arcstat_bonus_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_bonus_size);
as->arcstat_l2_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_hits);
as->arcstat_l2_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_misses);
as->arcstat_l2_prefetch_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_prefetch_asize);
as->arcstat_l2_mru_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_mru_asize);
as->arcstat_l2_mfu_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_mfu_asize);
as->arcstat_l2_bufc_data_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_bufc_data_asize);
as->arcstat_l2_bufc_metadata_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_bufc_metadata_asize);
as->arcstat_l2_feeds.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_feeds);
as->arcstat_l2_rw_clash.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rw_clash);
as->arcstat_l2_read_bytes.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_read_bytes);
as->arcstat_l2_write_bytes.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_write_bytes);
as->arcstat_l2_writes_sent.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_sent);
as->arcstat_l2_writes_done.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_done);
as->arcstat_l2_writes_error.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_error);
as->arcstat_l2_writes_lock_retry.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_lock_retry);
as->arcstat_l2_evict_lock_retry.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_evict_lock_retry);
as->arcstat_l2_evict_reading.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_evict_reading);
as->arcstat_l2_evict_l1cached.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_evict_l1cached);
as->arcstat_l2_free_on_write.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_free_on_write);
as->arcstat_l2_abort_lowmem.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_abort_lowmem);
as->arcstat_l2_cksum_bad.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_cksum_bad);
as->arcstat_l2_io_error.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_io_error);
as->arcstat_l2_lsize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_lsize);
as->arcstat_l2_psize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_psize);
as->arcstat_l2_hdr_size.value.ui64 =
aggsum_value(&arc_sums.arcstat_l2_hdr_size);
as->arcstat_l2_log_blk_writes.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_log_blk_writes);
as->arcstat_l2_log_blk_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_log_blk_asize);
as->arcstat_l2_log_blk_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_log_blk_count);
as->arcstat_l2_rebuild_success.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_success);
as->arcstat_l2_rebuild_abort_unsupported.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
as->arcstat_l2_rebuild_abort_io_errors.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
as->arcstat_l2_rebuild_abort_dh_errors.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
as->arcstat_l2_rebuild_abort_cksum_lb_errors.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
as->arcstat_l2_rebuild_abort_lowmem.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
as->arcstat_l2_rebuild_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_size);
as->arcstat_l2_rebuild_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_asize);
as->arcstat_l2_rebuild_bufs.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs);
as->arcstat_l2_rebuild_bufs_precached.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs_precached);
as->arcstat_l2_rebuild_log_blks.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_log_blks);
as->arcstat_memory_throttle_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_memory_throttle_count);
as->arcstat_memory_direct_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_memory_direct_count);
as->arcstat_memory_indirect_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_memory_indirect_count);
as->arcstat_memory_all_bytes.value.ui64 =
arc_all_memory();
as->arcstat_memory_free_bytes.value.ui64 =
arc_free_memory();
as->arcstat_memory_available_bytes.value.i64 =
arc_available_memory();
as->arcstat_prune.value.ui64 =
wmsum_value(&arc_sums.arcstat_prune);
as->arcstat_meta_used.value.ui64 =
aggsum_value(&arc_sums.arcstat_meta_used);
as->arcstat_async_upgrade_sync.value.ui64 =
wmsum_value(&arc_sums.arcstat_async_upgrade_sync);
as->arcstat_demand_hit_predictive_prefetch.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_hit_predictive_prefetch);
as->arcstat_demand_hit_prescient_prefetch.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_hit_prescient_prefetch);
as->arcstat_raw_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_raw_size);
as->arcstat_cached_only_in_progress.value.ui64 =
wmsum_value(&arc_sums.arcstat_cached_only_in_progress);
as->arcstat_abd_chunk_waste_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_abd_chunk_waste_size);
return (0); return (0);
} }
@ -7511,16 +7640,93 @@ arc_state_init(void)
zfs_refcount_create(&arc_mfu_ghost->arcs_size); zfs_refcount_create(&arc_mfu_ghost->arcs_size);
zfs_refcount_create(&arc_l2c_only->arcs_size); zfs_refcount_create(&arc_l2c_only->arcs_size);
aggsum_init(&arc_meta_used, 0); wmsum_init(&arc_sums.arcstat_hits, 0);
aggsum_init(&arc_size, 0); wmsum_init(&arc_sums.arcstat_misses, 0);
wmsum_init(&astat_data_size, 0); wmsum_init(&arc_sums.arcstat_demand_data_hits, 0);
wmsum_init(&astat_metadata_size, 0); wmsum_init(&arc_sums.arcstat_demand_data_misses, 0);
wmsum_init(&astat_hdr_size, 0); wmsum_init(&arc_sums.arcstat_demand_metadata_hits, 0);
aggsum_init(&astat_l2_hdr_size, 0); wmsum_init(&arc_sums.arcstat_demand_metadata_misses, 0);
wmsum_init(&astat_bonus_size, 0); wmsum_init(&arc_sums.arcstat_prefetch_data_hits, 0);
aggsum_init(&astat_dnode_size, 0); wmsum_init(&arc_sums.arcstat_prefetch_data_misses, 0);
wmsum_init(&astat_dbuf_size, 0); wmsum_init(&arc_sums.arcstat_prefetch_metadata_hits, 0);
wmsum_init(&astat_abd_chunk_waste_size, 0); wmsum_init(&arc_sums.arcstat_prefetch_metadata_misses, 0);
wmsum_init(&arc_sums.arcstat_mru_hits, 0);
wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0);
wmsum_init(&arc_sums.arcstat_mfu_hits, 0);
wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0);
wmsum_init(&arc_sums.arcstat_deleted, 0);
wmsum_init(&arc_sums.arcstat_mutex_miss, 0);
wmsum_init(&arc_sums.arcstat_access_skip, 0);
wmsum_init(&arc_sums.arcstat_evict_skip, 0);
wmsum_init(&arc_sums.arcstat_evict_not_enough, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_cached, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_eligible, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mfu, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mru, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_ineligible, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_skip, 0);
wmsum_init(&arc_sums.arcstat_hash_collisions, 0);
wmsum_init(&arc_sums.arcstat_hash_chains, 0);
aggsum_init(&arc_sums.arcstat_size, 0);
wmsum_init(&arc_sums.arcstat_compressed_size, 0);
wmsum_init(&arc_sums.arcstat_uncompressed_size, 0);
wmsum_init(&arc_sums.arcstat_overhead_size, 0);
wmsum_init(&arc_sums.arcstat_hdr_size, 0);
wmsum_init(&arc_sums.arcstat_data_size, 0);
wmsum_init(&arc_sums.arcstat_metadata_size, 0);
wmsum_init(&arc_sums.arcstat_dbuf_size, 0);
aggsum_init(&arc_sums.arcstat_dnode_size, 0);
wmsum_init(&arc_sums.arcstat_bonus_size, 0);
wmsum_init(&arc_sums.arcstat_l2_hits, 0);
wmsum_init(&arc_sums.arcstat_l2_misses, 0);
wmsum_init(&arc_sums.arcstat_l2_prefetch_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_mru_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_mfu_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_bufc_data_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_bufc_metadata_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_feeds, 0);
wmsum_init(&arc_sums.arcstat_l2_rw_clash, 0);
wmsum_init(&arc_sums.arcstat_l2_read_bytes, 0);
wmsum_init(&arc_sums.arcstat_l2_write_bytes, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_sent, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_done, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_error, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_lock_retry, 0);
wmsum_init(&arc_sums.arcstat_l2_evict_lock_retry, 0);
wmsum_init(&arc_sums.arcstat_l2_evict_reading, 0);
wmsum_init(&arc_sums.arcstat_l2_evict_l1cached, 0);
wmsum_init(&arc_sums.arcstat_l2_free_on_write, 0);
wmsum_init(&arc_sums.arcstat_l2_abort_lowmem, 0);
wmsum_init(&arc_sums.arcstat_l2_cksum_bad, 0);
wmsum_init(&arc_sums.arcstat_l2_io_error, 0);
wmsum_init(&arc_sums.arcstat_l2_lsize, 0);
wmsum_init(&arc_sums.arcstat_l2_psize, 0);
aggsum_init(&arc_sums.arcstat_l2_hdr_size, 0);
wmsum_init(&arc_sums.arcstat_l2_log_blk_writes, 0);
wmsum_init(&arc_sums.arcstat_l2_log_blk_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_log_blk_count, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_success, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_unsupported, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_io_errors, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_dh_errors, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_lowmem, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_size, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs_precached, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_log_blks, 0);
wmsum_init(&arc_sums.arcstat_memory_throttle_count, 0);
wmsum_init(&arc_sums.arcstat_memory_direct_count, 0);
wmsum_init(&arc_sums.arcstat_memory_indirect_count, 0);
wmsum_init(&arc_sums.arcstat_prune, 0);
aggsum_init(&arc_sums.arcstat_meta_used, 0);
wmsum_init(&arc_sums.arcstat_async_upgrade_sync, 0);
wmsum_init(&arc_sums.arcstat_demand_hit_predictive_prefetch, 0);
wmsum_init(&arc_sums.arcstat_demand_hit_prescient_prefetch, 0);
wmsum_init(&arc_sums.arcstat_raw_size, 0);
wmsum_init(&arc_sums.arcstat_cached_only_in_progress, 0);
wmsum_init(&arc_sums.arcstat_abd_chunk_waste_size, 0);
arc_anon->arcs_state = ARC_STATE_ANON; arc_anon->arcs_state = ARC_STATE_ANON;
arc_mru->arcs_state = ARC_STATE_MRU; arc_mru->arcs_state = ARC_STATE_MRU;
@ -7564,16 +7770,93 @@ arc_state_fini(void)
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
aggsum_fini(&arc_meta_used); wmsum_fini(&arc_sums.arcstat_hits);
aggsum_fini(&arc_size); wmsum_fini(&arc_sums.arcstat_misses);
wmsum_fini(&astat_data_size); wmsum_fini(&arc_sums.arcstat_demand_data_hits);
wmsum_fini(&astat_metadata_size); wmsum_fini(&arc_sums.arcstat_demand_data_misses);
wmsum_fini(&astat_hdr_size); wmsum_fini(&arc_sums.arcstat_demand_metadata_hits);
aggsum_fini(&astat_l2_hdr_size); wmsum_fini(&arc_sums.arcstat_demand_metadata_misses);
wmsum_fini(&astat_bonus_size); wmsum_fini(&arc_sums.arcstat_prefetch_data_hits);
aggsum_fini(&astat_dnode_size); wmsum_fini(&arc_sums.arcstat_prefetch_data_misses);
wmsum_fini(&astat_dbuf_size); wmsum_fini(&arc_sums.arcstat_prefetch_metadata_hits);
wmsum_fini(&astat_abd_chunk_waste_size); wmsum_fini(&arc_sums.arcstat_prefetch_metadata_misses);
wmsum_fini(&arc_sums.arcstat_mru_hits);
wmsum_fini(&arc_sums.arcstat_mru_ghost_hits);
wmsum_fini(&arc_sums.arcstat_mfu_hits);
wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits);
wmsum_fini(&arc_sums.arcstat_deleted);
wmsum_fini(&arc_sums.arcstat_mutex_miss);
wmsum_fini(&arc_sums.arcstat_access_skip);
wmsum_fini(&arc_sums.arcstat_evict_skip);
wmsum_fini(&arc_sums.arcstat_evict_not_enough);
wmsum_fini(&arc_sums.arcstat_evict_l2_cached);
wmsum_fini(&arc_sums.arcstat_evict_l2_eligible);
wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mfu);
wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mru);
wmsum_fini(&arc_sums.arcstat_evict_l2_ineligible);
wmsum_fini(&arc_sums.arcstat_evict_l2_skip);
wmsum_fini(&arc_sums.arcstat_hash_collisions);
wmsum_fini(&arc_sums.arcstat_hash_chains);
aggsum_fini(&arc_sums.arcstat_size);
wmsum_fini(&arc_sums.arcstat_compressed_size);
wmsum_fini(&arc_sums.arcstat_uncompressed_size);
wmsum_fini(&arc_sums.arcstat_overhead_size);
wmsum_fini(&arc_sums.arcstat_hdr_size);
wmsum_fini(&arc_sums.arcstat_data_size);
wmsum_fini(&arc_sums.arcstat_metadata_size);
wmsum_fini(&arc_sums.arcstat_dbuf_size);
aggsum_fini(&arc_sums.arcstat_dnode_size);
wmsum_fini(&arc_sums.arcstat_bonus_size);
wmsum_fini(&arc_sums.arcstat_l2_hits);
wmsum_fini(&arc_sums.arcstat_l2_misses);
wmsum_fini(&arc_sums.arcstat_l2_prefetch_asize);
wmsum_fini(&arc_sums.arcstat_l2_mru_asize);
wmsum_fini(&arc_sums.arcstat_l2_mfu_asize);
wmsum_fini(&arc_sums.arcstat_l2_bufc_data_asize);
wmsum_fini(&arc_sums.arcstat_l2_bufc_metadata_asize);
wmsum_fini(&arc_sums.arcstat_l2_feeds);
wmsum_fini(&arc_sums.arcstat_l2_rw_clash);
wmsum_fini(&arc_sums.arcstat_l2_read_bytes);
wmsum_fini(&arc_sums.arcstat_l2_write_bytes);
wmsum_fini(&arc_sums.arcstat_l2_writes_sent);
wmsum_fini(&arc_sums.arcstat_l2_writes_done);
wmsum_fini(&arc_sums.arcstat_l2_writes_error);
wmsum_fini(&arc_sums.arcstat_l2_writes_lock_retry);
wmsum_fini(&arc_sums.arcstat_l2_evict_lock_retry);
wmsum_fini(&arc_sums.arcstat_l2_evict_reading);
wmsum_fini(&arc_sums.arcstat_l2_evict_l1cached);
wmsum_fini(&arc_sums.arcstat_l2_free_on_write);
wmsum_fini(&arc_sums.arcstat_l2_abort_lowmem);
wmsum_fini(&arc_sums.arcstat_l2_cksum_bad);
wmsum_fini(&arc_sums.arcstat_l2_io_error);
wmsum_fini(&arc_sums.arcstat_l2_lsize);
wmsum_fini(&arc_sums.arcstat_l2_psize);
aggsum_fini(&arc_sums.arcstat_l2_hdr_size);
wmsum_fini(&arc_sums.arcstat_l2_log_blk_writes);
wmsum_fini(&arc_sums.arcstat_l2_log_blk_asize);
wmsum_fini(&arc_sums.arcstat_l2_log_blk_count);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_success);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_size);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_asize);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs_precached);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_log_blks);
wmsum_fini(&arc_sums.arcstat_memory_throttle_count);
wmsum_fini(&arc_sums.arcstat_memory_direct_count);
wmsum_fini(&arc_sums.arcstat_memory_indirect_count);
wmsum_fini(&arc_sums.arcstat_prune);
aggsum_fini(&arc_sums.arcstat_meta_used);
wmsum_fini(&arc_sums.arcstat_async_upgrade_sync);
wmsum_fini(&arc_sums.arcstat_demand_hit_predictive_prefetch);
wmsum_fini(&arc_sums.arcstat_demand_hit_prescient_prefetch);
wmsum_fini(&arc_sums.arcstat_raw_size);
wmsum_fini(&arc_sums.arcstat_cached_only_in_progress);
wmsum_fini(&arc_sums.arcstat_abd_chunk_waste_size);
} }
uint64_t uint64_t
@ -7623,8 +7906,6 @@ arc_init(void)
/* Set min to 1/2 of arc_c_min */ /* Set min to 1/2 of arc_c_min */
arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT; arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
/* Initialize maximum observed usage to zero */
arc_meta_max = 0;
/* /*
* Set arc_meta_limit to a percent of arc_c_max with a floor of * Set arc_meta_limit to a percent of arc_c_max with a floor of
* arc_meta_min, and a ceiling of arc_c_max. * arc_meta_min, and a ceiling of arc_c_max.
@ -8350,7 +8631,7 @@ top:
} }
} }
atomic_inc_64(&l2arc_writes_done); ARCSTAT_BUMP(arcstat_l2_writes_done);
list_remove(buflist, head); list_remove(buflist, head);
ASSERT(!HDR_HAS_L1HDR(head)); ASSERT(!HDR_HAS_L1HDR(head));
kmem_cache_free(hdr_l2only_cache, head); kmem_cache_free(hdr_l2only_cache, head);
@ -9322,7 +9603,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
static boolean_t static boolean_t
l2arc_hdr_limit_reached(void) l2arc_hdr_limit_reached(void)
{ {
int64_t s = aggsum_upper_bound(&astat_l2_hdr_size); int64_t s = aggsum_upper_bound(&arc_sums.arcstat_l2_hdr_size);
return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) || return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) ||
(s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100)); (s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100));
@ -9656,8 +9937,6 @@ l2arc_init(void)
{ {
l2arc_thread_exit = 0; l2arc_thread_exit = 0;
l2arc_ndev = 0; l2arc_ndev = 0;
l2arc_writes_sent = 0;
l2arc_writes_done = 0;
mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL); cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);

View File

@ -52,6 +52,7 @@
#include <sys/vdev.h> #include <sys/vdev.h>
#include <cityhash.h> #include <cityhash.h>
#include <sys/spa_impl.h> #include <sys/spa_impl.h>
#include <sys/wmsum.h>
kstat_t *dbuf_ksp; kstat_t *dbuf_ksp;
@ -135,8 +136,22 @@ dbuf_stats_t dbuf_stats = {
{ "metadata_cache_overflow", KSTAT_DATA_UINT64 } { "metadata_cache_overflow", KSTAT_DATA_UINT64 }
}; };
/*
 * Write-mostly counters backing the dbuf kstats.
 *
 * DBUF_STAT_INCR() and the macros built on it add to these sums with
 * wmsum_add() instead of doing an atomic add on the shared dbuf_stats
 * values.  dbuf_kstat_update() reads each sum with wmsum_value() and
 * stores it in the dbuf_stats member of the same name, so the member
 * names here must stay in step with dbuf_stats.
 *
 * Not every dbuf_stats member has a sum: hash_elements is still updated
 * atomically in dbuf_stats itself, and the *_bytes cache size/target
 * values are computed in dbuf_kstat_update() when the kstat is read.
 */
struct {
/* Published as the "cache_count" and "cache_total_evicts" kstats. */
wmsum_t cache_count;
wmsum_t cache_total_evicts;
/*
 * One counter per index in [0, DN_MAX_LEVELS); dbuf_kstat_update()
 * copies element i to the dbuf_stats element with the same index.
 * NOTE(review): the index is presumably the dbuf's level -- confirm
 * against the DBUF_STAT_* call sites.
 */
wmsum_t cache_levels[DN_MAX_LEVELS];
wmsum_t cache_levels_bytes[DN_MAX_LEVELS];
/* Hash table counters. */
wmsum_t hash_hits;
wmsum_t hash_misses;
wmsum_t hash_collisions;
/* Decremented via DBUF_STAT_BUMPDOWN() in dbuf_hash_remove(). */
wmsum_t hash_chains;
wmsum_t hash_insert_race;
/* Metadata cache counters. */
wmsum_t metadata_cache_count;
wmsum_t metadata_cache_overflow;
} dbuf_sums;
#define DBUF_STAT_INCR(stat, val) \ #define DBUF_STAT_INCR(stat, val) \
atomic_add_64(&dbuf_stats.stat.value.ui64, (val)); wmsum_add(&dbuf_sums.stat, val);
#define DBUF_STAT_DECR(stat, val) \ #define DBUF_STAT_DECR(stat, val) \
DBUF_STAT_INCR(stat, -(val)); DBUF_STAT_INCR(stat, -(val));
#define DBUF_STAT_BUMP(stat) \ #define DBUF_STAT_BUMP(stat) \
@ -297,8 +312,6 @@ dbuf_dest(void *vdb, void *unused)
*/ */
static dbuf_hash_table_t dbuf_hash_table; static dbuf_hash_table_t dbuf_hash_table;
static uint64_t dbuf_hash_count;
/* /*
* We use Cityhash for this. It's fast, and has good hash properties without * We use Cityhash for this. It's fast, and has good hash properties without
* requiring any large static buffers. * requiring any large static buffers.
@ -409,8 +422,8 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
db->db_hash_next = h->hash_table[idx]; db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db; h->hash_table[idx] = db;
mutex_exit(DBUF_HASH_MUTEX(h, idx)); mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_inc_64(&dbuf_hash_count); uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
DBUF_STAT_MAX(hash_elements_max, dbuf_hash_count); DBUF_STAT_MAX(hash_elements_max, he);
return (NULL); return (NULL);
} }
@ -483,7 +496,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
h->hash_table[idx]->db_hash_next == NULL) h->hash_table[idx]->db_hash_next == NULL)
DBUF_STAT_BUMPDOWN(hash_chains); DBUF_STAT_BUMPDOWN(hash_chains);
mutex_exit(DBUF_HASH_MUTEX(h, idx)); mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_dec_64(&dbuf_hash_count); atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
} }
typedef enum { typedef enum {
@ -767,19 +780,40 @@ dbuf_kstat_update(kstat_t *ksp, int rw)
{ {
dbuf_stats_t *ds = ksp->ks_data; dbuf_stats_t *ds = ksp->ks_data;
if (rw == KSTAT_WRITE) { if (rw == KSTAT_WRITE)
return (SET_ERROR(EACCES)); return (SET_ERROR(EACCES));
} else {
ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count(
&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
ds->cache_size_bytes.value.ui64 =
zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
ds->hash_elements.value.ui64 = dbuf_hash_count;
}
ds->cache_count.value.ui64 =
wmsum_value(&dbuf_sums.cache_count);
ds->cache_size_bytes.value.ui64 =
zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
ds->cache_total_evicts.value.ui64 =
wmsum_value(&dbuf_sums.cache_total_evicts);
for (int i = 0; i < DN_MAX_LEVELS; i++) {
ds->cache_levels[i].value.ui64 =
wmsum_value(&dbuf_sums.cache_levels[i]);
ds->cache_levels_bytes[i].value.ui64 =
wmsum_value(&dbuf_sums.cache_levels_bytes[i]);
}
ds->hash_hits.value.ui64 =
wmsum_value(&dbuf_sums.hash_hits);
ds->hash_misses.value.ui64 =
wmsum_value(&dbuf_sums.hash_misses);
ds->hash_collisions.value.ui64 =
wmsum_value(&dbuf_sums.hash_collisions);
ds->hash_chains.value.ui64 =
wmsum_value(&dbuf_sums.hash_chains);
ds->hash_insert_race.value.ui64 =
wmsum_value(&dbuf_sums.hash_insert_race);
ds->metadata_cache_count.value.ui64 =
wmsum_value(&dbuf_sums.metadata_cache_count);
ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count(
&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
ds->metadata_cache_overflow.value.ui64 =
wmsum_value(&dbuf_sums.metadata_cache_overflow);
return (0); return (0);
} }
@ -846,6 +880,20 @@ retry:
dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread, dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
NULL, 0, &p0, TS_RUN, minclsyspri); NULL, 0, &p0, TS_RUN, minclsyspri);
wmsum_init(&dbuf_sums.cache_count, 0);
wmsum_init(&dbuf_sums.cache_total_evicts, 0);
for (i = 0; i < DN_MAX_LEVELS; i++) {
wmsum_init(&dbuf_sums.cache_levels[i], 0);
wmsum_init(&dbuf_sums.cache_levels_bytes[i], 0);
}
wmsum_init(&dbuf_sums.hash_hits, 0);
wmsum_init(&dbuf_sums.hash_misses, 0);
wmsum_init(&dbuf_sums.hash_collisions, 0);
wmsum_init(&dbuf_sums.hash_chains, 0);
wmsum_init(&dbuf_sums.hash_insert_race, 0);
wmsum_init(&dbuf_sums.metadata_cache_count, 0);
wmsum_init(&dbuf_sums.metadata_cache_overflow, 0);
dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc", dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc",
KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t), KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL); KSTAT_FLAG_VIRTUAL);
@ -908,6 +956,20 @@ dbuf_fini(void)
kstat_delete(dbuf_ksp); kstat_delete(dbuf_ksp);
dbuf_ksp = NULL; dbuf_ksp = NULL;
} }
wmsum_fini(&dbuf_sums.cache_count);
wmsum_fini(&dbuf_sums.cache_total_evicts);
for (i = 0; i < DN_MAX_LEVELS; i++) {
wmsum_fini(&dbuf_sums.cache_levels[i]);
wmsum_fini(&dbuf_sums.cache_levels_bytes[i]);
}
wmsum_fini(&dbuf_sums.hash_hits);
wmsum_fini(&dbuf_sums.hash_misses);
wmsum_fini(&dbuf_sums.hash_collisions);
wmsum_fini(&dbuf_sums.hash_chains);
wmsum_fini(&dbuf_sums.hash_insert_race);
wmsum_fini(&dbuf_sums.metadata_cache_count);
wmsum_fini(&dbuf_sums.metadata_cache_overflow);
} }
/* /*
@ -3708,9 +3770,11 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
db->db_caching_status = dcs; db->db_caching_status = dcs;
multilist_insert(&dbuf_caches[dcs].cache, db); multilist_insert(&dbuf_caches[dcs].cache, db);
uint64_t db_size = db->db.db_size;
size = zfs_refcount_add_many( size = zfs_refcount_add_many(
&dbuf_caches[dcs].size, &dbuf_caches[dcs].size, db_size, db);
db->db.db_size, db); uint8_t db_level = db->db_level;
mutex_exit(&db->db_mtx);
if (dcs == DB_DBUF_METADATA_CACHE) { if (dcs == DB_DBUF_METADATA_CACHE) {
DBUF_STAT_BUMP(metadata_cache_count); DBUF_STAT_BUMP(metadata_cache_count);
@ -3718,16 +3782,14 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
metadata_cache_size_bytes_max, metadata_cache_size_bytes_max,
size); size);
} else { } else {
DBUF_STAT_BUMP(
cache_levels[db->db_level]);
DBUF_STAT_BUMP(cache_count); DBUF_STAT_BUMP(cache_count);
DBUF_STAT_INCR(
cache_levels_bytes[db->db_level],
db->db.db_size);
DBUF_STAT_MAX(cache_size_bytes_max, DBUF_STAT_MAX(cache_size_bytes_max,
size); size);
DBUF_STAT_BUMP(cache_levels[db_level]);
DBUF_STAT_INCR(
cache_levels_bytes[db_level],
db_size);
} }
mutex_exit(&db->db_mtx);
if (dcs == DB_DBUF_CACHE && !evicting) if (dcs == DB_DBUF_CACHE && !evicting)
dbuf_evict_notify(size); dbuf_evict_notify(size);

View File

@ -34,6 +34,7 @@
#include <sys/dmu.h> #include <sys/dmu.h>
#include <sys/dbuf.h> #include <sys/dbuf.h>
#include <sys/kstat.h> #include <sys/kstat.h>
#include <sys/wmsum.h>
/* /*
* This tunable disables predictive prefetch. Note that it leaves "prescient" * This tunable disables predictive prefetch. Note that it leaves "prescient"
@ -69,27 +70,54 @@ static zfetch_stats_t zfetch_stats = {
{ "io_issued", KSTAT_DATA_UINT64 }, { "io_issued", KSTAT_DATA_UINT64 },
}; };
#define ZFETCHSTAT_BUMP(stat) \ struct {
atomic_inc_64(&zfetch_stats.stat.value.ui64) wmsum_t zfetchstat_hits;
wmsum_t zfetchstat_misses;
wmsum_t zfetchstat_max_streams;
wmsum_t zfetchstat_io_issued;
} zfetch_sums;
#define ZFETCHSTAT_BUMP(stat) \
wmsum_add(&zfetch_sums.stat, 1)
#define ZFETCHSTAT_ADD(stat, val) \ #define ZFETCHSTAT_ADD(stat, val) \
atomic_add_64(&zfetch_stats.stat.value.ui64, val) wmsum_add(&zfetch_sums.stat, val)
#define ZFETCHSTAT_SET(stat, val) \
zfetch_stats.stat.value.ui64 = val
#define ZFETCHSTAT_GET(stat) \
zfetch_stats.stat.value.ui64
kstat_t *zfetch_ksp; kstat_t *zfetch_ksp;
/*
 * Update callback for the "zfetchstats" kstat.
 *
 * Refreshes every exported counter in the kstat's data area from the
 * corresponding wmsum in zfetch_sums.  The kstat is read-only: a
 * KSTAT_WRITE request is refused with EACCES.  Returns 0 otherwise.
 */
static int
zfetch_kstats_update(kstat_t *ksp, int rw)
{
	/* Reject writes before touching the exported data. */
	if (rw == KSTAT_WRITE)
		return (EACCES);

	zfetch_stats_t *stats = ksp->ks_data;

	stats->zfetchstat_hits.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_hits);
	stats->zfetchstat_misses.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_misses);
	stats->zfetchstat_max_streams.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_max_streams);
	stats->zfetchstat_io_issued.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_io_issued);

	return (0);
}
void void
zfetch_init(void) zfetch_init(void)
{ {
wmsum_init(&zfetch_sums.zfetchstat_hits, 0);
wmsum_init(&zfetch_sums.zfetchstat_misses, 0);
wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0);
wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0);
zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL); KSTAT_FLAG_VIRTUAL);
if (zfetch_ksp != NULL) { if (zfetch_ksp != NULL) {
zfetch_ksp->ks_data = &zfetch_stats; zfetch_ksp->ks_data = &zfetch_stats;
zfetch_ksp->ks_update = zfetch_kstats_update;
kstat_install(zfetch_ksp); kstat_install(zfetch_ksp);
} }
} }
@ -101,6 +129,11 @@ zfetch_fini(void)
kstat_delete(zfetch_ksp); kstat_delete(zfetch_ksp);
zfetch_ksp = NULL; zfetch_ksp = NULL;
} }
wmsum_fini(&zfetch_sums.zfetchstat_hits);
wmsum_fini(&zfetch_sums.zfetchstat_misses);
wmsum_fini(&zfetch_sums.zfetchstat_max_streams);
wmsum_fini(&zfetch_sums.zfetchstat_io_issued);
} }
/* /*