From 0873bb6337452e3e028e40f5dad945b30deab185 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 8 Jan 2018 09:52:36 -0800 Subject: [PATCH] Fix ARC hit rate When the compressed ARC feature was added in commit d3c2ae1 the method of reference counting in the ARC was modified. As part of this accounting change the arc_buf_add_ref() function was removed entirely. This would have be fine but the arc_buf_add_ref() function served a second undocumented purpose of updating the ARC access information when taking a hold on a dbuf. Without this logic in place a cached dbuf would not migrate its associated arc_buf_hdr_t to the MFU list. This would negatively impact the ARC hit rate, particularly on systems with a small ARC. This change reinstates the missing call to arc_access() from dbuf_hold() by implementing a new arc_buf_access() function. Reviewed-by: Giuseppe Di Natale Reviewed-by: Tony Hutter Reviewed-by: Tim Chase Reviewed by: George Wilson Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Closes #6171 Closes #6852 Closes #6989 --- include/sys/arc.h | 1 + module/zfs/arc.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++- module/zfs/dbuf.c | 4 +++- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/include/sys/arc.h b/include/sys/arc.h index 0e7a85188..de280362d 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -263,6 +263,7 @@ void arc_buf_destroy(arc_buf_t *buf, void *tag); void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index); uint64_t arc_buf_size(arc_buf_t *buf); uint64_t arc_buf_lsize(arc_buf_t *buf); +void arc_buf_access(arc_buf_t *buf); void arc_release(arc_buf_t *buf, void *tag); int arc_released(arc_buf_t *buf); void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 476351eb4..45b0abe7f 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -448,9 +448,14 @@ typedef struct arc_stats { * by multiple buffers. */ kstat_named_t arcstat_mutex_miss; + /* + * Number of buffers skipped when updating the access state due to the + * header having already been released after acquiring the hash lock. + */ + kstat_named_t arcstat_access_skip; /* * Number of buffers skipped because they have I/O in progress, are - * indrect prefetch buffers that have not lived long enough, or are + * indirect prefetch buffers that have not lived long enough, or are * not from the spa we're trying to evict from. */ kstat_named_t arcstat_evict_skip; @@ -688,6 +693,7 @@ static arc_stats_t arc_stats = { { "mfu_ghost_hits", KSTAT_DATA_UINT64 }, { "deleted", KSTAT_DATA_UINT64 }, { "mutex_miss", KSTAT_DATA_UINT64 }, + { "access_skip", KSTAT_DATA_UINT64 }, { "evict_skip", KSTAT_DATA_UINT64 }, { "evict_not_enough", KSTAT_DATA_UINT64 }, { "evict_l2_cached", KSTAT_DATA_UINT64 }, @@ -5611,6 +5617,50 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) } } +/* + * This routine is called by dbuf_hold() to update the arc_access() state + * which otherwise would be skipped for entries in the dbuf cache. + */ +void +arc_buf_access(arc_buf_t *buf) +{ + mutex_enter(&buf->b_evict_lock); + arc_buf_hdr_t *hdr = buf->b_hdr; + + /* + * Avoid taking the hash_lock when possible as an optimization. + * The header must be checked again under the hash_lock in order + * to handle the case where it is concurrently being released. + */ + if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) { + mutex_exit(&buf->b_evict_lock); + return; + } + + kmutex_t *hash_lock = HDR_LOCK(hdr); + mutex_enter(hash_lock); + + if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) { + mutex_exit(hash_lock); + mutex_exit(&buf->b_evict_lock); + ARCSTAT_BUMP(arcstat_access_skip); + return; + } + + mutex_exit(&buf->b_evict_lock); + + ASSERT(hdr->b_l1hdr.b_state == arc_mru || + hdr->b_l1hdr.b_state == arc_mfu); + + DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); + arc_access(hdr, hash_lock); + mutex_exit(hash_lock); + + ARCSTAT_BUMP(arcstat_hits); + ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr) && !HDR_PRESCIENT_PREFETCH(hdr), + demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits); +} + /* a generic arc_read_done_func_t which you can use */ /* ARGSUSED */ void diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 190d0656a..517a284de 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2821,8 +2821,10 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh) return (SET_ERROR(ENOENT)); } - if (dh->dh_db->db_buf != NULL) + if (dh->dh_db->db_buf != NULL) { + arc_buf_access(dh->dh_db->db_buf); ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data); + } ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));