152 lines
5.1 KiB
Diff
152 lines
5.1 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Brian Behlendorf <behlendorf1@llnl.gov>
|
|
Date: Mon, 8 Jan 2018 09:52:36 -0800
|
|
Subject: [PATCH] Fix ARC hit rate
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
When the compressed ARC feature was added in commit d3c2ae1
|
|
the method of reference counting in the ARC was modified. As
|
|
part of this accounting change the arc_buf_add_ref() function
|
|
was removed entirely.
|
|
|
|
This would have be fine but the arc_buf_add_ref() function
|
|
served a second undocumented purpose of updating the ARC access
|
|
information when taking a hold on a dbuf. Without this logic
|
|
in place a cached dbuf would not migrate its associated
|
|
arc_buf_hdr_t to the MFU list. This would negatively impact
|
|
the ARC hit rate, particularly on systems with a small ARC.
|
|
|
|
This change reinstates the missing call to arc_access() from
|
|
dbuf_hold() by implementing a new arc_buf_access() function.
|
|
|
|
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
|
|
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
|
|
Reviewed-by: Tim Chase <tim@chase2k.com>
|
|
Reviewed by: George Wilson <george.wilson@delphix.com>
|
|
Reviewed-by: George Melikov <mail@gmelikov.ru>
|
|
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
|
Closes #6171
|
|
Closes #6852
|
|
Closes #6989
|
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
---
|
|
include/sys/arc.h | 1 +
|
|
module/zfs/arc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
|
|
module/zfs/dbuf.c | 4 +++-
|
|
3 files changed, 56 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/include/sys/arc.h b/include/sys/arc.h
|
|
index 66f37cf71..1ea4937bd 100644
|
|
--- a/include/sys/arc.h
|
|
+++ b/include/sys/arc.h
|
|
@@ -221,6 +221,7 @@ void arc_buf_destroy(arc_buf_t *buf, void *tag);
|
|
void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
|
|
uint64_t arc_buf_size(arc_buf_t *buf);
|
|
uint64_t arc_buf_lsize(arc_buf_t *buf);
|
|
+void arc_buf_access(arc_buf_t *buf);
|
|
void arc_release(arc_buf_t *buf, void *tag);
|
|
int arc_released(arc_buf_t *buf);
|
|
void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
|
|
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
|
|
index 2b0a78d4b..264e67735 100644
|
|
--- a/module/zfs/arc.c
|
|
+++ b/module/zfs/arc.c
|
|
@@ -429,9 +429,14 @@ typedef struct arc_stats {
|
|
* by multiple buffers.
|
|
*/
|
|
kstat_named_t arcstat_mutex_miss;
|
|
+ /*
|
|
+ * Number of buffers skipped when updating the access state due to the
|
|
+ * header having already been released after acquiring the hash lock.
|
|
+ */
|
|
+ kstat_named_t arcstat_access_skip;
|
|
/*
|
|
* Number of buffers skipped because they have I/O in progress, are
|
|
- * indrect prefetch buffers that have not lived long enough, or are
|
|
+ * indirect prefetch buffers that have not lived long enough, or are
|
|
* not from the spa we're trying to evict from.
|
|
*/
|
|
kstat_named_t arcstat_evict_skip;
|
|
@@ -667,6 +672,7 @@ static arc_stats_t arc_stats = {
|
|
{ "mfu_ghost_hits", KSTAT_DATA_UINT64 },
|
|
{ "deleted", KSTAT_DATA_UINT64 },
|
|
{ "mutex_miss", KSTAT_DATA_UINT64 },
|
|
+ { "access_skip", KSTAT_DATA_UINT64 },
|
|
{ "evict_skip", KSTAT_DATA_UINT64 },
|
|
{ "evict_not_enough", KSTAT_DATA_UINT64 },
|
|
{ "evict_l2_cached", KSTAT_DATA_UINT64 },
|
|
@@ -4926,7 +4932,51 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
|
|
}
|
|
}
|
|
|
|
-/* a generic arc_done_func_t which you can use */
|
|
+/*
|
|
+ * This routine is called by dbuf_hold() to update the arc_access() state
|
|
+ * which otherwise would be skipped for entries in the dbuf cache.
|
|
+ */
|
|
+void
|
|
+arc_buf_access(arc_buf_t *buf)
|
|
+{
|
|
+ mutex_enter(&buf->b_evict_lock);
|
|
+ arc_buf_hdr_t *hdr = buf->b_hdr;
|
|
+
|
|
+ /*
|
|
+ * Avoid taking the hash_lock when possible as an optimization.
|
|
+ * The header must be checked again under the hash_lock in order
|
|
+ * to handle the case where it is concurrently being released.
|
|
+ */
|
|
+ if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) {
|
|
+ mutex_exit(&buf->b_evict_lock);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ kmutex_t *hash_lock = HDR_LOCK(hdr);
|
|
+ mutex_enter(hash_lock);
|
|
+
|
|
+ if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) {
|
|
+ mutex_exit(hash_lock);
|
|
+ mutex_exit(&buf->b_evict_lock);
|
|
+ ARCSTAT_BUMP(arcstat_access_skip);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ mutex_exit(&buf->b_evict_lock);
|
|
+
|
|
+ ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
|
|
+ hdr->b_l1hdr.b_state == arc_mfu);
|
|
+
|
|
+ DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
|
|
+ arc_access(hdr, hash_lock);
|
|
+ mutex_exit(hash_lock);
|
|
+
|
|
+ ARCSTAT_BUMP(arcstat_hits);
|
|
+ ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), demand, prefetch,
|
|
+ !HDR_ISTYPE_METADATA(hdr), data, metadata, hits);
|
|
+}
|
|
+
|
|
+/* a generic arc_read_done_func_t which you can use */
|
|
/* ARGSUSED */
|
|
void
|
|
arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
|
|
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
|
|
index 60f52d294..4ee121f5a 100644
|
|
--- a/module/zfs/dbuf.c
|
|
+++ b/module/zfs/dbuf.c
|
|
@@ -2719,8 +2719,10 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
|
|
return (SET_ERROR(ENOENT));
|
|
}
|
|
|
|
- if (dh->dh_db->db_buf != NULL)
|
|
+ if (dh->dh_db->db_buf != NULL) {
|
|
+ arc_buf_access(dh->dh_db->db_buf);
|
|
ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
|
|
+ }
|
|
|
|
ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));
|
|
|
|
--
|
|
2.14.2
|
|
|