From 1ff46825e232b3ad3414f60fab8dcba8ed17d778 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 25 Jul 2019 11:57:58 -0700 Subject: [PATCH] Replace zf_rwlock with a mutex The rwlock implementation on linux does not perform as well as mutexes. We can realize a performance benefit by replacing the zf_rwlock with a mutex. Local microbenchmarks show ~50% improvement, and over NFS we see ~5% improvement on several of the ZFS Performance Tests cases, especially randwrite and seq_write. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Nguyen Reviewed-by: Olaf Faaland Signed-off-by: Matthew Ahrens Closes #9062 --- include/sys/dmu_zfetch.h | 2 +- module/zfs/dmu_zfetch.c | 31 ++++++++++--------------------- module/zfs/dnode.c | 2 +- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/include/sys/dmu_zfetch.h b/include/sys/dmu_zfetch.h index 1e539086d..4303ab314 100644 --- a/include/sys/dmu_zfetch.h +++ b/include/sys/dmu_zfetch.h @@ -56,7 +56,7 @@ typedef struct zstream { } zstream_t; typedef struct zfetch { - krwlock_t zf_rwlock; /* protects zfetch structure */ + kmutex_t zf_lock; /* protects zfetch structure */ list_t zf_stream; /* list of zstream_t's */ struct dnode *zf_dnode; /* dnode that owns this zfetch */ } zfetch_t; diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index ee771c9fa..6511e4f8e 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -110,13 +110,13 @@ dmu_zfetch_init(zfetch_t *zf, dnode_t *dno) list_create(&zf->zf_stream, sizeof (zstream_t), offsetof(zstream_t, zs_node)); - rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL); + mutex_init(&zf->zf_lock, NULL, MUTEX_DEFAULT, NULL); } static void dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs) { - ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); + ASSERT(MUTEX_HELD(&zf->zf_lock)); list_remove(&zf->zf_stream, zs); mutex_destroy(&zs->zs_lock); kmem_free(zs, sizeof (*zs)); @@ -131,14 +131,12 @@ dmu_zfetch_fini(zfetch_t *zf) { zstream_t *zs; - ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock)); - - rw_enter(&zf->zf_rwlock, RW_WRITER); + mutex_enter(&zf->zf_lock); while ((zs = list_head(&zf->zf_stream)) != NULL) dmu_zfetch_stream_remove(zf, zs); - rw_exit(&zf->zf_rwlock); + mutex_exit(&zf->zf_lock); list_destroy(&zf->zf_stream); - rw_destroy(&zf->zf_rwlock); + mutex_destroy(&zf->zf_lock); zf->zf_dnode = NULL; } @@ -155,7 +153,7 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid) zstream_t *zs_next; int numstreams = 0; - ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); + ASSERT(MUTEX_HELD(&zf->zf_lock)); /* * Clean up old streams. @@ -215,7 +213,6 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data, uint64_t end_of_access_blkid; end_of_access_blkid = blkid + nblks; spa_t *spa = zf->zf_dnode->dn_objset->os_spa; - krw_t rw = RW_READER; if (zfs_prefetch_disable) return; @@ -236,10 +233,9 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data, if (blkid == 0) return; -retry: if (!have_lock) rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER); - rw_enter(&zf->zf_rwlock, rw); + mutex_enter(&zf->zf_lock); /* * Find matching prefetch stream. Depending on whether the accesses @@ -262,7 +258,7 @@ retry: if (nblks == 0) { /* Already prefetched this before. */ mutex_exit(&zs->zs_lock); - rw_exit(&zf->zf_rwlock); + mutex_exit(&zf->zf_lock); if (!have_lock) { rw_exit(&zf->zf_dnode-> dn_struct_rwlock); @@ -281,16 +277,9 @@ retry: * a new stream for it. */ ZFETCHSTAT_BUMP(zfetchstat_misses); - if (rw == RW_READER && !rw_tryupgrade(&zf->zf_rwlock)) { - rw_exit(&zf->zf_rwlock); - if (!have_lock) - rw_exit(&zf->zf_dnode->dn_struct_rwlock); - rw = RW_WRITER; - goto retry; - } dmu_zfetch_stream_create(zf, end_of_access_blkid); - rw_exit(&zf->zf_rwlock); + mutex_exit(&zf->zf_lock); if (!have_lock) rw_exit(&zf->zf_dnode->dn_struct_rwlock); return; @@ -356,7 +345,7 @@ retry: zs->zs_atime = gethrtime(); zs->zs_blkid = end_of_access_blkid; mutex_exit(&zs->zs_lock); - rw_exit(&zf->zf_rwlock); + mutex_exit(&zf->zf_lock); /* * dbuf_prefetch() is asynchronous (even when it needs to read diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index d7cd5ce78..c9ff43fa6 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -752,7 +752,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock)); ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx)); ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx)); - ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock)); + ASSERT(!MUTEX_HELD(&odn->dn_zfetch.zf_lock)); /* Copy fields. */ ndn->dn_objset = odn->dn_objset;