Decrease contention on dn_struct_rwlock

Currently, sequential async write workloads spend a lot of time 
contending on the dn_struct_rwlock. This lock is responsible for 
protecting the entire block tree below it; this naturally results 
in some serialization during heavy write workloads. This can be 
resolved by having per-dbuf locking, which will allow multiple 
writers in the same object at the same time.

We introduce a new rwlock, the db_rwlock. This lock is responsible 
for protecting the contents of the dbuf that it is a part of; when 
reading a block pointer from a dbuf, you hold the lock as a reader. 
When writing data to a dbuf, you hold it as a writer. This allows 
multiple threads to write to different parts of a file at the same 
time.

Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Matt Ahrens matt@delphix.com
Reviewed by: George Wilson george.wilson@delphix.com
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
External-issue: DLPX-52564
External-issue: DLPX-53085
External-issue: DLPX-57384
Closes #8946
This commit is contained in:
Paul Dagnelie
2019-07-08 13:18:50 -07:00
committed by Brian Behlendorf
parent cb70964221
commit f664f1ee7f
7 changed files with 247 additions and 120 deletions
+15 -2
View File
@@ -24,7 +24,7 @@
*/
/*
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -205,7 +205,8 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
* TRUE -- prefetch predicted data blocks plus following indirect blocks.
*/
void
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
boolean_t have_lock)
{
zstream_t *zs;
int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
@@ -236,6 +237,8 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
return;
retry:
if (!have_lock)
rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
rw_enter(&zf->zf_rwlock, rw);
/*
@@ -260,6 +263,10 @@ retry:
/* Already prefetched this before. */
mutex_exit(&zs->zs_lock);
rw_exit(&zf->zf_rwlock);
if (!have_lock) {
rw_exit(&zf->zf_dnode->
dn_struct_rwlock);
}
return;
}
break;
@@ -276,12 +283,16 @@ retry:
ZFETCHSTAT_BUMP(zfetchstat_misses);
if (rw == RW_READER && !rw_tryupgrade(&zf->zf_rwlock)) {
rw_exit(&zf->zf_rwlock);
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
rw = RW_WRITER;
goto retry;
}
dmu_zfetch_stream_create(zf, end_of_access_blkid);
rw_exit(&zf->zf_rwlock);
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
return;
}
@@ -361,6 +372,8 @@ retry:
dbuf_prefetch(zf->zf_dnode, 1, iblk,
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
}
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
ZFETCHSTAT_BUMP(zfetchstat_hits);
}