Decrease contention on dn_struct_rwlock

Currently, sequential async write workloads spend a lot of time 
contending on the dn_struct_rwlock. This lock is responsible for 
protecting the entire block tree below it; this naturally results 
in some serialization during heavy write workloads. This can be 
resolved by having per-dbuf locking, which will allow multiple 
writers in the same object at the same time.

We introduce a new rwlock, the db_rwlock. This lock is responsible 
for protecting the contents of the dbuf that it is a part of; when 
reading a block pointer from a dbuf, you hold the lock as a reader. 
When writing data to a dbuf, you hold it as a writer. This allows 
multiple threads to write to different parts of a file at the same 
time.

Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Matt Ahrens matt@delphix.com
Reviewed by: George Wilson george.wilson@delphix.com
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
External-issue: DLPX-52564
External-issue: DLPX-53085
External-issue: DLPX-57384
Closes #8946
This commit is contained in:
Paul Dagnelie
2019-07-08 13:18:50 -07:00
committed by Brian Behlendorf
parent cb70964221
commit f664f1ee7f
7 changed files with 247 additions and 120 deletions
+4 -5
View File
@@ -158,8 +158,8 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
uint64_t blkid;
dmu_buf_impl_t *db;
blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = dbuf_whichblock(dn, 0, offset);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);
@@ -183,8 +183,8 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = dbuf_whichblock(dn, 0, offset);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
@@ -549,7 +549,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
read && DNODE_IS_CACHEABLE(dn));
read && DNODE_IS_CACHEABLE(dn), B_TRUE);
}
rw_exit(&dn->dn_struct_rwlock);
@@ -681,7 +681,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
if (err != 0)
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
/*
* offset + len - 1 is the last byte we want to prefetch for, and offset
* is the first. Then dbuf_whichblk(dn, level, off + len - 1) is the
@@ -689,6 +688,7 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
* offset) is the first. Then the number we need to prefetch is the
* last - first + 1.
*/
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (level > 0 || dn->dn_datablkshift != 0) {
nblks = dbuf_whichblock(dn, level, offset + len - 1) -
dbuf_whichblock(dn, level, offset) + 1;
@@ -701,7 +701,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
for (int i = 0; i < nblks; i++)
dbuf_prefetch(dn, level, blkid + i, pri, 0);
}
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);