mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 03:37:45 +03:00
Decrease contention on dn_struct_rwlock
Currently, sequential async write workloads spend a lot of time contending on the dn_struct_rwlock. This lock is responsible for protecting the entire block tree below it; this naturally results in some serialization during heavy write workloads. This can be resolved by having per-dbuf locking, which will allow multiple writers in the same object at the same time. We introduce a new rwlock, the db_rwlock. This lock is responsible for protecting the contents of the dbuf that it is a part of; when reading a block pointer from a dbuf, you hold the lock as a reader. When writing data to a dbuf, you hold it as a writer. This allows multiple threads to write to different parts of a file at the same time. Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed by: Matt Ahrens matt@delphix.com Reviewed by: George Wilson george.wilson@delphix.com Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <pcd@delphix.com> External-issue: DLPX-52564 External-issue: DLPX-53085 External-issue: DLPX-57384 Closes #8946
This commit is contained in:
committed by
Brian Behlendorf
parent
cb70964221
commit
f664f1ee7f
+4
-5
@@ -158,8 +158,8 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
uint64_t blkid;
|
||||
dmu_buf_impl_t *db;
|
||||
|
||||
blkid = dbuf_whichblock(dn, 0, offset);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
blkid = dbuf_whichblock(dn, 0, offset);
|
||||
db = dbuf_hold(dn, blkid, tag);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
@@ -183,8 +183,8 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||
err = dnode_hold(os, object, FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
blkid = dbuf_whichblock(dn, 0, offset);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
blkid = dbuf_whichblock(dn, 0, offset);
|
||||
db = dbuf_hold(dn, blkid, tag);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dnode_rele(dn, FTAG);
|
||||
@@ -549,7 +549,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
|
||||
DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
|
||||
dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
|
||||
read && DNODE_IS_CACHEABLE(dn));
|
||||
read && DNODE_IS_CACHEABLE(dn), B_TRUE);
|
||||
}
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
@@ -681,7 +681,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
||||
if (err != 0)
|
||||
return;
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
/*
|
||||
* offset + len - 1 is the last byte we want to prefetch for, and offset
|
||||
* is the first. Then dbuf_whichblk(dn, level, off + len - 1) is the
|
||||
@@ -689,6 +688,7 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
||||
* offset) is the first. Then the number we need to prefetch is the
|
||||
* last - first + 1.
|
||||
*/
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
if (level > 0 || dn->dn_datablkshift != 0) {
|
||||
nblks = dbuf_whichblock(dn, level, offset + len - 1) -
|
||||
dbuf_whichblock(dn, level, offset) + 1;
|
||||
@@ -701,7 +701,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
||||
for (int i = 0; i < nblks; i++)
|
||||
dbuf_prefetch(dn, level, blkid + i, pri, 0);
|
||||
}
|
||||
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
Reference in New Issue
Block a user