mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Decrease contention on dn_struct_rwlock
Currently, sequential async write workloads spend a lot of time contending on the dn_struct_rwlock. This lock is responsible for protecting the entire block tree below it; this naturally results in some serialization during heavy write workloads. This can be resolved by having per-dbuf locking, which will allow multiple writers in the same object at the same time.

We introduce a new rwlock, the db_rwlock. This lock is responsible for protecting the contents of the dbuf that it is a part of; when reading a block pointer from a dbuf, you hold the lock as a reader. When writing data to a dbuf, you hold it as a writer. This allows multiple threads to write to different parts of a file at the same time.

Reviewed-by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
External-issue: DLPX-52564
External-issue: DLPX-53085
External-issue: DLPX-57384
Closes #8946
This commit is contained in:
committed by
Brian Behlendorf
parent
cb70964221
commit
f664f1ee7f
+24
-3
@@ -108,6 +108,12 @@ typedef enum override_states {
|
||||
DR_OVERRIDDEN
|
||||
} override_states_t;
|
||||
|
||||
/*
 * Lock kind returned by dmu_buf_lock_parent() and accepted by
 * dmu_buf_unlock_parent() as its "type" argument.
 * NOTE(review): the enumerator names suggest "no lock taken", "parent
 * dbuf's lock" and "objset-level lock" respectively -- confirm against
 * the implementation before relying on this.
 */
typedef enum db_lock_type {
|
||||
DLT_NONE,
|
||||
DLT_PARENT,
|
||||
DLT_OBJSET
|
||||
} db_lock_type_t;
|
||||
|
||||
typedef struct dbuf_dirty_record {
|
||||
/* link on our parents dirty list */
|
||||
list_node_t dr_dirty_node;
|
||||
@@ -217,6 +223,22 @@ typedef struct dmu_buf_impl {
|
||||
*/
|
||||
uint8_t db_level;
|
||||
|
||||
/*
|
||||
* Protects db_buf's contents if they contain an indirect block or data
|
||||
* block of the meta-dnode. We use this lock to protect the structure of
|
||||
* the block tree. This means that when modifying this dbuf's data, we
|
||||
* grab its rwlock. When modifying its parent's data (including the
|
||||
* blkptr to this dbuf), we grab the parent's rwlock. The lock ordering
|
||||
* for this lock is:
|
||||
* 1) dn_struct_rwlock
|
||||
* 2) db_rwlock
|
||||
* We don't currently grab multiple dbufs' db_rwlocks at once.
|
||||
*/
|
||||
krwlock_t db_rwlock;
|
||||
|
||||
/* buffer holding our data */
|
||||
arc_buf_t *db_buf;
|
||||
|
||||
/* db_mtx protects the members below */
|
||||
kmutex_t db_mtx;
|
||||
|
||||
@@ -232,9 +254,6 @@ typedef struct dmu_buf_impl {
|
||||
*/
|
||||
zfs_refcount_t db_holds;
|
||||
|
||||
/* buffer holding our data */
|
||||
arc_buf_t *db_buf;
|
||||
|
||||
kcondvar_t db_changed;
|
||||
dbuf_dirty_record_t *db_data_pending;
|
||||
|
||||
@@ -335,6 +354,8 @@ void dbuf_destroy(dmu_buf_impl_t *db);
|
||||
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
||||
void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx);
|
||||
void dbuf_release_bp(dmu_buf_impl_t *db);
|
||||
/*
 * Paired lock/unlock helpers added with db_rwlock. dmu_buf_lock_parent()
 * takes the requested mode (rw) and a tag and returns a db_lock_type_t;
 * dmu_buf_unlock_parent() accepts a db_lock_type_t and a tag.
 * NOTE(review): presumably the caller hands the returned type (and the same
 * tag) back to dmu_buf_unlock_parent() so the matching lock is released --
 * confirm against the implementation.
 */
db_lock_type_t dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag);
|
||||
void dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag);
|
||||
|
||||
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
|
||||
struct dmu_tx *);
|
||||
|
||||
Reference in New Issue
Block a user