OpenZFS 7968 - multi-threaded spa_sync()

Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: Matthew Ahrens <mahrens@delphix.com>

spa_sync() iterates over all the dirty dnodes and processes each of them
by calling dnode_sync(). If there are many dirty dnodes (e.g. because we
created or removed a lot of files), the single thread of spa_sync()
calling dnode_sync() can become a bottleneck. Additionally, if many
dnodes are dirtied concurrently in open context (e.g. due to concurrent
file creation), the os_lock will experience lock contention via
dnode_setdirty().

The solution is to track dirty dnodes on a multilist_t, and for
spa_sync() to use separate threads to process each of the sublists in
the multilist.

OpenZFS-issue: https://www.illumos.org/issues/7968
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/4a2a54c
Closes #5752
This commit is contained in:
Matthew Ahrens
2017-03-20 18:36:00 -07:00
committed by Brian Behlendorf
parent a3478c0747
commit 64fc776208
16 changed files with 303 additions and 159 deletions
+1 -1
View File
@@ -70,7 +70,7 @@ typedef struct arc_state {
/*
* list of evictable buffers
*/
multilist_t arcs_list[ARC_BUFC_NUMTYPES];
multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
/*
* total amount of evictable data in this state
*/
+6 -4
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -113,7 +113,7 @@ struct objset {
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
zil_header_t os_zil_header;
list_t os_synced_dnodes;
multilist_t *os_synced_dnodes;
uint64_t os_flags;
uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes;
@@ -124,11 +124,13 @@ struct objset {
/* Protected by os_lock */
kmutex_t os_lock;
list_t os_dirty_dnodes[TXG_SIZE];
list_t os_free_dnodes[TXG_SIZE];
multilist_t *os_dirty_dnodes[TXG_SIZE];
list_t os_dnodes;
list_t os_downgraded_dbufs;
/* Protects changes to DMU_{USER,GROUP}USED_OBJECT */
kmutex_t os_userused_lock;
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
+3 -2
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -35,6 +35,7 @@
#include <sys/refcount.h>
#include <sys/dmu_zfetch.h>
#include <sys/zrlock.h>
#include <sys/multilist.h>
#ifdef __cplusplus
extern "C" {
@@ -243,7 +244,7 @@ struct dnode {
uint32_t dn_dbufs_count; /* count of dn_dbufs */
/* protected by os_lock: */
list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
multilist_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
/* protected by dn_mtx: */
kmutex_t dn_mtx;
+2 -1
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
@@ -124,6 +124,7 @@ typedef struct dsl_pool {
txg_list_t dp_dirty_zilogs;
txg_list_t dp_dirty_dirs;
txg_list_t dp_sync_tasks;
taskq_t *dp_sync_taskq;
/*
* Protects administrative changes (properties, namespace)
+2 -2
View File
@@ -72,8 +72,7 @@ struct multilist {
};
void multilist_destroy(multilist_t *);
void multilist_create(multilist_t *, size_t, size_t,
multilist_sublist_index_func_t *);
multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *);
void multilist_insert(multilist_t *, void *);
void multilist_remove(multilist_t *, void *);
@@ -83,6 +82,7 @@ unsigned int multilist_get_num_sublists(multilist_t *);
unsigned int multilist_get_random_index(multilist_t *);
multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
multilist_sublist_t *multilist_sublist_lock_obj(multilist_t *, void *);
void multilist_sublist_unlock(multilist_sublist_t *);
void multilist_sublist_insert_head(multilist_sublist_t *, void *);
+2 -2
View File
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
@@ -454,7 +454,7 @@ struct zio {
taskq_ent_t io_tqent;
};
extern int zio_timestamp_compare(const void *, const void *);
extern int zio_bookmark_compare(const void *, const void *);
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
zio_done_func_t *done, void *private, enum zio_flag flags);