mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Improve ZFS objset sync parallelism
As part of transaction group commit, dsl_pool_sync() sequentially calls dsl_dataset_sync() for each dirty dataset, which subsequently calls dmu_objset_sync(). dmu_objset_sync() in turn uses up to 75% of CPU cores to run sync_dnodes_task() in taskq threads to sync the dirty dnodes (files). There are two problems: 1. Each ZVOL in a pool is a separate dataset/objset having a single dnode. This means the objsets are synchronized serially, which leads to a bottleneck of ~330K blocks written per second per pool. 2. In the case of multiple dirty dnodes/files on a dataset/objset on a big system they will be sync'd in parallel taskq threads. However, it is inefficient to use 75% of CPU cores of a big system to do that, because of (a) bottlenecks on a single write issue taskq, and (b) allocation throttling. In addition, if not for the allocation throttling sorting write requests by bookmarks (logical address), writes for different files may reach space allocators interleaved, leading to unwanted fragmentation. The solution to both problems is to always sync no more and (if possible) no fewer dnodes at the same time than there are allocators in the pool. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Edmund Nadolski <edmund.nadolski@ixsystems.com> Closes #15197
This commit is contained in:
@@ -825,6 +825,11 @@ extern void spa_sync_allpools(void);
|
||||
|
||||
extern uint_t zfs_sync_pass_deferred_free;
|
||||
|
||||
/* spa sync taskqueues */
|
||||
taskq_t *spa_sync_tq_create(spa_t *spa, const char *name);
|
||||
void spa_sync_tq_destroy(spa_t *spa);
|
||||
void spa_select_allocator(zio_t *zio);
|
||||
|
||||
/* spa namespace global mutex */
|
||||
extern kmutex_t spa_namespace_lock;
|
||||
|
||||
|
||||
+11
-1
@@ -188,6 +188,12 @@ typedef struct spa_taskqs {
|
||||
taskq_t **stqs_taskq;
|
||||
} spa_taskqs_t;
|
||||
|
||||
/*
 * Per-thread bookkeeping for the spa sync taskq: one entry for each thread
 * in the spa sync taskq.  struct spa refers to these entries through its
 * spa_syncthreads pointer.
 */
|
||||
typedef struct spa_syncthread_info {
|
||||
kthread_t *sti_thread; /* presumably the sync taskq thread this entry describes -- confirm against spa_sync_tq_create() */
|
||||
taskq_t *sti_wr_iss_tq; /* write-issue (wr_iss) taskq assigned to this thread */
|
||||
} spa_syncthread_info_t;
|
||||
|
||||
typedef enum spa_all_vdev_zap_action {
|
||||
AVZ_ACTION_NONE = 0,
|
||||
AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */
|
||||
@@ -265,6 +271,10 @@ struct spa {
|
||||
int spa_alloc_count;
|
||||
int spa_active_allocator; /* selectable allocator */
|
||||
|
||||
/* per-allocator sync thread taskqs */
|
||||
taskq_t *spa_sync_tq;
|
||||
spa_syncthread_info_t *spa_syncthreads;
|
||||
|
||||
spa_aux_vdev_t spa_spares; /* hot spares */
|
||||
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
|
||||
nvlist_t *spa_label_features; /* Features for reading MOS */
|
||||
@@ -456,7 +466,7 @@ extern char *spa_config_path;
|
||||
extern const char *zfs_deadman_failmode;
|
||||
extern uint_t spa_slop_shift;
|
||||
extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
|
||||
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
|
||||
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent, zio_t *zio);
|
||||
extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
|
||||
task_func_t *func, void *arg, uint_t flags);
|
||||
extern void spa_load_spares(spa_t *spa);
|
||||
|
||||
@@ -496,6 +496,8 @@ extern taskq_t *system_taskq;
|
||||
extern taskq_t *system_delay_taskq;
|
||||
|
||||
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
|
||||
extern taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
|
||||
kthread_t ***);
|
||||
#define taskq_create_proc(a, b, c, d, e, p, f) \
|
||||
(taskq_create(a, b, c, d, e, f))
|
||||
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
|
||||
|
||||
@@ -223,6 +223,9 @@ typedef uint64_t zio_flag_t;
|
||||
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
||||
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
||||
|
||||
#define ZIO_ALLOCATOR_NONE (-1)
|
||||
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
|
||||
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0
|
||||
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
|
||||
|
||||
@@ -526,6 +529,9 @@ struct zio {
|
||||
|
||||
/* Taskq dispatching state */
|
||||
taskq_ent_t io_tqent;
|
||||
|
||||
/* write issue taskq selection, based upon sync thread */
|
||||
taskq_t *io_wr_iss_tq;
|
||||
};
|
||||
|
||||
enum blk_verify_flag {
|
||||
|
||||
Reference in New Issue
Block a user