Mirror of https://git.proxmox.com/git/mirror_zfs.git
ZIO: Set minimum number of free issue threads to 32
Free issue threads might block waiting for synchronous DDT, BRT or
GANG header reads.  So unlike other taskqs using ZTI_SCALE to scale
with the number of CPUs, here we also need a certain number of threads
to potentially saturate pool reads.  I am not sure we always want the
96 threads we had before the ZTI_SCALE introduction in #11966 on small
systems, but let's make it at least 32.  While here, make the free
taskqs configurable, similar to the read and write ones.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Rob Norris <robn@despairlabs.com>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #17903
This commit is contained in:
parent
583db40030
commit
aaf374bd40
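The heart of the change: ZTI_SCALE now carries a minimum total thread count, and the ZTI_MODE_SCALE branch of spa_taskqs_init() raises the CPU-scaled thread total to that floor before splitting it across taskqs. The standalone C sketch below mirrors that arithmetic from the module/zfs/spa.c hunks further down; scale_taskqs() and its parameters are simplified stand-ins for boot_ncpus, zio_taskq_batch_pct and zio_taskq_batch_tpq, and the percent-of-online-CPUs representation the kernel additionally attempts is omitted here.

/*
 * Standalone sketch (not the in-tree code) of the ZTI_MODE_SCALE sizing
 * this commit changes: total threads scale with CPUs but never drop
 * below the requested minimum, then get split across several taskqs.
 */
#include <stdio.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))
#define	MIN(a, b)	((a) < (b) ? (a) : (b))

static void
scale_taskqs(unsigned ncpus, unsigned batch_pct, unsigned batch_tpq,
    unsigned min_threads, unsigned *countp, unsigned *threads_per_tqp)
{
	/* Total threads scale with CPUs, but never below the minimum. */
	unsigned threads = MAX(1, ncpus * batch_pct / 100);
	threads = MAX(threads, min_threads);

	unsigned count;
	if (batch_tpq > 0) {
		/* Explicit threads-per-taskq tunable: round to nearest. */
		count = MAX(1, (threads + batch_tpq / 2) / batch_tpq);
	} else {
		/* Heuristic: more taskqs on bigger systems, capped near sqrt(cpus). */
		unsigned cpus = MIN(threads, ncpus);
		count = 1 + threads / 6;
		while (count * count > cpus)
			count--;
	}
	/* Ensure no single taskq would exceed 100% of the CPUs. */
	count = MAX(count, (batch_pct + 99) / 100);

	*countp = count;
	*threads_per_tqp = MAX(1, (threads + count / 2) / count);
}

int
main(void)
{
	unsigned count, per_tq;

	/* FREE issue taskqs on a 4-CPU box: the 32-thread floor dominates. */
	scale_taskqs(4, 80, 0, 32, &count, &per_tq);
	printf("4 CPUs:  %u taskq(s) x %u threads\n", count, per_tq);

	/* The same computation on 64 CPUs: CPU scaling dominates instead. */
	scale_taskqs(64, 80, 0, 32, &count, &per_tq);
	printf("64 CPUs: %u taskq(s) x %u threads\n", count, per_tq);
	return (0);
}

With the default 80% batch percentage, the 32-thread floor dominates on a 4-CPU system (2 taskqs of 16 threads each), while on 64 CPUs the CPU scaling takes over (7 taskqs of 7 threads).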
@@ -104,6 +104,9 @@
#define spa_taskq_write_param_set_args(var) \
	CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"

#define spa_taskq_free_param_set_args(var) \
	CTLTYPE_STRING, NULL, 0, spa_taskq_free_param, "A"

#define fletcher_4_param_set_args(var) \
	CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
@@ -2660,12 +2660,50 @@ Set value only applies to pools imported/created after that.
Set the queue and thread configuration for the IO read queues.
This is an advanced debugging parameter.
Don't change this unless you understand what it does.
Each of the four values corresponds to the issue, issue high-priority,
interrupt, and interrupt high-priority queues.
Valid values are
.Sy fixed,N,M
(M queues with N threads each),
.Sy scale[,MIN]
(scale with CPUs, minimum MIN total threads),
.Sy sync ,
and
.Sy null .
Set values only apply to pools imported/created after that.
.
.It Sy zio_taskq_write Ns = Ns Sy sync null scale null Pq charp
Set the queue and thread configuration for the IO write queues.
This is an advanced debugging parameter.
Don't change this unless you understand what it does.
Each of the four values corresponds to the issue, issue high-priority,
interrupt, and interrupt high-priority queues.
Valid values are
.Sy fixed,N,M
(M queues with N threads each),
.Sy scale[,MIN]
(scale with CPUs, minimum MIN total threads),
.Sy sync ,
and
.Sy null .
Set values only apply to pools imported/created after that.
.
.It Sy zio_taskq_free Ns = Ns Sy scale,32 null null null Pq charp
Set the queue and thread configuration for the IO free queues.
This is an advanced debugging parameter.
Don't change this unless you understand what it does.
Each of the four values corresponds to the issue, issue high-priority,
interrupt, and interrupt high-priority queues.
Valid values are
.Sy fixed,N,M
(M queues with N threads each),
.Sy scale[,MIN]
(scale with CPUs, minimum MIN total threads),
.Sy sync ,
and
.Sy null .
The default uses a minimum of 32 threads to improve parallelism for
DDT and BRT metadata operations during frees.
Set values only apply to pools imported/created after that.
.
.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
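All three parameters above share this token syntax, including the new scale[,MIN] form. The userland sketch below illustrates how one such token could be validated; parse_scale_token() is a hypothetical helper using strtoull(), standing in for the ddi_strtoull() checks that spa_taskq_param_set() performs further down in this diff, and it accepts a bare scale or scale,MIN with MIN below 16384.

/*
 * Userland sketch of validating one "scale[,MIN]" token from the
 * zio_taskq_* parameters.  Illustrative only; the in-kernel parsing in
 * spa_taskq_param_set() uses ddi_strtoull() with the same checks.
 */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns 0 and stores MIN (0 if absent) on success, -1 on a bad token. */
static int
parse_scale_token(const char *tok, unsigned long long *minp)
{
	*minp = 0;

	if (strncmp(tok, "scale", 5) != 0)
		return (-1);
	tok += 5;
	if (*tok == '\0')		/* plain "scale": no minimum given */
		return (0);
	if (*tok != ',')
		return (-1);
	tok++;

	if (!isdigit((unsigned char)*tok))	/* need at least one digit */
		return (-1);

	char *end;
	errno = 0;
	unsigned long long mint = strtoull(tok, &end, 10);
	if (errno != 0 || end == tok || *end != '\0')
		return (-1);
	if (mint >= 16384)		/* same sanity cap as the kernel */
		return (-1);

	*minp = mint;
	return (0);
}

int
main(void)
{
	const char *samples[] = { "scale", "scale,32", "scale,", "scale,99999" };
	for (size_t i = 0; i < sizeof (samples) / sizeof (samples[0]); i++) {
		unsigned long long m;
		if (parse_scale_token(samples[i], &m) == 0)
			printf("%-12s -> ok, minimum %llu threads\n",
			    samples[i], m);
		else
			printf("%-12s -> rejected\n", samples[i]);
	}
	return (0);
}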
module/zfs/spa.c (104 changed lines)
@@ -141,7 +141,7 @@ typedef enum zti_modes {

#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 }
#define ZTI_SCALE(min) { ZTI_MODE_SCALE, (min), 1 }
#define ZTI_SYNC { ZTI_MODE_SYNC, 0, 1 }
#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }

@@ -180,13 +180,13 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
	{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
	{ ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */
	{ ZTI_N(8), ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* READ */
#ifdef illumos
	{ ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
	{ ZTI_SYNC, ZTI_N(5), ZTI_SCALE(0), ZTI_N(5) }, /* WRITE */
#else
	{ ZTI_SYNC, ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* WRITE */
	{ ZTI_SYNC, ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* WRITE */
#endif
	{ ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
	{ ZTI_SCALE(32), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
	{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
	{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FLUSH */
	{ ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */
@@ -1170,7 +1170,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
	uint_t value = ztip->zti_value;
	uint_t count = ztip->zti_count;
	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
	uint_t cpus, flags = TASKQ_DYNAMIC;
	uint_t cpus, threads, flags = TASKQ_DYNAMIC;

	switch (mode) {
	case ZTI_MODE_FIXED:

@@ -1183,8 +1183,8 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
		 * Create one wr_iss taskq for every 'zio_taskq_write_tpq' CPUs,
		 * not to exceed the number of spa allocators, and align to it.
		 */
		cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
		count = MAX(1, cpus / MAX(1, zio_taskq_write_tpq));
		threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
		count = MAX(1, threads / MAX(1, zio_taskq_write_tpq));
		count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
		count = MIN(count, spa->spa_alloc_count);
		while (spa->spa_alloc_count % count != 0 &&

@@ -1201,14 +1201,14 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
		break;

	case ZTI_MODE_SCALE:
		flags |= TASKQ_THREADS_CPU_PCT;
		/*
		 * We want more taskqs to reduce lock contention, but we want
		 * less for better request ordering and CPU utilization.
		 */
		cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
		threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
		threads = MAX(threads, value);
		if (zio_taskq_batch_tpq > 0) {
			count = MAX(1, (cpus + zio_taskq_batch_tpq / 2) /
			count = MAX(1, (threads + zio_taskq_batch_tpq / 2) /
			    zio_taskq_batch_tpq);
		} else {
			/*

@@ -1228,13 +1228,23 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
			 * 128 10 8% 10 100
			 * 256 14 6% 15 210
			 */
			count = 1 + cpus / 6;
			cpus = MIN(threads, boot_ncpus);
			count = 1 + threads / 6;
			while (count * count > cpus)
				count--;
		}
		/* Limit each taskq within 100% to not trigger assertion. */
		count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
		value = (zio_taskq_batch_pct + count / 2) / count;

		/*
		 * Try to represent the number of threads per taskq as percent
		 * of online CPUs to allow scaling with later online/offline.
		 * Fall back to absolute numbers if can't.
		 */
		value = (threads * 100 + boot_ncpus * count / 2) /
		    (boot_ncpus * count);
		if (value < 5 || value > 100)
			value = MAX(1, (threads + count / 2) / count);
		else
			flags |= TASKQ_THREADS_CPU_PCT;
		break;

	case ZTI_MODE_NULL:
@@ -1433,8 +1443,30 @@ spa_taskq_param_set(zio_type_t t, char *cfg)
			break;
		}

		/*
		 * SCALE is optionally parameterised by minimum number of
		 * threads.
		 */
		case ZTI_MODE_SCALE: {
			const zio_taskq_info_t zti = ZTI_SCALE;
			unsigned long long mint = 0;
			if (c != NULL && *c != '\0') {
				/* Need a number */
				if (!(isdigit(*c)))
					break;
				tok = c;

				/* Take digits */
				err = ddi_strtoull(tok, &tok, 10, &mint);
				/* Must succeed, and moved forward */
				if (err != 0 || tok == c || *tok != '\0')
					break;

				/* Sanity check */
				if (mint >= 16384)
					break;
			}

			const zio_taskq_info_t zti = ZTI_SCALE(mint);
			row[q] = zti;
			break;
		}
@@ -1501,6 +1533,9 @@ spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline)
			pos += sprintf(&buf[pos], "%s%s,%u,%u", sep,
			    modes[zti->zti_mode], zti->zti_count,
			    zti->zti_value);
		else if (zti->zti_mode == ZTI_MODE_SCALE && zti->zti_value > 0)
			pos += sprintf(&buf[pos], "%s%s,%u", sep,
			    modes[zti->zti_mode], zti->zti_value);
		else
			pos += sprintf(&buf[pos], "%s%s", sep,
			    modes[zti->zti_mode]);
@@ -1520,9 +1555,10 @@ spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp)
{
	char *cfg = kmem_strdup(val);
	int err = spa_taskq_param_set(ZIO_TYPE_READ, cfg);
	kmem_free(cfg, strlen(val)+1);
	kmem_strfree(cfg);
	return (-err);
}

static int
spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp)
{
@@ -1534,14 +1570,30 @@ spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp)
{
	char *cfg = kmem_strdup(val);
	int err = spa_taskq_param_set(ZIO_TYPE_WRITE, cfg);
	kmem_free(cfg, strlen(val)+1);
	kmem_strfree(cfg);
	return (-err);
}

static int
spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp)
{
	return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE));
}

static int
spa_taskq_free_param_set(const char *val, zfs_kernel_param_t *kp)
{
	char *cfg = kmem_strdup(val);
	int err = spa_taskq_param_set(ZIO_TYPE_FREE, cfg);
	kmem_strfree(cfg);
	return (-err);
}

static int
spa_taskq_free_param_get(char *buf, zfs_kernel_param_t *kp)
{
	return (spa_taskq_param_get(ZIO_TYPE_FREE, buf, TRUE));
}
#else
/*
 * On FreeBSD load-time parameters can be set up before malloc() is available,

@@ -1574,6 +1626,19 @@ spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
		return (err);
	return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf));
}

static int
spa_taskq_free_param(ZFS_MODULE_PARAM_ARGS)
{
	char buf[SPA_TASKQ_PARAM_MAX];
	int err;

	(void) spa_taskq_param_get(ZIO_TYPE_FREE, buf, FALSE);
	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (err || req->newptr == NULL)
		return (err);
	return (spa_taskq_param_set(ZIO_TYPE_FREE, buf));
}
#endif
#endif /* _KERNEL */
@@ -11273,6 +11338,9 @@ ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write,
	spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RW,
	"Configure IO queues for write IO");
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_free,
	spa_taskq_free_param_set, spa_taskq_free_param_get, ZMOD_RW,
	"Configure IO queues for free IO");
#endif

ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_write_tpq, UINT, ZMOD_RW,