Align thread priority with Linux defaults

Under Linux, filesystem threads responsible for handling I/O are
normally created with the maximum priority.  Non-I/O filesystem
processes run with the default priority.  ZFS should adopt the
same priority scheme under Linux to maintain good performance
and so that it will compete fairly when other Linux filesystems
are active.  The priorities have been updated to the following:

$ ps -eLo rtprio,cls,pid,pri,nice,cmd | egrep 'z_|spl_|zvol|arc|dbu|meta'
     -  TS 10743  19 -20 [spl_kmem_cache]
     -  TS 10744  19 -20 [spl_system_task]
     -  TS 10745  19 -20 [spl_dynamic_tas]
     -  TS 10764  19   0 [dbu_evict]
     -  TS 10765  19   0 [arc_prune]
     -  TS 10766  19   0 [arc_reclaim]
     -  TS 10767  19   0 [arc_user_evicts]
     -  TS 10768  19   0 [l2arc_feed]
     -  TS 10769  39   0 [z_unmount]
     -  TS 10770  39 -20 [zvol]
     -  TS 11011  39 -20 [z_null_iss]
     -  TS 11012  39 -20 [z_null_int]
     -  TS 11013  39 -20 [z_rd_iss]
     -  TS 11014  39 -20 [z_rd_int_0]
     -  TS 11022  38 -19 [z_wr_iss]
     -  TS 11023  39 -20 [z_wr_iss_h]
     -  TS 11024  39 -20 [z_wr_int_0]
     -  TS 11032  39 -20 [z_wr_int_h]
     -  TS 11033  39 -20 [z_fr_iss_0]
     -  TS 11041  39 -20 [z_fr_int]
     -  TS 11042  39 -20 [z_cl_iss]
     -  TS 11043  39 -20 [z_cl_int]
     -  TS 11044  39 -20 [z_ioctl_iss]
     -  TS 11045  39 -20 [z_ioctl_int]
     -  TS 11046  39 -20 [metaslab_group_]
     -  TS 11050  19   0 [z_iput]
     -  TS 11121  38 -19 [z_wr_iss]

Note that under Linux the meaning of a process's priority is inverted
with respect to illumos.  High values on Linux indicate a _low_ priority
while high values on illumos indicate a _high_ priority.

In order to preserve the logical meaning of the minclsyspri and
maxclsyspri macros when they are used by the illumos wrapper functions,
their values have been inverted.  This way, when changes are merged
from upstream illumos, we won't need to remember to invert the macro.
Having to do so by hand could also lead to confusion.

This patch depends on https://github.com/zfsonlinux/spl/pull/466.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ned Bass <bass6@llnl.gov>
Closes #3607
This commit is contained in:
Brian Behlendorf 2015-07-24 10:08:31 -07:00
parent c97d30691c
commit 1229323d5f
12 changed files with 29 additions and 20 deletions

View File

@ -233,6 +233,7 @@ typedef struct kthread {
kt_did_t t_tid; kt_did_t t_tid;
thread_func_t t_func; thread_func_t t_func;
void * t_arg; void * t_arg;
pri_t t_pri;
} kthread_t; } kthread_t;
#define curthread zk_thread_current() #define curthread zk_thread_current()
@ -615,8 +616,12 @@ extern void delay(clock_t ticks);
#define max_ncpus 64 #define max_ncpus 64
#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN)) #define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))
#define minclsyspri 60 /*
#define maxclsyspri 99 * Process priorities as defined by setpriority(2) and getpriority(2).
*/
#define minclsyspri 19
#define maxclsyspri -20
#define defclsyspri 0
#define CPU_SEQID (pthread_self() & (max_ncpus - 1)) #define CPU_SEQID (pthread_self() & (max_ncpus - 1))

View File

@ -128,6 +128,7 @@ zk_thread_helper(void *arg)
VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
kthread_nr++; kthread_nr++;
VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
(void) setpriority(PRIO_PROCESS, 0, kt->t_pri);
kt->t_tid = pthread_self(); kt->t_tid = pthread_self();
((thread_func_arg_t) kt->t_func)(kt->t_arg); ((thread_func_arg_t) kt->t_func)(kt->t_arg);
@ -151,6 +152,7 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
kt = umem_zalloc(sizeof (kthread_t), UMEM_NOFAIL); kt = umem_zalloc(sizeof (kthread_t), UMEM_NOFAIL);
kt->t_func = func; kt->t_func = func;
kt->t_arg = arg; kt->t_arg = arg;
kt->t_pri = pri;
VERIFY0(pthread_attr_init(&attr)); VERIFY0(pthread_attr_init(&attr));
VERIFY0(pthread_attr_setdetachstate(&attr, detachstate)); VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));

View File

@ -308,7 +308,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
for (t = 0; t < nthreads; t++) for (t = 0; t < nthreads; t++)
VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0, VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0,
taskq_thread, tq, TS_RUN, NULL, 0, 0)) != NULL); taskq_thread, tq, TS_RUN, NULL, 0, pri)) != NULL);
return (tq); return (tq);
} }
@ -371,7 +371,7 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
void void
system_taskq_init(void) system_taskq_init(void)
{ {
system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512,
TASKQ_DYNAMIC | TASKQ_PREPOPULATE); TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
} }

View File

@ -5431,7 +5431,7 @@ arc_init(void)
mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL); mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t)); bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t));
arc_prune_taskq = taskq_create("arc_prune", max_ncpus, minclsyspri, arc_prune_taskq = taskq_create("arc_prune", max_ncpus, defclsyspri,
max_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); max_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED, arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
@ -5444,10 +5444,10 @@ arc_init(void)
} }
(void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0, (void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0,
TS_RUN, minclsyspri); TS_RUN, defclsyspri);
(void) thread_create(NULL, 0, arc_user_evicts_thread, NULL, 0, &p0, (void) thread_create(NULL, 0, arc_user_evicts_thread, NULL, 0, &p0,
TS_RUN, minclsyspri); TS_RUN, defclsyspri);
arc_dead = FALSE; arc_dead = FALSE;
arc_warm = B_FALSE; arc_warm = B_FALSE;
@ -6954,7 +6954,7 @@ l2arc_start(void)
return; return;
(void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0, (void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0,
TS_RUN, minclsyspri); TS_RUN, defclsyspri);
} }
void void

View File

@ -413,7 +413,7 @@ retry:
* All entries are queued via taskq_dispatch_ent(), so min/maxalloc * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
* configuration is not required. * configuration is not required.
*/ */
dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0); dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
} }
void void

View File

@ -1839,7 +1839,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
ntasks = dmu_find_threads; ntasks = dmu_find_threads;
if (ntasks == 0) if (ntasks == 0)
ntasks = vdev_count_leaves(dp->dp_spa) * 4; ntasks = vdev_count_leaves(dp->dp_spa) * 4;
tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks, tq = taskq_create("dmu_objset_find", ntasks, maxclsyspri, ntasks,
INT_MAX, 0); INT_MAX, 0);
if (tq == NULL) { if (tq == NULL) {
kmem_free(dcp, sizeof (*dcp)); kmem_free(dcp, sizeof (*dcp));

View File

@ -170,7 +170,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL); cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, minclsyspri, dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
return (dp); return (dp);

View File

@ -492,7 +492,7 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
mg->mg_activation_count = 0; mg->mg_activation_count = 0;
mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct, mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
minclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC); maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
return (mg); return (mg);
} }

View File

@ -898,11 +898,13 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
pri_t pri = maxclsyspri; pri_t pri = maxclsyspri;
/* /*
* The write issue taskq can be extremely CPU * The write issue taskq can be extremely CPU
* intensive. Run it at slightly lower priority * intensive. Run it at slightly less important
* than the other taskqs. * priority than the other taskqs. Under Linux this
* means incrementing the priority value on platforms
* like illumos it should be decremented.
*/ */
if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
pri--; pri++;
tq = taskq_create_proc(name, value, pri, 50, tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags); INT_MAX, spa->spa_proc, flags);

View File

@ -205,7 +205,7 @@ txg_sync_start(dsl_pool_t *dp)
tx->tx_threads = 2; tx->tx_threads = 2;
tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread, tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread,
dp, 0, &p0, TS_RUN, minclsyspri); dp, 0, &p0, TS_RUN, defclsyspri);
/* /*
* The sync thread can need a larger-than-default stack size on * The sync thread can need a larger-than-default stack size on
@ -213,7 +213,7 @@ txg_sync_start(dsl_pool_t *dp)
* scrub_visitbp() recursion. * scrub_visitbp() recursion.
*/ */
tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread, tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread,
dp, 0, &p0, TS_RUN, minclsyspri); dp, 0, &p0, TS_RUN, defclsyspri);
mutex_exit(&tx->tx_sync_lock); mutex_exit(&tx->tx_sync_lock);
} }
@ -445,7 +445,7 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
* Commit callback taskq hasn't been created yet. * Commit callback taskq hasn't been created yet.
*/ */
tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb", tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
max_ncpus, minclsyspri, max_ncpus, max_ncpus * 2, max_ncpus, defclsyspri, max_ncpus, max_ncpus * 2,
TASKQ_PREPOPULATE | TASKQ_DYNAMIC); TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
} }

View File

@ -1009,7 +1009,7 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
void void
zfsctl_init(void) zfsctl_init(void)
{ {
zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri, zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri,
1, 8, TASKQ_PREPOPULATE); 1, 8, TASKQ_PREPOPULATE);
} }

View File

@ -1888,7 +1888,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
ASSERT(list_is_empty(&zilog->zl_lwb_list)); ASSERT(list_is_empty(&zilog->zl_lwb_list));
zilog->zl_get_data = get_data; zilog->zl_get_data = get_data;
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, zilog->zl_clean_taskq = taskq_create("zil_clean", 1, defclsyspri,
2, 2, TASKQ_PREPOPULATE); 2, 2, TASKQ_PREPOPULATE);
return (zilog); return (zilog);