Cleanup: Use OpenSolaris functions to call scheduler

In our codebase, `cond_resched() and `schedule()` are Linux kernel
functions that have replaced the OpenSolaris `kpreempt()` functions in
the codebase to such an extent that `kpreempt()` in zfs_context.h was
broken. Nobody noticed because we did not actually use it. The header
had defined `kpreempt()` as `yield()`, which works on OpenSolaris and
Illumos where `sched_yield()` is a wrapper for `yield()`, but that does
not work on any other platform.

The FreeBSD platform specific code implemented shims for these, but the
shim for `schedule()` forced us to wait, which is different than merely
rescheduling to another thread as the original Linux code does, while
the shim for `cond_resched()` had the same definition as its kernel
kpreempt() shim.

After studying this, I have concluded that we should reintroduce the
kpreempt() function in platform independent code with the following
definitions:

	- In the Linux kernel:
		kpreempt(unused)	-> cond_resched()

	- In the FreeBSD kernel:
		kpreempt(unused)	-> kern_yield(PRI_USER)

	- In userspace:
		kpreempt(unused)	-> sched_yield()

In userspace, nothing changes from this cleanup. In the kernels, the
function `fm_fini()` will now call `kern_yield(PRI_USER)` on FreeBSD and
`cond_resched()` on Linux.  This is instead of `pause("schedule", 1)` on
FreeBSD and `schedule()` on Linux. This makes our behavior consistent
across platforms.

Note that Linux's SPL continues to use `cond_resched()` and
`schedule()`.  However, those functions have been removed from both the
FreeBSD code and userspace code.

This should have the benefit of making it slightly easier to port the
code to new platforms by making how things should be mapped less
confusing.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13845
This commit is contained in:
Richard Yao 2022-09-12 12:55:37 -04:00 committed by GitHub
parent ede037cda7
commit 0e4c830bc1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 15 additions and 14 deletions

View File

@ -31,6 +31,8 @@
#include <sys/proc.h> #include <sys/proc.h>
#define KPREEMPT_SYNC (-1)
#define kpreempt(x) kern_yield(PRI_USER) #define kpreempt(x) kern_yield(PRI_USER)
#endif /* _OPENSOLARIS_SYS_DISP_H_ */ #endif /* _OPENSOLARIS_SYS_DISP_H_ */

View File

@ -33,6 +33,4 @@
#define usleep_range(wakeup, wakeupepsilon) \ #define usleep_range(wakeup, wakeupepsilon) \
pause_sbt("usleep_range", ustosbt(wakeup), \ pause_sbt("usleep_range", ustosbt(wakeup), \
ustosbt(wakeupepsilon - wakeup), 0) ustosbt(wakeupepsilon - wakeup), 0)
#define schedule() pause("schedule", 1)
#endif #endif

View File

@ -45,8 +45,6 @@
#define HAVE_LARGE_STACKS 1 #define HAVE_LARGE_STACKS 1
#endif #endif
#define cond_resched() kern_yield(PRI_USER)
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \ #define taskq_create_sysdc(a, b, d, e, p, dc, f) \
((void) sizeof (dc), taskq_create(a, b, maxclsyspri, d, e, f)) ((void) sizeof (dc), taskq_create(a, b, maxclsyspri, d, e, f))

View File

@ -26,7 +26,9 @@
#include <linux/preempt.h> #include <linux/preempt.h>
#define kpreempt(unused) schedule() #define KPREEMPT_SYNC (-1)
#define kpreempt(unused) cond_resched()
#define kpreempt_disable() preempt_disable() #define kpreempt_disable() preempt_disable()
#define kpreempt_enable() preempt_enable() #define kpreempt_enable() preempt_enable()

View File

@ -219,7 +219,6 @@ typedef pthread_t kthread_t;
#define TS_JOINABLE 0x00000004 #define TS_JOINABLE 0x00000004
#define curthread ((void *)(uintptr_t)pthread_self()) #define curthread ((void *)(uintptr_t)pthread_self())
#define kpreempt(x) yield()
#define getcomm() "unknown" #define getcomm() "unknown"
#define thread_create_named(name, stk, stksize, func, arg, len, \ #define thread_create_named(name, stk, stksize, func, arg, len, \
@ -248,9 +247,11 @@ extern kthread_t *zk_thread_create(void (*func)(void *), void *arg,
#define issig(why) (FALSE) #define issig(why) (FALSE)
#define ISSIG(thr, why) (FALSE) #define ISSIG(thr, why) (FALSE)
#define KPREEMPT_SYNC (-1)
#define kpreempt(x) sched_yield()
#define kpreempt_disable() ((void)0) #define kpreempt_disable() ((void)0)
#define kpreempt_enable() ((void)0) #define kpreempt_enable() ((void)0)
#define cond_resched() sched_yield()
/* /*
* Mutexes * Mutexes

View File

@ -4165,7 +4165,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
* this CPU are able to make progress, make a voluntary preemption * this CPU are able to make progress, make a voluntary preemption
* call here. * call here.
*/ */
cond_resched(); kpreempt(KPREEMPT_SYNC);
return (bytes_evicted); return (bytes_evicted);
} }
@ -10335,7 +10335,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
!dev->l2ad_first) !dev->l2ad_first)
goto out; goto out;
cond_resched(); kpreempt(KPREEMPT_SYNC);
for (;;) { for (;;) {
mutex_enter(&l2arc_rebuild_thr_lock); mutex_enter(&l2arc_rebuild_thr_lock);
if (dev->l2ad_rebuild_cancel) { if (dev->l2ad_rebuild_cancel) {

View File

@ -1142,7 +1142,7 @@ dnode_free_interior_slots(dnode_t *dn)
while (!dnode_slots_tryenter(children, idx, slots)) { while (!dnode_slots_tryenter(children, idx, slots)) {
DNODE_STAT_BUMP(dnode_free_interior_lock_retry); DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
cond_resched(); kpreempt(KPREEMPT_SYNC);
} }
dnode_set_slots(children, idx, slots, DN_SLOT_FREE); dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
@ -1423,7 +1423,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
dnode_slots_rele(dnc, idx, slots); dnode_slots_rele(dnc, idx, slots);
while (!dnode_slots_tryenter(dnc, idx, slots)) { while (!dnode_slots_tryenter(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry); DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry);
cond_resched(); kpreempt(KPREEMPT_SYNC);
} }
/* /*
@ -1478,7 +1478,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
dnode_slots_rele(dnc, idx, slots); dnode_slots_rele(dnc, idx, slots);
while (!dnode_slots_tryenter(dnc, idx, slots)) { while (!dnode_slots_tryenter(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_lock_retry); DNODE_STAT_BUMP(dnode_hold_free_lock_retry);
cond_resched(); kpreempt(KPREEMPT_SYNC);
} }
if (!dnode_check_slots_free(dnc, idx, slots)) { if (!dnode_check_slots_free(dnc, idx, slots)) {

View File

@ -1354,7 +1354,7 @@ fm_fini(void)
zevent_flags |= ZEVENT_SHUTDOWN; zevent_flags |= ZEVENT_SHUTDOWN;
while (zevent_waiters > 0) { while (zevent_waiters > 0) {
mutex_exit(&zevent_lock); mutex_exit(&zevent_lock);
schedule(); kpreempt(KPREEMPT_SYNC);
mutex_enter(&zevent_lock); mutex_enter(&zevent_lock);
} }
mutex_exit(&zevent_lock); mutex_exit(&zevent_lock);

View File

@ -1176,7 +1176,7 @@ spa_ld_log_sm_data(spa_t *spa)
} }
/* Load TXG log spacemap into ms_unflushed_allocs/frees. */ /* Load TXG log spacemap into ms_unflushed_allocs/frees. */
cond_resched(); kpreempt(KPREEMPT_SYNC);
ASSERT0(sls->sls_nblocks); ASSERT0(sls->sls_nblocks);
sls->sls_nblocks = space_map_nblocks(sls->sls_sm); sls->sls_nblocks = space_map_nblocks(sls->sls_sm);
spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks; spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;