Merge branch 'zfsonlinux/merge-spl'

Merge a minimal version of the zfsonlinux/spl repository into the
zfsonlinux/zfs repository.  Care was taken to prevent file conflicts
when merging and to preserve the spl repository history.  The spl
kernel module remains under the GPLv2 license as documented by the
additional THIRDPARTYLICENSE.gplv2 file.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Brian Behlendorf 2018-05-29 14:57:07 -07:00
commit 1272941f49
98 changed files with 16031 additions and 0 deletions

View File

@ -0,0 +1,18 @@
dnl #
dnl # 2.6.33 API change,
dnl # Removed .ctl_name from struct ctl_table.
dnl #
dnl # The test program assigns ctl.ctl_name; HAVE_CTL_NAME is defined
dnl # when that program compiles, i.e. the member is still present.
dnl #
AC_DEFUN([SPL_AC_CTL_NAME], [
AC_MSG_CHECKING([whether struct ctl_table has ctl_name])
SPL_LINUX_TRY_COMPILE([
#include <linux/sysctl.h>
],[
struct ctl_table ctl __attribute__ ((unused));
ctl.ctl_name = 0;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_CTL_NAME, 1, [struct ctl_table has ctl_name])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -0,0 +1,19 @@
dnl #
dnl # PaX Linux 2.6.38 - 3.x API
dnl #
dnl # The test program initializes a struct file_operations_no_const with
dnl # a .fallocate member; HAVE_FILE_FALLOCATE is defined when it compiles.
dnl #
AC_DEFUN([SPL_AC_PAX_KERNEL_FILE_FALLOCATE], [
AC_MSG_CHECKING([whether fops->fallocate() exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
long (*fallocate) (struct file *, int, loff_t, loff_t) = NULL;
struct file_operations_no_const fops __attribute__ ((unused)) = {
.fallocate = fallocate,
};
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -0,0 +1,21 @@
dnl #
dnl # 4.9 API change
dnl # group_info changed from 2d array via ->blocks to 1d array via ->gid
dnl #
dnl # The test program stores into gi->gid[0]; HAVE_GROUP_INFO_GID is
dnl # defined when it compiles.  EXTRA_KCFLAGS is replaced by "-Werror"
dnl # for the duration of the test and restored afterwards.
dnl #
AC_DEFUN([SPL_AC_GROUP_INFO_GID], [
AC_MSG_CHECKING([whether group_info->gid exists])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/cred.h>
],[
struct group_info *gi = groups_alloc(1);
gi->gid[0] = KGIDT_INIT(0);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_GROUP_INFO_GID, 1, [group_info->gid exists])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

View File

@ -0,0 +1,23 @@
dnl #
dnl # 4.7 API change
dnl # i_mutex is changed to i_rwsem. Instead of directly using
dnl # i_mutex/i_rwsem, we should use inode_lock() and inode_lock_shared()
dnl # We test inode_lock_shared because inode_lock is introduced earlier.
dnl #
dnl # Defines HAVE_INODE_LOCK_SHARED when a call to inode_lock_shared()
dnl # compiles.  EXTRA_KCFLAGS is replaced by "-Werror" for the duration
dnl # of the test and restored afterwards.
dnl #
AC_DEFUN([SPL_AC_INODE_LOCK], [
AC_MSG_CHECKING([whether inode_lock_shared() exists])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
struct inode *inode = NULL;
inode_lock_shared(inode);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_LOCK_SHARED, 1, [yes])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

View File

@ -0,0 +1,72 @@
dnl #
dnl # 2.6.35 API change,
dnl # The cachep->gfpflags member was renamed cachep->allocflags. These are
dnl # private allocation flags which are applied when allocating a new slab
dnl # in kmem_getpages(). Unfortunately there is no public API for setting
dnl # non-default flags.
dnl #
dnl # The allocflags member is probed first and, only if that test fails,
dnl # the gfpflags member is probed.  At most one of
dnl # HAVE_KMEM_CACHE_ALLOCFLAGS or HAVE_KMEM_CACHE_GFPFLAGS is defined;
dnl # neither is defined when both tests fail.
dnl #
AC_DEFUN([SPL_AC_KMEM_CACHE_ALLOCFLAGS], [
AC_MSG_CHECKING([whether struct kmem_cache has allocflags])
SPL_LINUX_TRY_COMPILE([
#include <linux/slab.h>
],[
struct kmem_cache cachep __attribute__ ((unused));
cachep.allocflags = GFP_KERNEL;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_KMEM_CACHE_ALLOCFLAGS, 1,
[struct kmem_cache has allocflags])
],[
AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether struct kmem_cache has gfpflags])
SPL_LINUX_TRY_COMPILE([
#include <linux/slab.h>
],[
struct kmem_cache cachep __attribute__ ((unused));
cachep.gfpflags = GFP_KERNEL;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_KMEM_CACHE_GFPFLAGS, 1,
[struct kmem_cache has gfpflags])
],[
AC_MSG_RESULT(no)
])
])
])
dnl #
dnl # grsecurity API change,
dnl # kmem_cache_create() with SLAB_USERCOPY flag replaced by
dnl # kmem_cache_create_usercopy().
dnl #
dnl # Defines HAVE_KMEM_CACHE_CREATE_USERCOPY when a seven-argument call
dnl # (name, size, align, flags, useroffset, usersize, ctor) compiles.
dnl # EXTRA_KCFLAGS is replaced by "-Werror" for the duration of the test
dnl # and restored afterwards.
dnl #
AC_DEFUN([SPL_AC_KMEM_CACHE_CREATE_USERCOPY], [
AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/slab.h>
static void ctor(void *foo)
{
// fake ctor
}
],[
struct kmem_cache *skc_linux_cache;
const char *name = "test";
size_t size = 4096;
size_t align = 8;
unsigned long flags = 0;
size_t useroffset = 0;
size_t usersize = size - useroffset;
skc_linux_cache = kmem_cache_create_usercopy(
name, size, align, flags, useroffset, usersize, ctor);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_KMEM_CACHE_CREATE_USERCOPY, 1,
[kmem_cache_create_usercopy() exists])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

58
config/kernel-kmem.m4 Normal file
View File

@ -0,0 +1,58 @@
dnl #
dnl # Disabled by default (enable_debug_kmem defaults to "no").  When
dnl # enabled it provides a minimal level of memory tracking.
dnl # A total count of bytes allocated is kept for each alloc and free.
dnl # Then at module unload time a report to the console will be printed
dnl # if memory was leaked.
dnl #
dnl # With --enable-debug-kmem this macro appends -DDEBUG_KMEM to
dnl # KERNELCPPFLAGS, defines DEBUG_KMEM and substitutes DEBUG_KMEM as
dnl # "_with_debug_kmem"; otherwise DEBUG_KMEM is substituted as
dnl # "_without_debug_kmem".
dnl #
AC_DEFUN([SPL_AC_DEBUG_KMEM], [
AC_ARG_ENABLE([debug-kmem],
[AS_HELP_STRING([--enable-debug-kmem],
[Enable basic kmem accounting @<:@default=no@:>@])],
[],
[enable_debug_kmem=no])
AS_IF([test "x$enable_debug_kmem" = xyes],
[
KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_KMEM"
DEBUG_KMEM="_with_debug_kmem"
AC_DEFINE([DEBUG_KMEM], [1],
[Define to 1 to enable basic kmem accounting])
], [
DEBUG_KMEM="_without_debug_kmem"
])
AC_SUBST(DEBUG_KMEM)
AC_MSG_CHECKING([whether basic kmem accounting is enabled])
AC_MSG_RESULT([$enable_debug_kmem])
])
dnl #
dnl # Disabled by default it provides detailed memory tracking. This
dnl # feature also requires --enable-debug-kmem to be set. When enabled
dnl # not only will total bytes be tracked but also the location of every
dnl # alloc and free. When the SPL module is unloaded a list of all leaked
dnl # addresses and where they were allocated will be dumped to the console.
dnl # Enabling this feature has a significant impact on performance but it
dnl # makes finding memory leaks pretty straight forward.
dnl #
dnl # With --enable-debug-kmem-tracking this macro appends
dnl # -DDEBUG_KMEM_TRACKING to KERNELCPPFLAGS, defines DEBUG_KMEM_TRACKING
dnl # and substitutes DEBUG_KMEM_TRACKING as "_with_debug_kmem_tracking";
dnl # otherwise it is substituted as "_without_debug_kmem_tracking".
dnl # Note the macro itself does not check that --enable-debug-kmem is set.
dnl #
AC_DEFUN([SPL_AC_DEBUG_KMEM_TRACKING], [
AC_ARG_ENABLE([debug-kmem-tracking],
[AS_HELP_STRING([--enable-debug-kmem-tracking],
[Enable detailed kmem tracking @<:@default=no@:>@])],
[],
[enable_debug_kmem_tracking=no])
AS_IF([test "x$enable_debug_kmem_tracking" = xyes],
[
KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_KMEM_TRACKING"
DEBUG_KMEM_TRACKING="_with_debug_kmem_tracking"
AC_DEFINE([DEBUG_KMEM_TRACKING], [1],
[Define to 1 to enable detailed kmem tracking])
], [
DEBUG_KMEM_TRACKING="_without_debug_kmem_tracking"
])
AC_SUBST(DEBUG_KMEM_TRACKING)
AC_MSG_CHECKING([whether detailed kmem tracking is enabled])
AC_MSG_RESULT([$enable_debug_kmem_tracking])
])

28
config/kernel-kuidgid.m4 Normal file
View File

@ -0,0 +1,28 @@
dnl #
dnl # User namespaces, use kuid_t in place of uid_t
dnl # where available. Not strictly a user namespaces thing
dnl # but it should prevent surprises
dnl #
dnl # Two-stage check:
dnl #  1. kuid_t/kgid_t initialized with KUIDT_INIT()/KGIDT_INIT().  If
dnl #     this fails the result is "no" and nothing is defined.
dnl #  2. kuid_t/kgid_t initialized from a plain 0.  If this also compiles
dnl #     the result is "yes; optional" and nothing is defined; if it
dnl #     fails the result is "yes; mandatory" and HAVE_KUIDGID_T is
dnl #     defined.
dnl #
AC_DEFUN([SPL_AC_KUIDGID_T], [
AC_MSG_CHECKING([whether kuid_t/kgid_t is available])
SPL_LINUX_TRY_COMPILE([
#include <linux/uidgid.h>
], [
kuid_t userid = KUIDT_INIT(0);
kgid_t groupid = KGIDT_INIT(0);
],[
SPL_LINUX_TRY_COMPILE([
#include <linux/uidgid.h>
], [
kuid_t userid = 0;
kgid_t groupid = 0;
],[
AC_MSG_RESULT(yes; optional)
],[
AC_MSG_RESULT(yes; mandatory)
AC_DEFINE(HAVE_KUIDGID_T, 1, [kuid_t/kgid_t in use])
])
],[
AC_MSG_RESULT(no)
])
])

17
config/kernel-pde-data.m4 Normal file
View File

@ -0,0 +1,17 @@
dnl #
dnl # 3.10 API change,
dnl # PDE is replaced by PDE_DATA
dnl #
dnl # Defines HAVE_PDE_DATA when the SPL_LINUX_TRY_COMPILE_SYMBOL check
dnl # for PDE_DATA succeeds.  NOTE(review): the third and fourth arguments
dnl # ([PDE_DATA], []) are presumably the symbol name and its source file
dnl # for an export lookup; confirm against SPL_LINUX_TRY_COMPILE_SYMBOL.
dnl #
AC_DEFUN([SPL_AC_PDE_DATA], [
AC_MSG_CHECKING([whether PDE_DATA() is available])
SPL_LINUX_TRY_COMPILE_SYMBOL([
#include <linux/proc_fs.h>
], [
PDE_DATA(NULL);
], [PDE_DATA], [], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_PDE_DATA, 1, [yes])
],[
AC_MSG_RESULT(no)
])
])

57
config/kernel-rw.m4 Normal file
View File

@ -0,0 +1,57 @@
dnl #
dnl # 4.14 API change
dnl # kernel_write() which was introduced in 3.9 was updated to take
dnl # the offset as a pointer which is needed by vn_rdwr().
dnl #
dnl # Defines HAVE_KERNEL_WRITE_PPOS when
dnl # kernel_write(file, buf, count, loff_t *pos) compiles.  EXTRA_KCFLAGS
dnl # is replaced by "-Werror" for the duration of the test and restored
dnl # afterwards.
dnl #
AC_DEFUN([SPL_AC_KERNEL_WRITE], [
AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
struct file *file = NULL;
const void *buf = NULL;
size_t count = 0;
loff_t *pos = NULL;
ssize_t ret;
ret = kernel_write(file, buf, count, pos);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_KERNEL_WRITE_PPOS, 1,
[kernel_write() take loff_t pointer])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
dnl #
dnl # 4.14 API change
dnl # kernel_read() which has existed for forever was updated to take
dnl # the offset as a pointer which is needed by vn_rdwr().
dnl #
dnl # Defines HAVE_KERNEL_READ_PPOS when
dnl # kernel_read(file, buf, count, loff_t *pos) compiles.  EXTRA_KCFLAGS
dnl # is replaced by "-Werror" for the duration of the test and restored
dnl # afterwards.
dnl #
AC_DEFUN([SPL_AC_KERNEL_READ], [
AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
struct file *file = NULL;
void *buf = NULL;
size_t count = 0;
loff_t *pos = NULL;
ssize_t ret;
ret = kernel_read(file, buf, count, pos);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_KERNEL_READ_PPOS, 1,
[kernel_read() take loff_t pointer])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

75
config/kernel-rwsem.m4 Normal file
View File

@ -0,0 +1,75 @@
dnl #
dnl # 3.1 API Change
dnl #
dnl # The rw_semaphore.wait_lock member was changed from spinlock_t to
dnl # raw_spinlock_t at commit ddb6c9b58a19edcfac93ac670b066c836ff729f1.
dnl #
dnl # The test program assigns a raw_spinlock_t to the wait_lock member;
dnl # RWSEM_SPINLOCK_IS_RAW is defined when that compiles.  EXTRA_KCFLAGS
dnl # is replaced by "-Werror" for the duration of the test and restored
dnl # afterwards.
dnl #
AC_DEFUN([SPL_AC_RWSEM_SPINLOCK_IS_RAW], [
AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/rwsem.h>
],[
struct rw_semaphore dummy_semaphore __attribute__ ((unused));
raw_spinlock_t dummy_lock __attribute__ ((unused)) =
__RAW_SPIN_LOCK_INITIALIZER(dummy_lock);
dummy_semaphore.wait_lock = dummy_lock;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(RWSEM_SPINLOCK_IS_RAW, 1,
[struct rw_semaphore member wait_lock is raw_spinlock_t])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
dnl #
dnl # 3.16 API Change
dnl #
dnl # rwsem-spinlock "->activity" changed to "->count"
dnl #
dnl # Defines HAVE_RWSEM_ACTIVITY when an assignment to the activity
dnl # member compiles.  EXTRA_KCFLAGS is replaced by "-Werror" for the
dnl # duration of the test and restored afterwards.
dnl #
AC_DEFUN([SPL_AC_RWSEM_ACTIVITY], [
AC_MSG_CHECKING([whether struct rw_semaphore has member activity])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/rwsem.h>
],[
struct rw_semaphore dummy_semaphore __attribute__ ((unused));
dummy_semaphore.activity = 0;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1,
[struct rw_semaphore has member activity])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
dnl #
dnl # 4.8 API Change
dnl #
dnl # rwsem "->count" changed to atomic_long_t type
dnl #
dnl # Defines HAVE_RWSEM_ATOMIC_LONG_COUNT when atomic_long_read() applied
dnl # to the count member compiles.  EXTRA_KCFLAGS is replaced by
dnl # "-Werror" for the duration of the test and restored afterwards.
dnl #
AC_DEFUN([SPL_AC_RWSEM_ATOMIC_LONG_COUNT], [
AC_MSG_CHECKING(
[whether struct rw_semaphore has atomic_long_t member count])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/rwsem.h>
],[
DECLARE_RWSEM(dummy_semaphore);
(void) atomic_long_read(&dummy_semaphore.count);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RWSEM_ATOMIC_LONG_COUNT, 1,
[struct rw_semaphore has atomic_long_t member count])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

56
config/kernel-sched.m4 Normal file
View File

@ -0,0 +1,56 @@
dnl #
dnl # 3.9 API change,
dnl # Moved things from linux/sched.h to linux/sched/rt.h
dnl #
dnl # Defines HAVE_SCHED_RT_HEADER when a program including both
dnl # linux/sched.h and linux/sched/rt.h compiles.
dnl #
AC_DEFUN([SPL_AC_SCHED_RT_HEADER],
[AC_MSG_CHECKING([whether header linux/sched/rt.h exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/sched.h>
#include <linux/sched/rt.h>
],[
return 0;
],[
AC_DEFINE(HAVE_SCHED_RT_HEADER, 1, [linux/sched/rt.h exists])
AC_MSG_RESULT(yes)
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # 4.11 API change,
dnl # Moved things from linux/sched.h to linux/sched/signal.h
dnl #
dnl # Defines HAVE_SCHED_SIGNAL_HEADER when a program including both
dnl # linux/sched.h and linux/sched/signal.h compiles.
dnl #
AC_DEFUN([SPL_AC_SCHED_SIGNAL_HEADER],
[AC_MSG_CHECKING([whether header linux/sched/signal.h exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/sched.h>
#include <linux/sched/signal.h>
],[
return 0;
],[
AC_DEFINE(HAVE_SCHED_SIGNAL_HEADER, 1, [linux/sched/signal.h exists])
AC_MSG_RESULT(yes)
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # 3.19 API change
dnl # The io_schedule_timeout() function is present in all 2.6.32 kernels
dnl # but it was not exported until Linux 3.19. The RHEL 7.x kernels which
dnl # are based on a 3.10 kernel do export this symbol.
dnl #
dnl # Defines HAVE_IO_SCHEDULE_TIMEOUT when the
dnl # SPL_LINUX_TRY_COMPILE_SYMBOL check for io_schedule_timeout succeeds.
dnl # NOTE(review): the symbol-export part of the check lives in
dnl # SPL_LINUX_TRY_COMPILE_SYMBOL, which is not visible here; confirm.
dnl #
AC_DEFUN([SPL_AC_IO_SCHEDULE_TIMEOUT], [
AC_MSG_CHECKING([whether io_schedule_timeout() is available])
SPL_LINUX_TRY_COMPILE_SYMBOL([
#include <linux/sched.h>
], [
(void) io_schedule_timeout(1);
], [io_schedule_timeout], [], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_IO_SCHEDULE_TIMEOUT, 1, [yes])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -0,0 +1,39 @@
dnl #
dnl # 3.9 API change
dnl # set_fs_pwd takes const struct path *
dnl #
dnl # A function pointer taking 'const struct path *' is initialized from
dnl # set_fs_pwd; HAVE_SET_FS_PWD_WITH_CONST is defined when that compiles.
dnl # Otherwise the non-const prototype is tried: success reports "no",
dnl # failure aborts configure.  EXTRA_KCFLAGS is replaced by "-Werror"
dnl # for the duration of the tests and restored afterwards.
dnl #
dnl # The whole macro body, including the EXTRA_KCFLAGS save/override, is
dnl # kept inside the quoted second argument of AC_DEFUN so that none of
dnl # it is subject to m4 expansion at definition time.
dnl #
AC_DEFUN([SPL_AC_SET_FS_PWD_WITH_CONST], [
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
AC_MSG_CHECKING([whether set_fs_pwd() requires const struct path *])
SPL_LINUX_TRY_COMPILE([
#include <linux/spinlock.h>
#include <linux/fs_struct.h>
#include <linux/path.h>
void (*const set_fs_pwd_func)
(struct fs_struct *, const struct path *)
= set_fs_pwd;
],[
return 0;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SET_FS_PWD_WITH_CONST, 1,
[set_fs_pwd() needs const path *])
],[
SPL_LINUX_TRY_COMPILE([
#include <linux/spinlock.h>
#include <linux/fs_struct.h>
#include <linux/path.h>
void (*const set_fs_pwd_func)
(struct fs_struct *, struct path *)
= set_fs_pwd;
],[
return 0;
],[
AC_MSG_RESULT(no)
],[
AC_MSG_ERROR(unknown)
])
])
EXTRA_KCFLAGS="$tmp_flags"
])

125
config/kernel-shrinker.m4 Normal file
View File

@ -0,0 +1,125 @@
dnl #
dnl # Detect which shrinker callback interface the kernel provides.  The
dnl # variants are probed in order, each only if the previous one failed,
dnl # and exactly one of the following is defined:
dnl #   HAVE_2ARGS_OLD_SHRINKER_CALLBACK, HAVE_3ARGS_SHRINKER_CALLBACK,
dnl #   HAVE_2ARGS_NEW_SHRINKER_CALLBACK, HAVE_SPLIT_SHRINKER_CALLBACK
dnl # configure aborts when none of them compiles.  EXTRA_KCFLAGS is
dnl # replaced by "-Werror" for the duration of the tests and restored
dnl # afterwards.
dnl #
AC_DEFUN([SPL_AC_SHRINKER_CALLBACK],[
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
dnl #
dnl # 2.6.23 to 2.6.34 API change
dnl # ->shrink(int nr_to_scan, gfp_t gfp_mask)
dnl #
AC_MSG_CHECKING([whether old 2-argument shrinker exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/mm.h>
int shrinker_cb(int nr_to_scan, gfp_t gfp_mask);
],[
struct shrinker cache_shrinker = {
.shrink = shrinker_cb,
.seeks = DEFAULT_SEEKS,
};
register_shrinker(&cache_shrinker);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_2ARGS_OLD_SHRINKER_CALLBACK, 1,
[old shrinker callback wants 2 args])
],[
AC_MSG_RESULT(no)
dnl #
dnl # 2.6.35 - 2.6.39 API change
dnl # ->shrink(struct shrinker *,
dnl # int nr_to_scan, gfp_t gfp_mask)
dnl #
AC_MSG_CHECKING([whether old 3-argument shrinker exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/mm.h>
int shrinker_cb(struct shrinker *, int nr_to_scan,
gfp_t gfp_mask);
],[
struct shrinker cache_shrinker = {
.shrink = shrinker_cb,
.seeks = DEFAULT_SEEKS,
};
register_shrinker(&cache_shrinker);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_3ARGS_SHRINKER_CALLBACK, 1,
[old shrinker callback wants 3 args])
],[
AC_MSG_RESULT(no)
dnl #
dnl # 3.0 - 3.11 API change
dnl # ->shrink(struct shrinker *,
dnl # struct shrink_control *sc)
dnl #
AC_MSG_CHECKING(
[whether new 2-argument shrinker exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/mm.h>
int shrinker_cb(struct shrinker *,
struct shrink_control *sc);
],[
struct shrinker cache_shrinker = {
.shrink = shrinker_cb,
.seeks = DEFAULT_SEEKS,
};
register_shrinker(&cache_shrinker);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_2ARGS_NEW_SHRINKER_CALLBACK, 1,
[new shrinker callback wants 2 args])
],[
AC_MSG_RESULT(no)
dnl #
dnl # 3.12 API change,
dnl # ->shrink() is logically split in to
dnl # ->count_objects() and ->scan_objects()
dnl #
AC_MSG_CHECKING(
[whether ->count_objects callback exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/mm.h>
unsigned long shrinker_cb(
struct shrinker *,
struct shrink_control *sc);
],[
struct shrinker cache_shrinker = {
.count_objects = shrinker_cb,
.scan_objects = shrinker_cb,
.seeks = DEFAULT_SEEKS,
};
register_shrinker(&cache_shrinker);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK,
1, [->count_objects exists])
],[
dnl #
dnl # Finish the pending "checking..." line and say what
dnl # failed instead of aborting with a bare "error".
dnl #
AC_MSG_RESULT(no)
AC_MSG_ERROR([no supported shrinker callback interface was detected])
])
])
])
])
EXTRA_KCFLAGS="$tmp_flags"
])
dnl #
dnl # 2.6.39 API change,
dnl # Shrinker adjust to use common shrink_control structure.
dnl #
dnl # Defines HAVE_SHRINK_CONTROL_STRUCT when a struct shrink_control with
dnl # nr_to_scan and gfp_mask members compiles.
dnl #
AC_DEFUN([SPL_AC_SHRINK_CONTROL_STRUCT], [
AC_MSG_CHECKING([whether struct shrink_control exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/mm.h>
],[
struct shrink_control sc __attribute__ ((unused));
sc.nr_to_scan = 0;
sc.gfp_mask = GFP_KERNEL;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1,
[struct shrink_control exists])
],[
AC_MSG_RESULT(no)
])
])

24
config/kernel-spinlock.m4 Normal file
View File

@ -0,0 +1,24 @@
dnl #
dnl # 2.6.36 API change,
dnl # The 'struct fs_struct->lock' was changed from a rwlock_t to
dnl # a spinlock_t to improve the fastpath performance.
dnl #
dnl # Defines HAVE_FS_STRUCT_SPINLOCK when spin_lock_init(&fs.lock)
dnl # compiles.  EXTRA_KCFLAGS is replaced by "-Werror" for the duration
dnl # of the test and restored afterwards.
dnl #
AC_DEFUN([SPL_AC_FS_STRUCT_SPINLOCK], [
AC_MSG_CHECKING([whether struct fs_struct uses spinlock_t])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/sched.h>
#include <linux/fs_struct.h>
],[
static struct fs_struct fs;
spin_lock_init(&fs.lock);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_FS_STRUCT_SPINLOCK, 1,
[struct fs_struct uses spinlock_t])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

32
config/kernel-timer.m4 Normal file
View File

@ -0,0 +1,32 @@
dnl #
dnl # 4.15 API change
dnl # https://lkml.org/lkml/2017/11/25/90
dnl # Check if timer_list.func gets passed a timer_list or an unsigned long
dnl # (older kernels). Also sanity check the from_timer() and timer_setup()
dnl # macros are available as well, since they will be used in the same newer
dnl # kernels that support the new timer_list.func signature.
dnl #
dnl # Defines HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST when the test compiles:
dnl # from_timer must be defined as a macro, timer.function must accept a
dnl # void (*)(struct timer_list *) and timer_setup() must be callable.
dnl # EXTRA_KCFLAGS is replaced by "-Werror" for the duration of the test
dnl # and restored afterwards.
dnl #
AC_DEFUN([SPL_AC_KERNEL_TIMER_FUNCTION_TIMER_LIST], [
AC_MSG_CHECKING([whether timer_list.function gets a timer_list])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/timer.h>
void task_expire(struct timer_list *tl) {}
],[
#ifndef from_timer
#error "No from_timer() macro"
#endif
struct timer_list timer;
timer.function = task_expire;
timer_setup(&timer, NULL, 0);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST, 1,
[timer_list.function gets a timer_list])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

View File

@ -0,0 +1,19 @@
dnl #
dnl # config trim unused symbols,
dnl # Verify the kernel has CONFIG_TRIM_UNUSED_KSYMS DISABLED.
dnl #
dnl # The test program hits #error when CONFIG_TRIM_UNUSED_KSYMS is
dnl # defined.  In that case the result is "no" and configure aborts with
dnl # instructions to rebuild the kernel; otherwise the result is "yes".
dnl # No preprocessor symbol is defined by this check.
dnl #
AC_DEFUN([SPL_AC_CONFIG_TRIM_UNUSED_KSYMS], [
AC_MSG_CHECKING([whether CONFIG_TRIM_UNUSED_KSYMS is disabled])
SPL_LINUX_TRY_COMPILE([
#if defined(CONFIG_TRIM_UNUSED_KSYMS)
#error CONFIG_TRIM_UNUSED_KSYMS is defined
#endif
],[ ],[
AC_MSG_RESULT([yes])
],[
AC_MSG_RESULT([no])
AC_MSG_ERROR([
*** This kernel has unused symbols trimming enabled, please disable.
*** Rebuild the kernel with CONFIG_TRIM_UNUSED_KSYMS=n set.])
])
])

View File

@ -0,0 +1,21 @@
dnl #
dnl # 2.6.36 API compatibility.
dnl # Added usleep_range timer.
dnl # usleep_range is a finer precision implementation of msleep
dnl # designed to be a drop-in replacement for udelay where a precise
dnl # sleep / busy-wait is unnecessary.
dnl #
dnl # Defines HAVE_USLEEP_RANGE when a call to usleep_range(0, 0) compiles.
dnl #
AC_DEFUN([SPL_AC_USLEEP_RANGE], [
AC_MSG_CHECKING([whether usleep_range() is available])
SPL_LINUX_TRY_COMPILE([
#include <linux/delay.h>
],[
usleep_range(0, 0);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_USLEEP_RANGE, 1,
[usleep_range is available])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -0,0 +1,17 @@
dnl #
dnl # 2.6.35 API change,
dnl # Unused 'struct dentry *' removed from vfs_fsync() prototype.
dnl #
dnl # Defines HAVE_2ARGS_VFS_FSYNC when vfs_fsync(NULL, 0) compiles.
dnl #
AC_DEFUN([SPL_AC_2ARGS_VFS_FSYNC], [
AC_MSG_CHECKING([whether vfs_fsync() wants 2 args])
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
vfs_fsync(NULL, 0);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_2ARGS_VFS_FSYNC, 1, [vfs_fsync() wants 2 args])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -0,0 +1,62 @@
dnl #
dnl # 4.11 API, a528d35e@torvalds/linux
dnl # vfs_getattr(const struct path *p, struct kstat *s, u32 m, unsigned int f)
dnl #
dnl # Defines HAVE_4ARGS_VFS_GETATTR when the four-argument call compiles.
dnl #
AC_DEFUN([SPL_AC_4ARGS_VFS_GETATTR], [
AC_MSG_CHECKING([whether vfs_getattr() wants 4 args])
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
vfs_getattr((const struct path *)NULL,
(struct kstat *)NULL,
(u32)0,
(unsigned int)0);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_4ARGS_VFS_GETATTR, 1,
[vfs_getattr wants 4 args])
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # 3.9 API
dnl # vfs_getattr(struct path *p, struct kstat *s)
dnl #
dnl # Defines HAVE_2ARGS_VFS_GETATTR when the two-argument call compiles.
dnl #
AC_DEFUN([SPL_AC_2ARGS_VFS_GETATTR], [
AC_MSG_CHECKING([whether vfs_getattr() wants 2 args])
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
vfs_getattr((struct path *) NULL,
(struct kstat *)NULL);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_2ARGS_VFS_GETATTR, 1,
[vfs_getattr wants 2 args])
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # <3.9 API
dnl # vfs_getattr(struct vfsmount *v, struct dentry *d, struct kstat *k)
dnl #
dnl # Defines HAVE_3ARGS_VFS_GETATTR when the three-argument call compiles.
dnl #
AC_DEFUN([SPL_AC_3ARGS_VFS_GETATTR], [
AC_MSG_CHECKING([whether vfs_getattr() wants 3 args])
SPL_LINUX_TRY_COMPILE([
#include <linux/fs.h>
],[
vfs_getattr((struct vfsmount *)NULL,
(struct dentry *)NULL,
(struct kstat *)NULL);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_3ARGS_VFS_GETATTR, 1,
[vfs_getattr wants 3 args])
],[
AC_MSG_RESULT(no)
])
])

76
config/kernel-wait.m4 Normal file
View File

@ -0,0 +1,76 @@
dnl #
dnl # 3.17 API change,
dnl # wait_on_bit() no longer requires an action argument. The former
dnl # "wait_on_bit" interface required an 'action' function to be provided
dnl # which does the actual waiting. There were over 20 such functions in the
dnl # kernel, many of them identical, though most cases can be satisfied by one
dnl # of just two functions: one which uses io_schedule() and one which just
dnl # uses schedule(). This API change was made to consolidate all of those
dnl # redundant wait functions.
dnl #
dnl # Defines HAVE_WAIT_ON_BIT_ACTION when the four-argument form
dnl # wait_on_bit(word, bit, action, mode) still compiles.
dnl #
AC_DEFUN([SPL_AC_WAIT_ON_BIT], [
AC_MSG_CHECKING([whether wait_on_bit() takes an action])
SPL_LINUX_TRY_COMPILE([
#include <linux/wait.h>
],[
int (*action)(void *) = NULL;
wait_on_bit(NULL, 0, action, 0);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes])
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # 4.13 API change
dnl # Renamed struct wait_queue -> struct wait_queue_entry.
dnl #
dnl # Defines HAVE_WAIT_QUEUE_ENTRY_T when a declaration using the
dnl # wait_queue_entry_t type compiles.
dnl #
AC_DEFUN([SPL_AC_WAIT_QUEUE_ENTRY_T], [
AC_MSG_CHECKING([whether wait_queue_entry_t exists])
SPL_LINUX_TRY_COMPILE([
#include <linux/wait.h>
],[
wait_queue_entry_t *entry __attribute__ ((unused));
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY_T, 1,
[wait_queue_entry_t exists])
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # 4.13 API change
dnl # Renamed wait_queue_head::task_list -> wait_queue_head::head
dnl # Renamed wait_queue_entry::task_list -> wait_queue_entry::entry
dnl #
dnl # Defines HAVE_WAIT_QUEUE_HEAD_ENTRY when taking the address of
dnl # wq_head.head and wq_entry.entry compiles.  The entry type used by
dnl # the test is selected by HAVE_WAIT_QUEUE_ENTRY_T.
dnl # NOTE(review): this presumably relies on SPL_AC_WAIT_QUEUE_ENTRY_T
dnl # having run first and on its result being visible to the test
dnl # program; confirm against the macro ordering in the caller.
dnl #
AC_DEFUN([SPL_AC_WAIT_QUEUE_HEAD_ENTRY], [
AC_MSG_CHECKING([whether wq_head->head and wq_entry->entry exist])
SPL_LINUX_TRY_COMPILE([
#include <linux/wait.h>
#ifdef HAVE_WAIT_QUEUE_ENTRY_T
typedef wait_queue_head_t spl_wait_queue_head_t;
typedef wait_queue_entry_t spl_wait_queue_entry_t;
#else
typedef wait_queue_head_t spl_wait_queue_head_t;
typedef wait_queue_t spl_wait_queue_entry_t;
#endif
],[
spl_wait_queue_head_t wq_head;
spl_wait_queue_entry_t wq_entry;
struct list_head *head __attribute__ ((unused));
struct list_head *entry __attribute__ ((unused));
head = &wq_head.head;
entry = &wq_entry.entry;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_WAIT_QUEUE_HEAD_ENTRY, 1,
[wq_head->head and wq_entry->entry exist])
],[
AC_MSG_RESULT(no)
])
])

63
config/kernel-zlib.m4 Normal file
View File

@ -0,0 +1,63 @@
dnl #
dnl # zlib inflate compat,
dnl # Verify the kernel has CONFIG_ZLIB_INFLATE support enabled.
dnl #
dnl # The test program hits #error unless CONFIG_ZLIB_INFLATE or
dnl # CONFIG_ZLIB_INFLATE_MODULE is defined; on failure configure aborts.
dnl # No preprocessor symbol is defined by this check.
dnl #
AC_DEFUN([SPL_AC_CONFIG_ZLIB_INFLATE], [
AC_MSG_CHECKING([whether CONFIG_ZLIB_INFLATE is defined])
SPL_LINUX_TRY_COMPILE([
#if !defined(CONFIG_ZLIB_INFLATE) && \
!defined(CONFIG_ZLIB_INFLATE_MODULE)
#error CONFIG_ZLIB_INFLATE not defined
#endif
],[ ],[
AC_MSG_RESULT([yes])
],[
AC_MSG_RESULT([no])
AC_MSG_ERROR([
*** This kernel does not include the required zlib inflate support.
*** Rebuild the kernel with CONFIG_ZLIB_INFLATE=y|m set.])
])
])
dnl #
dnl # zlib deflate compat,
dnl # Verify the kernel has CONFIG_ZLIB_DEFLATE support enabled.
dnl #
dnl # The test program hits #error unless CONFIG_ZLIB_DEFLATE or
dnl # CONFIG_ZLIB_DEFLATE_MODULE is defined; on failure configure aborts.
dnl # No preprocessor symbol is defined by this check.
dnl #
AC_DEFUN([SPL_AC_CONFIG_ZLIB_DEFLATE], [
AC_MSG_CHECKING([whether CONFIG_ZLIB_DEFLATE is defined])
SPL_LINUX_TRY_COMPILE([
#if !defined(CONFIG_ZLIB_DEFLATE) && \
!defined(CONFIG_ZLIB_DEFLATE_MODULE)
#error CONFIG_ZLIB_DEFLATE not defined
#endif
],[ ],[
AC_MSG_RESULT([yes])
],[
AC_MSG_RESULT([no])
AC_MSG_ERROR([
*** This kernel does not include the required zlib deflate support.
*** Rebuild the kernel with CONFIG_ZLIB_DEFLATE=y|m set.])
])
])
dnl #
dnl # 2.6.39 API compat,
dnl # The function zlib_deflate_workspacesize() now takes 2 arguments.
dnl # This was done to avoid always having to allocate the maximum size
dnl # workspace (268K). The caller can now specify the windowBits and
dnl # memLevel compression parameters to get a smaller workspace.
dnl #
dnl # Defines HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE when the call
dnl # zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL) compiles.
dnl #
AC_DEFUN([SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE],
[AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args])
SPL_LINUX_TRY_COMPILE([
#include <linux/zlib.h>
],[
return zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE, 1,
[zlib_deflate_workspacesize() wants 2 args])
],[
AC_MSG_RESULT(no)
])
])

156
include/spl/rpc/xdr.h Normal file
View File

@ -0,0 +1,156 @@
/*
* Copyright (c) 2008 Sun Microsystems, Inc.
* Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_RPC_XDR_H
#define _SPL_RPC_XDR_H
#include <sys/types.h>
typedef int bool_t;
/*
 * XDR enums and types.
 *
 * enum xdr_op is the stream direction stored in XDR.x_op.
 */
enum xdr_op {
XDR_ENCODE,
XDR_DECODE
};
struct xdr_ops;
/*
 * XDR stream handle: the operations table used by the xdr_*() wrappers
 * in this header, the current and end addresses of the buffer, and the
 * stream direction.
 */
typedef struct {
struct xdr_ops *x_ops; /* Let caller know xdrmem_create() succeeds */
caddr_t x_addr; /* Current buffer addr */
caddr_t x_addr_end; /* End of the buffer */
enum xdr_op x_op; /* Stream direction */
} XDR;
typedef bool_t (*xdrproc_t)(XDR *xdrs, void *ptr);
/*
 * Per-stream operations table.  The xdr_control() macro and the static
 * inline xdr_*() wrappers in this header dispatch through these
 * pointers; the signed wrappers (xdr_short, xdr_int, xdr_longlong_t)
 * reuse the corresponding unsigned operation.
 */
struct xdr_ops {
bool_t (*xdr_control)(XDR *, int, void *);
bool_t (*xdr_char)(XDR *, char *);
bool_t (*xdr_u_short)(XDR *, unsigned short *);
bool_t (*xdr_u_int)(XDR *, unsigned *);
bool_t (*xdr_u_longlong_t)(XDR *, u_longlong_t *);
bool_t (*xdr_opaque)(XDR *, caddr_t, const uint_t);
bool_t (*xdr_string)(XDR *, char **, const uint_t);
bool_t (*xdr_array)(XDR *, caddr_t *, uint_t *, const uint_t,
const uint_t, const xdrproc_t);
};
/*
 * XDR control operator.
 *
 * XDR_GET_BYTES_AVAIL is the only request code defined in this header.
 * NOTE(review): struct xdr_bytesrec is presumably the 'info' payload
 * passed to xdr_control() for that request; confirm against the
 * xdr_control implementation.
 */
#define XDR_GET_BYTES_AVAIL 1
struct xdr_bytesrec {
bool_t xc_is_last_record;
size_t xc_num_avail;
};
/*
* XDR functions.
*/
void xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
const enum xdr_op op);
/* Currently not needed. If needed later, we'll add it to struct xdr_ops */
#define xdr_destroy(xdrs) ((void) 0)
/*
 * Forward a control request to the stream's xdr_control operation.
 * Note that the xdrs argument is evaluated twice.
 */
#define xdr_control(xdrs, req, info) \
(xdrs)->x_ops->xdr_control((xdrs), (req), (info))
/*
* For precaution, the following are defined as static inlines instead of macros
* to get some amount of type safety.
*
* Also, macros wouldn't work in the case where typecasting is done, because it
* must be possible to reference the functions' addresses by these names.
*/
/* Dispatch to the stream's xdr_char operation and return its result. */
static inline bool_t xdr_char(XDR *xdrs, char *cp)
{
	const struct xdr_ops *ops = xdrs->x_ops;

	return (ops->xdr_char(xdrs, cp));
}
/* Dispatch to the stream's xdr_u_short operation and return its result. */
static inline bool_t xdr_u_short(XDR *xdrs, unsigned short *usp)
{
	const struct xdr_ops *ops = xdrs->x_ops;

	return (ops->xdr_u_short(xdrs, usp));
}
/*
 * Signed 16-bit variant: reuses the stream's xdr_u_short operation on the
 * same storage.  The build fails if short is not exactly 2 bytes.
 */
static inline bool_t xdr_short(XDR *xdrs, short *sp)
{
	unsigned short *raw = (unsigned short *)sp;

	BUILD_BUG_ON(sizeof (short) != 2);
	return (xdrs->x_ops->xdr_u_short(xdrs, raw));
}
/* Dispatch to the stream's xdr_u_int operation and return its result. */
static inline bool_t xdr_u_int(XDR *xdrs, unsigned *up)
{
	const struct xdr_ops *ops = xdrs->x_ops;

	return (ops->xdr_u_int(xdrs, up));
}
/*
 * Signed 32-bit variant: reuses the stream's xdr_u_int operation on the
 * same storage.  The build fails if int is not exactly 4 bytes.
 */
static inline bool_t xdr_int(XDR *xdrs, int *ip)
{
	unsigned *raw = (unsigned *)ip;

	BUILD_BUG_ON(sizeof (int) != 4);
	return (xdrs->x_ops->xdr_u_int(xdrs, raw));
}
/* Dispatch to the stream's xdr_u_longlong_t operation and return its result. */
static inline bool_t xdr_u_longlong_t(XDR *xdrs, u_longlong_t *ullp)
{
	const struct xdr_ops *ops = xdrs->x_ops;

	return (ops->xdr_u_longlong_t(xdrs, ullp));
}
/*
 * Signed 64-bit variant: reuses the stream's xdr_u_longlong_t operation on
 * the same storage.  The build fails if longlong_t is not exactly 8 bytes.
 */
static inline bool_t xdr_longlong_t(XDR *xdrs, longlong_t *llp)
{
	u_longlong_t *raw = (u_longlong_t *)llp;

	BUILD_BUG_ON(sizeof (longlong_t) != 8);
	return (xdrs->x_ops->xdr_u_longlong_t(xdrs, raw));
}
/*
 * Fixed-length opaque data: hands cp and cnt to the stream's xdr_opaque
 * operation and returns its result.
 */
static inline bool_t xdr_opaque(XDR *xdrs, caddr_t cp, const uint_t cnt)
{
	const struct xdr_ops *ops = xdrs->x_ops;

	return (ops->xdr_opaque(xdrs, cp, cnt));
}
/*
 * Variable-length string, handled by the stream's xdr_string operation.
 * The *sp buffer must have (maxsize + 1) bytes.
 */
static inline bool_t xdr_string(XDR *xdrs, char **sp, const uint_t maxsize)
{
	const struct xdr_ops *ops = xdrs->x_ops;

	return (ops->xdr_string(xdrs, sp, maxsize));
}
/*
 * Variable-length arrays: all arguments are passed unchanged to the
 * stream's xdr_array operation, whose result is returned.
 */
static inline bool_t xdr_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep,
    const uint_t maxsize, const uint_t elsize, const xdrproc_t elproc)
{
	const struct xdr_ops *ops = xdrs->x_ops;

	return (ops->xdr_array(xdrs, arrp, sizep, maxsize, elsize, elproc));
}
#endif /* _SPL_RPC_XDR_H */

119
include/spl/sys/acl.h Normal file
View File

@ -0,0 +1,119 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_ACL_H
#define _SPL_ACL_H
#include <sys/types.h>
/*
 * Basic access control entry.  Its four members have the same names and
 * types as the leading members of ace_object_t; the per-field notes
 * mirror the ones given there.
 */
typedef struct ace {
uid_t a_who; /* uid or gid */
uint32_t a_access_mask; /* read,write,... */
uint16_t a_flags; /* see the ACE_* flag definitions below */
uint16_t a_type; /* allow or deny */
} ace_t;
/*
 * Access control entry extended with two 16-byte object type
 * identifiers; the first four members match ace_t.
 */
typedef struct ace_object {
uid_t a_who; /* uid or gid */
uint32_t a_access_mask; /* read,write,... */
uint16_t a_flags; /* see below */
uint16_t a_type; /* allow or deny */
uint8_t a_obj_type[16]; /* obj type */
uint8_t a_inherit_obj_type[16]; /* inherit obj */
} ace_object_t;
#define MAX_ACL_ENTRIES 1024
/*
 * Permission bits; ACE_ALL_PERMS below is the OR of all of them.
 * Presumably these are the values stored in a_access_mask (TODO confirm).
 * Three bits have two names each: 0x1 (READ_DATA / LIST_DIRECTORY),
 * 0x2 (WRITE_DATA / ADD_FILE) and 0x4 (APPEND_DATA / ADD_SUBDIRECTORY).
 */
#define ACE_READ_DATA 0x00000001
#define ACE_LIST_DIRECTORY 0x00000001
#define ACE_WRITE_DATA 0x00000002
#define ACE_ADD_FILE 0x00000002
#define ACE_APPEND_DATA 0x00000004
#define ACE_ADD_SUBDIRECTORY 0x00000004
#define ACE_READ_NAMED_ATTRS 0x00000008
#define ACE_WRITE_NAMED_ATTRS 0x00000010
#define ACE_EXECUTE 0x00000020
#define ACE_DELETE_CHILD 0x00000040
#define ACE_READ_ATTRIBUTES 0x00000080
#define ACE_WRITE_ATTRIBUTES 0x00000100
#define ACE_DELETE 0x00010000
#define ACE_READ_ACL 0x00020000
#define ACE_WRITE_ACL 0x00040000
#define ACE_WRITE_OWNER 0x00080000
#define ACE_SYNCHRONIZE 0x00100000
/*
 * 16-bit entry flags, presumably the values stored in a_flags
 * (TODO confirm).  ACE_OWNER, ACE_GROUP, ACE_EVERYONE and
 * ACE_IDENTIFIER_GROUP are combined into ACE_TYPE_FLAGS below.
 */
#define ACE_FILE_INHERIT_ACE 0x0001
#define ACE_DIRECTORY_INHERIT_ACE 0x0002
#define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004
#define ACE_INHERIT_ONLY_ACE 0x0008
#define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010
#define ACE_FAILED_ACCESS_ACE_FLAG 0x0020
#define ACE_IDENTIFIER_GROUP 0x0040
#define ACE_INHERITED_ACE 0x0080
#define ACE_OWNER 0x1000
#define ACE_GROUP 0x2000
#define ACE_EVERYONE 0x4000
/*
 * Entry type codes 0x00-0x03, presumably the values stored in a_type
 * (TODO confirm).
 */
#define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000
#define ACE_ACCESS_DENIED_ACE_TYPE 0x0001
#define ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002
#define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003
/* ACL-wide flag bits and the mask of all of them. */
#define ACL_AUTO_INHERIT 0x0001
#define ACL_PROTECTED 0x0002
#define ACL_DEFAULTED 0x0004
#define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED|ACL_DEFAULTED)
/* Further entry type codes, continuing the numbering at 0x04. */
#define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04
#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05
#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06
#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07
#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08
#define ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09
#define ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A
#define ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B
#define ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C
#define ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D
#define ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E
#define ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F
#define ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10
/* Low five bits; wide enough to hold every type code above (max 0x10). */
#define ACE_ALL_TYPES 0x001F
/* Mask of the flag bits ACE_OWNER, ACE_GROUP, ACE_EVERYONE and ACE_IDENTIFIER_GROUP. */
#define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE|ACE_IDENTIFIER_GROUP)
/* BEGIN CSTYLED */
#define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
/* END CSTYLED */
#define VSA_ACE 0x0010
#define VSA_ACECNT 0x0020
#define VSA_ACE_ALLTYPES 0x0040
#define VSA_ACE_ACLFLAGS 0x0080
#endif /* _SPL_ACL_H */

79
include/spl/sys/atomic.h Normal file
View File

@ -0,0 +1,79 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_ATOMIC_H
#define _SPL_ATOMIC_H
#include <linux/module.h>
#include <linux/spinlock.h>
#include <sys/types.h>
/*
* Map the atomic_* functions to the Linux counterparts. This relies on the
* fact that the atomic types are internally really a uint32 or uint64. If
* this were to change an alternate approach would be needed.
*
* N.B. Due to the limitations of the original API atomicity is not strictly
* preserved when using the 64-bit functions on a 32-bit system. In order
* to support this all consumers would need to be updated to use the Linux
* provided atomic_t and atomic64_t types.
*/
/*
 * 32-bit operations: the target pointer is cast to atomic_t *.
 * The _nv variants map to the kernel's *_return helpers, cas maps to
 * atomic_cmpxchg() and swap maps to atomic_xchg().
 */
#define atomic_inc_32(v) atomic_inc((atomic_t *)(v))
#define atomic_dec_32(v) atomic_dec((atomic_t *)(v))
#define atomic_add_32(v, i) atomic_add((i), (atomic_t *)(v))
#define atomic_sub_32(v, i) atomic_sub((i), (atomic_t *)(v))
#define atomic_inc_32_nv(v) atomic_inc_return((atomic_t *)(v))
#define atomic_dec_32_nv(v) atomic_dec_return((atomic_t *)(v))
#define atomic_add_32_nv(v, i) atomic_add_return((i), (atomic_t *)(v))
#define atomic_sub_32_nv(v, i) atomic_sub_return((i), (atomic_t *)(v))
#define atomic_cas_32(v, x, y) atomic_cmpxchg((atomic_t *)(v), x, y)
#define atomic_swap_32(v, x) atomic_xchg((atomic_t *)(v), x)
/*
 * 64-bit operations: same layout as above, using atomic64_t and the
 * atomic64_* helpers.
 */
#define atomic_inc_64(v) atomic64_inc((atomic64_t *)(v))
#define atomic_dec_64(v) atomic64_dec((atomic64_t *)(v))
#define atomic_add_64(v, i) atomic64_add((i), (atomic64_t *)(v))
#define atomic_sub_64(v, i) atomic64_sub((i), (atomic64_t *)(v))
#define atomic_inc_64_nv(v) atomic64_inc_return((atomic64_t *)(v))
#define atomic_dec_64_nv(v) atomic64_dec_return((atomic64_t *)(v))
#define atomic_add_64_nv(v, i) atomic64_add_return((i), (atomic64_t *)(v))
#define atomic_sub_64_nv(v, i) atomic64_sub_return((i), (atomic64_t *)(v))
#define atomic_cas_64(v, x, y) atomic64_cmpxchg((atomic64_t *)(v), x, y)
#define atomic_swap_64(v, x) atomic64_xchg((atomic64_t *)(v), x)
/*
 * atomic_cas_ptr() - compare-and-swap on a pointer-sized word.
 *
 * The pointer values are reinterpreted as integers of the native pointer
 * width and handed to atomic_cas_64() (when _LP64 is defined) or
 * atomic_cas_32() (otherwise).  The result of that call is returned,
 * converted back to a pointer.
 */
#ifdef _LP64
static __inline__ void *
atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
{
	volatile uint64_t *word = (volatile uint64_t *)target;
	uint64_t expected = (uint64_t)cmp;
	uint64_t desired = (uint64_t)newval;

	return ((void *)atomic_cas_64(word, expected, desired));
}
#else /* _LP64 */
static __inline__ void *
atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
{
	volatile uint32_t *word = (volatile uint32_t *)target;
	uint32_t expected = (uint32_t)cmp;
	uint32_t desired = (uint32_t)newval;

	return ((void *)atomic_cas_32(word, expected, desired));
}
#endif /* _LP64 */
#endif /* _SPL_ATOMIC_H */

View File

@ -0,0 +1,78 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_BYTEORDER_H
#define _SPL_BYTEORDER_H
#include <asm/byteorder.h>
#include <sys/isa_defs.h>
/*
 * Byte-swap helpers built up from single bytes.
 *
 * The value being shifted left is widened to the result width first.
 * Without that, BSWAP_32() on an int could shift a bit into the sign
 * position and BSWAP_64() on a 32-bit operand would shift by the full
 * width of its type; both are undefined behavior in C.
 */
#define	BSWAP_8(x)	((x) & 0xff)
#define	BSWAP_16(x)	((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
#define	BSWAP_32(x)	\
	(((uint32_t)BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
#define	BSWAP_64(x)	\
	(((uint64_t)BSWAP_32(x) << 32) | BSWAP_32((uint64_t)(x) >> 32))

/* CPU-order to fixed-endian conversions, forwarded to the kernel helpers. */
#define	LE_16(x)	cpu_to_le16(x)
#define	LE_32(x)	cpu_to_le32(x)
#define	LE_64(x)	cpu_to_le64(x)
#define	BE_16(x)	cpu_to_be16(x)
#define	BE_32(x)	cpu_to_be32(x)
#define	BE_64(x)	cpu_to_be64(x)

/*
 * Read a big-endian 8/16/32-bit value one byte at a time from address xa.
 * BE_IN8 is fully parenthesized so it can be used in any expression.
 */
#define	BE_IN8(xa) \
	(*((uint8_t *)(xa)))

#define	BE_IN16(xa) \
	(((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))

#define	BE_IN32(xa) \
	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
/*
 * htonll()/ntohll() - 64-bit counterparts of htonl()/ntohl().
 *
 * When _BIG_ENDIAN is defined the value is returned unchanged.  Otherwise
 * each 32-bit half is converted with htonl()/ntohl() and the two halves
 * trade places.
 */
#ifdef _BIG_ENDIAN
static __inline__ uint64_t
htonll(uint64_t n)
{
	/* Nothing to convert. */
	return (n);
}

static __inline__ uint64_t
ntohll(uint64_t n)
{
	/* Nothing to convert. */
	return (n);
}
#else
static __inline__ uint64_t
htonll(uint64_t n)
{
	/* htonl() sees only the low 32 bits of n; they become the top half. */
	uint64_t top = htonl(n);
	uint64_t bottom = htonl(n >> 32);

	return ((top << 32) + bottom);
}

static __inline__ uint64_t
ntohll(uint64_t n)
{
	/* ntohl() sees only the low 32 bits of n; they become the top half. */
	uint64_t top = ntohl(n);
	uint64_t bottom = ntohl(n >> 32);

	return ((top << 32) + bottom);
}
#endif
#endif /* _SPL_BYTEORDER_H */

54
include/spl/sys/callb.h Normal file
View File

@ -0,0 +1,54 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_CALLB_H
#define _SPL_CALLB_H
#include <linux/module.h>
#include <sys/mutex.h>
/*
 * Assert that the mutex recorded in cp is held.
 * NOTE(review): the expansion already ends in ';', so "CALLB_CPR_ASSERT(cp);"
 * yields an extra empty statement.
 */
#define CALLB_CPR_ASSERT(cp) ASSERT(MUTEX_HELD((cp)->cc_lockp));
/* The only state kept here is the mutex pointer. */
typedef struct callb_cpr {
kmutex_t *cc_lockp;
} callb_cpr_t;
/* Record lockp in cp; the func and name arguments are not used. */
#define CALLB_CPR_INIT(cp, lockp, func, name) { \
(cp)->cc_lockp = lockp; \
}
/* Only asserts that the recorded mutex is held. */
#define CALLB_CPR_SAFE_BEGIN(cp) { \
CALLB_CPR_ASSERT(cp); \
}
/* Only asserts that the recorded mutex is held; lockp is not used. */
#define CALLB_CPR_SAFE_END(cp, lockp) { \
CALLB_CPR_ASSERT(cp); \
}
/* Assert that the recorded mutex is held, then release it. */
#define CALLB_CPR_EXIT(cp) { \
ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
mutex_exit((cp)->cc_lockp); \
}
#endif /* _SPL_CALLB_H */

52
include/spl/sys/callo.h Normal file
View File

@ -0,0 +1,52 @@
/*
* Copyright (C) 2007-2013 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_CALLO_H
#define _SPL_CALLO_H
/*
* Callout flags:
*
* CALLOUT_FLAG_ROUNDUP
* Roundup the expiration time to the next resolution boundary.
* If this flag is not specified, the expiration time is rounded down.
* CALLOUT_FLAG_ABSOLUTE
* Normally, the expiration passed to the timeout API functions is an
* expiration interval. If this flag is specified, then it is
* interpreted as the expiration time itself.
* CALLOUT_FLAG_HRESTIME
* Normally, callouts are not affected by changes to system time
* (hrestime). This flag is used to create a callout that is affected
* by system time. If system time changes, these timers must be
* handled in a special way (see callout.c). These are used by condition
* variables and LWP timers that need this behavior.
* CALLOUT_FLAG_32BIT
* Legacy interfaces timeout() and realtime_timeout() pass this flag
* to timeout_generic() to indicate that a 32-bit ID should be allocated.
*/
#define CALLOUT_FLAG_ROUNDUP 0x1
#define CALLOUT_FLAG_ABSOLUTE 0x2
#define CALLOUT_FLAG_HRESTIME 0x4
#define CALLOUT_FLAG_32BIT 0x8
#endif /* _SPL_CALLO_H */

42
include/spl/sys/cmn_err.h Normal file
View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_CMN_ERR_H
#define _SPL_CMN_ERR_H
#include <stdarg.h>
/* NOTE(review): presumably the level passed as the int argument below - confirm. */
#define CE_CONT 0 /* continuation */
#define CE_NOTE 1 /* notice */
#define CE_WARN 2 /* warning */
#define CE_PANIC 3 /* panic */
#define CE_IGNORE 4 /* print nothing */
/* printf-style and va_list-style entry points; defined outside this header. */
extern void cmn_err(int, const char *, ...);
extern void vcmn_err(int, const char *, va_list);
extern void vpanic(const char *, va_list);
/* fm_panic is a plain alias for panic. */
#define fm_panic panic
#endif /* _SPL_CMN_ERR_H */

80
include/spl/sys/condvar.h Normal file
View File

@ -0,0 +1,80 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_CONDVAR_H
#define _SPL_CONDVAR_H
#include <linux/module.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/callo.h>
#include <sys/wait.h>
/*
* The kcondvar_t struct is protected by a mutex taken externally before
* calling any of the wait/signal functions, and passed into the wait functions.
*/
/*
 * NOTE(review): presumably the values held in cv_magic - CV_MAGIC for a live
 * condition variable and CV_DESTROY once it is torn down; confirm against
 * the implementation.
 */
#define CV_MAGIC 0x346545f4
#define CV_DESTROY 0x346545f5
/* Condition variable state; field roles below are assumptions to confirm. */
typedef struct {
int cv_magic; /* presumably CV_MAGIC or CV_DESTROY */
spl_wait_queue_head_t cv_event; /* presumably where waiters sleep */
spl_wait_queue_head_t cv_destroy; /* presumably used during teardown */
atomic_t cv_refs; /* presumably a reference count */
atomic_t cv_waiters; /* presumably the number of waiters */
kmutex_t *cv_mutex; /* presumably the mutex passed to the wait calls */
} kcondvar_t;
/* Type argument accepted by cv_init(). */
typedef enum { CV_DEFAULT = 0, CV_DRIVER } kcv_type_t;
/* Implementation entry points; use the cv_* wrappers below instead. */
extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *);
extern void __cv_destroy(kcondvar_t *);
extern void __cv_wait(kcondvar_t *, kmutex_t *);
extern void __cv_wait_io(kcondvar_t *, kmutex_t *);
extern void __cv_wait_sig(kcondvar_t *, kmutex_t *);
extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
/* The high-resolution variants have no wrapper macro and are called directly. */
extern clock_t cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
hrtime_t res, int flag);
extern clock_t cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
hrtime_t res, int flag);
extern void __cv_signal(kcondvar_t *);
extern void __cv_broadcast(kcondvar_t *c);
/*
 * Public names. Each forwards its arguments unchanged to the matching
 * __cv_* function; the *_interruptible names are aliases of the *_sig ones.
 */
#define cv_init(cvp, name, type, arg) __cv_init(cvp, name, type, arg)
#define cv_destroy(cvp) __cv_destroy(cvp)
#define cv_wait(cvp, mp) __cv_wait(cvp, mp)
#define cv_wait_io(cvp, mp) __cv_wait_io(cvp, mp)
#define cv_wait_sig(cvp, mp) __cv_wait_sig(cvp, mp)
#define cv_wait_interruptible(cvp, mp) cv_wait_sig(cvp, mp)
#define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t)
#define cv_timedwait_io(cvp, mp, t) __cv_timedwait_io(cvp, mp, t)
#define cv_timedwait_sig(cvp, mp, t) __cv_timedwait_sig(cvp, mp, t)
#define cv_timedwait_interruptible(cvp, mp, t) cv_timedwait_sig(cvp, mp, t)
#define cv_signal(cvp) __cv_signal(cvp)
#define cv_broadcast(cvp) __cv_broadcast(cvp)
#endif /* _SPL_CONDVAR_H */

44
include/spl/sys/console.h Normal file
View File

@ -0,0 +1,44 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_CONSOLE_H
#define _SPL_CONSOLE_H
/*
 * console_vprintf() - forward a format string and va_list to vprintk().
 *
 * Declared static inline because the definition lives in a header: with
 * external linkage every translation unit that includes this file would
 * emit its own copy and the link would fail with duplicate symbols.
 */
static inline void
console_vprintf(const char *fmt, va_list args)
{
	vprintk(fmt, args);
}
/*
 * console_printf() - variadic front end for console_vprintf().
 *
 * Declared static inline because the definition lives in a header: with
 * external linkage every translation unit that includes this file would
 * emit its own copy and the link would fail with duplicate symbols.
 */
static inline void
console_printf(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	console_vprintf(fmt, args);
	va_end(args);
}
#endif /* _SPL_CONSOLE_H */

75
include/spl/sys/cred.h Normal file
View File

@ -0,0 +1,75 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_CRED_H
#define _SPL_CRED_H
#include <linux/module.h>
#include <linux/cred.h>
#include <sys/types.h>
#include <sys/vfs.h>
/* cred_t is the kernel's struct cred. */
typedef struct cred cred_t;
/* kcred: the credentials of init_task, cast to cred_t *. */
#define kcred ((cred_t *)(init_task.cred))
/* CRED(): the result of current_cred(), cast to cred_t *. */
#define CRED() ((cred_t *)current_cred())
/* Linux 4.9 API change, GROUP_AT was removed */
#ifndef GROUP_AT
#define GROUP_AT(gi, i) ((gi)->gid[i])
#endif
/*
 * Conversions between the kernel's id types and plain ids. With
 * HAVE_KUIDGID_T they go through __kuid_val()/__kgid_val() and
 * KUIDT_INIT()/KGIDT_INIT(), and KGIDP_TO_SGIDP takes the address of the
 * val member; without it every macro is the identity.
 */
#ifdef HAVE_KUIDGID_T
#define KUID_TO_SUID(x) (__kuid_val(x))
#define KGID_TO_SGID(x) (__kgid_val(x))
#define SUID_TO_KUID(x) (KUIDT_INIT(x))
#define SGID_TO_KGID(x) (KGIDT_INIT(x))
#define KGIDP_TO_SGIDP(x) (&(x)->val)
#else /* HAVE_KUIDGID_T */
#define KUID_TO_SUID(x) (x)
#define KGID_TO_SGID(x) (x)
#define SUID_TO_KUID(x) (x)
#define SGID_TO_KGID(x) (x)
#define KGIDP_TO_SGIDP(x) (x)
#endif /* HAVE_KUIDGID_T */
/*
 * Credential accessors; the implementations are not part of this header.
 * NOTE(review): crhold/crfree presumably take and drop a reference - confirm.
 */
extern void crhold(cred_t *cr);
extern void crfree(cred_t *cr);
extern uid_t crgetuid(const cred_t *cr);
extern uid_t crgetruid(const cred_t *cr);
extern uid_t crgetsuid(const cred_t *cr);
extern uid_t crgetfsuid(const cred_t *cr);
extern gid_t crgetgid(const cred_t *cr);
extern gid_t crgetrgid(const cred_t *cr);
extern gid_t crgetsgid(const cred_t *cr);
extern gid_t crgetfsgid(const cred_t *cr);
extern int crgetngroups(const cred_t *cr);
extern gid_t *crgetgroups(const cred_t *cr);
extern int groupmember(gid_t gid, const cred_t *cr);
#endif /* _SPL_CRED_H */

30
include/spl/sys/ctype.h Normal file
View File

@ -0,0 +1,30 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_CTYPE_H
#define _SPL_CTYPE_H
#include <linux/ctype.h>
#endif /* _SPL_CTYPE_H */

131
include/spl/sys/debug.h Normal file
View File

@ -0,0 +1,131 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Available Solaris debug functions. All of the ASSERT() macros will be
* compiled out when NDEBUG is defined, this is the default behavior for
* the SPL. To enable assertions use the --enable-debug with configure.
* The VERIFY() functions are never compiled out and cannot be disabled.
*
* PANIC() - Panic the node and print message.
* ASSERT() - Assert X is true, if not panic.
* ASSERTV() - Wraps a variable declaration which is only used by ASSERT().
* ASSERT3B() - Assert boolean X OP Y is true, if not panic.
* ASSERT3S() - Assert signed X OP Y is true, if not panic.
* ASSERT3U() - Assert unsigned X OP Y is true, if not panic.
* ASSERT3P() - Assert pointer X OP Y is true, if not panic.
* ASSERT0() - Assert value is zero, if not panic.
* VERIFY() - Verify X is true, if not panic.
* VERIFY3B() - Verify boolean X OP Y is true, if not panic.
* VERIFY3S() - Verify signed X OP Y is true, if not panic.
* VERIFY3U() - Verify unsigned X OP Y is true, if not panic.
* VERIFY3P() - Verify pointer X OP Y is true, if not panic.
* VERIFY0() - Verify value is zero, if not panic.
*/
#ifndef _SPL_DEBUG_H
#define _SPL_DEBUG_H
/*
* Common DEBUG functionality.
*/
/*
 * spl_panic() takes the source location followed by a printf-style format
 * and its arguments.  It returns int so that it can appear as an operand
 * of && and || in the macros below.
 */
int spl_panic(const char *file, const char *func, int line,
    const char *fmt, ...);
void spl_dumpstack(void);

/* BEGIN CSTYLED */
/* Call spl_panic() with the caller's location and a printf-style message. */
#define	PANIC(fmt, a...)						\
	spl_panic(__FILE__, __FUNCTION__, __LINE__, fmt, ## a)

/*
 * Call spl_panic() when cond is false.  The stringified condition is
 * passed as a "%s" argument, never as the format itself.
 */
#define	VERIFY(cond)							\
	(void) (unlikely(!(cond)) &&					\
	    spl_panic(__FILE__, __FUNCTION__, __LINE__,			\
	    "%s", "VERIFY(" #cond ") failed\n"))

/*
 * Evaluate LEFT and RIGHT exactly once as TYPE and call spl_panic() when
 * "LEFT OP RIGHT" is false, reporting both the expression text and the
 * two values (printed with FMT after applying CAST).
 *
 * The expression text and the operator are passed as "%s" arguments
 * rather than pasted into the format string: an operand such as
 * "off % size" would otherwise inject a conversion specification.
 */
#define	VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE, FMT, CAST)	do {		\
		TYPE _verify3_left = (TYPE)(LEFT);			\
		TYPE _verify3_right = (TYPE)(RIGHT);			\
		if (unlikely(!(_verify3_left OP _verify3_right)))	\
		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
		    "%s failed (" FMT " %s " FMT ")\n",			\
		    "VERIFY3(" #LEFT " " #OP " " #RIGHT ")",		\
		    CAST (_verify3_left), #OP,				\
		    CAST (_verify3_right));				\
	} while (0)

/* Typed front ends: boolean, signed, unsigned, pointer, and "is zero". */
#define	VERIFY3B(x,y,z)	VERIFY3_IMPL(x, y, z, boolean_t, "%d", (boolean_t))
#define	VERIFY3S(x,y,z)	VERIFY3_IMPL(x, y, z, int64_t, "%lld", (long long))
#define	VERIFY3U(x,y,z)	VERIFY3_IMPL(x, y, z, uint64_t, "%llu",		\
				    (unsigned long long))
#define	VERIFY3P(x,y,z)	VERIFY3_IMPL(x, y, z, uintptr_t, "%p", (void *))
#define	VERIFY0(x)	VERIFY3_IMPL(0, ==, x, int64_t, "%lld", (long long))
/*
 * Compile-time assertions. __CTASSERT declares a typedef of a char array
 * whose size is 1 when x is true and -1 (a compile error) when it is false;
 * the typedef name is made unique by pasting the line number onto it.
 * CTASSERT_GLOBAL expands to the bare typedef, CTASSERT wraps it in braces.
 * _CTASSERT exists so __LINE__ is expanded before the token paste.
 */
#define CTASSERT_GLOBAL(x) _CTASSERT(x, __LINE__)
#define CTASSERT(x) { _CTASSERT(x, __LINE__); }
#define _CTASSERT(x, y) __CTASSERT(x, y)
#define __CTASSERT(x, y) \
typedef char __attribute__ ((unused)) \
__compile_time_assertion__ ## y[(x) ? 1 : -1]
/*
* Debugging disabled (--disable-debug)
*/
#ifdef NDEBUG
/* Every assertion expands to nothing; its arguments are not evaluated. */
#define	SPL_DEBUG_STR		""
#define	ASSERT(x)		((void)0)
#define	ASSERTV(x)
#define	ASSERT3B(x,y,z)		((void)0)
#define	ASSERT3S(x,y,z)		((void)0)
#define	ASSERT3U(x,y,z)		((void)0)
#define	ASSERT3P(x,y,z)		((void)0)
#define	ASSERT0(x)		((void)0)
#define	IMPLY(A, B)		((void)0)
#define	EQUIV(A, B)		((void)0)

/*
 * Debugging enabled (--enable-debug)
 */
#else
/* Every ASSERT* forwards to the matching VERIFY* macro. */
#define	SPL_DEBUG_STR		" (DEBUG mode)"
#define	ASSERT(cond)		VERIFY(cond)
#define	ASSERTV(x)		x
#define	ASSERT3B(x,y,z)		VERIFY3B(x, y, z)
#define	ASSERT3S(x,y,z)		VERIFY3S(x, y, z)
#define	ASSERT3U(x,y,z)		VERIFY3U(x, y, z)
#define	ASSERT3P(x,y,z)		VERIFY3P(x, y, z)
#define	ASSERT0(x)		VERIFY0(x)

/*
 * IMPLY: call spl_panic() when A is true but B is false.
 * EQUIV: call spl_panic() when the truth values of A and B differ.
 *
 * The stringified expressions are passed as a "%s" argument rather than
 * as the format string itself, so an expression containing '%' (for
 * example "n % step == 0") cannot be read as a conversion specification.
 */
#define	IMPLY(A, B) \
	((void)(((!(A)) || (B)) || \
	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
	    "%s", "(" #A ") implies (" #B ")")))
#define	EQUIV(A, B) \
	((void)((!!(A) == !!(B)) || \
	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
	    "%s", "(" #A ") is equivalent to (" #B ")")))
/* END CSTYLED */

#endif /* NDEBUG */
#endif /* _SPL_DEBUG_H */

34
include/spl/sys/disp.h Normal file
View File

@ -0,0 +1,34 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_DISP_H
#define _SPL_DISP_H
#include <linux/preempt.h>
/* kpreempt() ignores its argument and calls schedule(). */
#define kpreempt(unused) schedule()
/* Direct aliases for the kernel's preempt_disable()/preempt_enable(). */
#define kpreempt_disable() preempt_disable()
#define kpreempt_enable() preempt_enable()
#endif /* _SPL_DISP_H */

40
include/spl/sys/dkio.h Normal file
View File

@ -0,0 +1,40 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_DKIO_H
#define _SPL_DKIO_H
/*
 * Size in bytes of a dkioc_free_list_t followed by enough room for
 * num_exts extents in total.  One extent is subtracted before scaling by
 * 16 bytes per extent.
 * NOTE(review): presumably the subtracted extent is the one embedded in
 * the structure and 16 is the size of one extent record - confirm against
 * the dkioc_free_list_t definition.
 *
 * The argument is parenthesized so that expressions such as
 * "cond ? a : b" or "a & b" are evaluated as a unit before the
 * subtraction.
 */
#define	DFL_SZ(num_exts) \
	(sizeof (dkioc_free_list_t) + ((num_exts) - 1) * 16)
#define DKIOC (0x04 << 8)
#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */
/*
* ioctl to free space (e.g. SCSI UNMAP) off a disk.
* Pass a dkioc_free_list_t containing a list of extents to be freed.
*/
#define DKIOCFREE (DKIOC|50)
#endif /* _SPL_DKIO_H */

View File

@ -0,0 +1,58 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_DKIOC_UTIL_H
#define _SPL_DKIOC_UTIL_H
#include <sys/dkio.h>
/*
 * One extent record: two 64-bit fields.
 * NOTE(review): units (bytes vs. blocks) are not visible here - confirm.
 */
typedef struct dkioc_free_list_ext_s {
uint64_t dfle_start;
uint64_t dfle_length;
} dkioc_free_list_ext_t;
/*
 * Header followed by the extent array. dfl_exts is declared with one
 * element.
 * NOTE(review): presumably allocated with extra trailing extents (see
 * DFL_SZ), with dfl_num_exts holding the count - confirm.
 */
typedef struct dkioc_free_list_s {
uint64_t dfl_flags;
uint64_t dfl_num_exts;
int64_t dfl_offset;
/*
 * N.B. this is only an internal debugging API! This is only called
 * from debug builds of sd for pre-release checking. Remove before GA!
 */
void (*dfl_ck_func)(uint64_t, uint64_t, void *);
void *dfl_ck_arg;
dkioc_free_list_ext_t dfl_exts[1];
} dkioc_free_list_t;
/*
 * dfl_free() - release a free list with vmem_free().
 *
 * The size passed to vmem_free() is recomputed from dfl->dfl_num_exts via
 * DFL_SZ(), so dfl is dereferenced and must not be NULL.
 */
static inline void
dfl_free(dkioc_free_list_t *dfl)
{
	vmem_free(dfl, DFL_SZ(dfl->dfl_num_exts));
}
/*
 * dfl_alloc() - allocate a free list sized for dfl_num_exts extents.
 *
 * Returns whatever vmem_zalloc() returns for DFL_SZ(dfl_num_exts) bytes
 * and the given flags.  No field is filled in here; in particular
 * dfl_num_exts is not stored in the returned list.
 */
static inline dkioc_free_list_t *
dfl_alloc(uint64_t dfl_num_exts, int flags)
{
	dkioc_free_list_t *list = vmem_zalloc(DFL_SZ(dfl_num_exts), flags);

	return (list);
}
#endif /* _SPL_DKIOC_UTIL_H */

37
include/spl/sys/fcntl.h Normal file
View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2010 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_FCNTL_H
#define _SPL_FCNTL_H
#include <asm/fcntl.h>
/* NOTE(review): presumably the Solaris fcntl free-space command - confirm. */
#define F_FREESP 11
/* flock64_t is struct flock when CONFIG_64BIT is defined, else struct flock64. */
#ifdef CONFIG_64BIT
typedef struct flock flock64_t;
#else
typedef struct flock64 flock64_t;
#endif /* CONFIG_64BIT */
#endif /* _SPL_FCNTL_H */

52
include/spl/sys/file.h Normal file
View File

@ -0,0 +1,52 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_FILE_H
#define _SPL_FILE_H
/* NOTE(review): presumably Solaris-style file and lookup flag values - confirm. */
#define FIGNORECASE 0x00080000
#define FKIOCTL 0x80000000
#define ED_CASE_CONFLICT 0x10
/*
 * Inode locking wrappers. When HAVE_INODE_LOCK_SHARED is defined each
 * wrapper maps to the kernel helper of the same name. Otherwise they
 * operate on the inode's i_mutex, and the "shared" variants take that same
 * mutex exactly like the exclusive ones.
 */
#ifdef HAVE_INODE_LOCK_SHARED
#define spl_inode_lock(ip) inode_lock(ip)
#define spl_inode_unlock(ip) inode_unlock(ip)
#define spl_inode_lock_shared(ip) inode_lock_shared(ip)
#define spl_inode_unlock_shared(ip) inode_unlock_shared(ip)
#define spl_inode_trylock(ip) inode_trylock(ip)
#define spl_inode_trylock_shared(ip) inode_trylock_shared(ip)
#define spl_inode_is_locked(ip) inode_is_locked(ip)
#define spl_inode_lock_nested(ip, s) inode_lock_nested(ip, s)
#else
#define spl_inode_lock(ip) mutex_lock(&(ip)->i_mutex)
#define spl_inode_unlock(ip) mutex_unlock(&(ip)->i_mutex)
#define spl_inode_lock_shared(ip) mutex_lock(&(ip)->i_mutex)
#define spl_inode_unlock_shared(ip) mutex_unlock(&(ip)->i_mutex)
#define spl_inode_trylock(ip) mutex_trylock(&(ip)->i_mutex)
#define spl_inode_trylock_shared(ip) mutex_trylock(&(ip)->i_mutex)
#define spl_inode_is_locked(ip) mutex_is_locked(&(ip)->i_mutex)
#define spl_inode_lock_nested(ip, s) mutex_lock_nested(&(ip)->i_mutex, s)
#endif
#endif /* SPL_FILE_H */

View File

@ -0,0 +1,28 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_INTTYPES_H
#define _SPL_INTTYPES_H
/* This header currently declares nothing beyond its include guard. */
#endif /* _SPL_INTTYPES_H */

229
include/spl/sys/isa_defs.h Normal file
View File

@ -0,0 +1,229 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_ISA_DEFS_H
#define _SPL_ISA_DEFS_H
/*
* Per-architecture selection. Each branch normalizes the compiler's
* architecture macros and defines the data model (_LP64 or _ILP32) and
* _ALIGNMENT_REQUIRED. Only some branches (arm, sparc, s390, mips) set
* _LITTLE_ENDIAN/_BIG_ENDIAN here; the rest leave endianness to the
* __LITTLE_ENDIAN/__BIG_ENDIAN checks that follow this chain.
*/
/* x86_64 arch specific defines */
#if defined(__x86_64) || defined(__x86_64__)
#if !defined(__x86_64)
#define __x86_64
#endif
#if !defined(__amd64)
#define __amd64
#endif
#if !defined(__x86)
#define __x86
#endif
#if !defined(_LP64)
#define _LP64
#endif
#define _ALIGNMENT_REQUIRED 1
/* i386 arch specific defines */
#elif defined(__i386) || defined(__i386__)
#if !defined(__i386)
#define __i386
#endif
#if !defined(__x86)
#define __x86
#endif
#if !defined(_ILP32)
#define _ILP32
#endif
#define _ALIGNMENT_REQUIRED 0
/* powerpc (ppc64) arch specific defines */
#elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__)
#if !defined(__powerpc)
#define __powerpc
#endif
#if !defined(__powerpc__)
#define __powerpc__
#endif
/* 64-bit PowerPC selects _LP64, everything else _ILP32. */
#if defined(__powerpc64__)
#if !defined(_LP64)
#define _LP64
#endif
#else
#if !defined(_ILP32)
#define _ILP32
#endif
#endif
/*
* Illumos doesn't define _ALIGNMENT_REQUIRED for PPC, so default to 1
* out of paranoia.
*/
#define _ALIGNMENT_REQUIRED 1
/* arm arch specific defines */
#elif defined(__arm) || defined(__arm__) || defined(__aarch64__)
#if !defined(__arm)
#define __arm
#endif
#if !defined(__arm__)
#define __arm__
#endif
/* aarch64 selects _LP64, 32-bit arm selects _ILP32. */
#if defined(__aarch64__)
#if !defined(_LP64)
#define _LP64
#endif
#else
#if !defined(_ILP32)
#define _ILP32
#endif
#endif
/* Any arm target without an explicit *EL__ macro is treated as big endian. */
#if defined(__ARMEL__) || defined(__AARCH64EL__)
#define _LITTLE_ENDIAN
#else
#define _BIG_ENDIAN
#endif
/*
* Illumos doesn't define _ALIGNMENT_REQUIRED for ARM, so default to 1
* out of paranoia.
*/
#define _ALIGNMENT_REQUIRED 1
/* sparc arch specific defines */
#elif defined(__sparc) || defined(__sparc__)
#if !defined(__sparc)
#define __sparc
#endif
#if !defined(__sparc__)
#define __sparc__
#endif
#if defined(__arch64__)
#if !defined(_LP64)
#define _LP64
#endif
#else
#if !defined(_ILP32)
#define _ILP32
#endif
#endif
#define _BIG_ENDIAN
#define _SUNOS_VTOC_16
#define _ALIGNMENT_REQUIRED 1
/* s390 arch specific defines */
#elif defined(__s390__)
#if defined(__s390x__)
#if !defined(_LP64)
#define _LP64
#endif
#else
#if !defined(_ILP32)
#define _ILP32
#endif
#endif
#define _BIG_ENDIAN
/*
* Illumos doesn't define _ALIGNMENT_REQUIRED for s390, so default to 1
* out of paranoia.
*/
#define _ALIGNMENT_REQUIRED 1
/* MIPS arch specific defines */
#elif defined(__mips__)
#if defined(__MIPSEB__)
#define _BIG_ENDIAN
#elif defined(__MIPSEL__)
#define _LITTLE_ENDIAN
#else
#error MIPS no endian specified
#endif
/* _ILP32 is assumed unless _LP64 was already defined before this header. */
#ifndef _LP64
#define _ILP32
#endif
#define _SUNOS_VTOC_16
/*
* Illumos doesn't define _ALIGNMENT_REQUIRED for MIPS, so default to 1
* out of paranoia.
*/
#define _ALIGNMENT_REQUIRED 1
#else
/*
* Currently supported:
* x86_64, i386, arm, powerpc, s390, sparc, and mips
*/
#error "Unsupported ISA type"
#endif
/* Exactly one data model (_ILP32 or _LP64) must be defined at this point. */
#if defined(_ILP32) && defined(_LP64)
#error "Both _ILP32 and _LP64 are defined"
#endif
#if !defined(_ILP32) && !defined(_LP64)
#error "Neither _ILP32 or _LP64 are defined"
#endif
#include <sys/byteorder.h>
/*
* If endianness has not been defined yet, inherit it from
* __LITTLE_ENDIAN/__BIG_ENDIAN when those macros are available.
*/
#if defined(__LITTLE_ENDIAN) && !defined(_LITTLE_ENDIAN)
#define _LITTLE_ENDIAN __LITTLE_ENDIAN
#endif
#if defined(__BIG_ENDIAN) && !defined(_BIG_ENDIAN)
#define _BIG_ENDIAN __BIG_ENDIAN
#endif
/* Exactly one of _LITTLE_ENDIAN/_BIG_ENDIAN must be defined at this point. */
#if defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
#error "Both _LITTLE_ENDIAN and _BIG_ENDIAN are defined"
#endif
#if !defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
#error "Neither _LITTLE_ENDIAN or _BIG_ENDIAN are defined"
#endif
#endif /* _SPL_ISA_DEFS_H */

185
include/spl/sys/kmem.h Normal file
View File

@ -0,0 +1,185 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_KMEM_H
#define _SPL_KMEM_H
#include <sys/debug.h>
#include <linux/slab.h>
#include <linux/sched.h>
/*
* Debug query and string helpers implemented outside this header.
* NOTE(review): strfree() is presumably the release routine for strings
* returned by strdup()/kmem_asprintf()/kmem_vasprintf() -- confirm
* against the implementation.
*/
extern int kmem_debugging(void);
extern char *kmem_vasprintf(const char *fmt, va_list ap);
extern char *kmem_asprintf(const char *fmt, ...);
extern char *strdup(const char *str);
extern void strfree(char *str);
/*
* Memory allocation interfaces
*/
/* KM_* allocation flags; kmem_flags_convert() maps them to GFP_* flags. */
#define KM_SLEEP 0x0000 /* can block for memory; success guaranteed */
#define KM_NOSLEEP 0x0001 /* cannot block for memory; may fail */
#define KM_PUSHPAGE 0x0004 /* can block for memory; may use reserve */
#define KM_ZERO 0x1000 /* zero the allocation */
#define KM_VMEM 0x2000 /* caller is vmem_* wrapper */
/* KM_ZERO and KM_VMEM are deliberately left out of the public mask. */
#define KM_PUBLIC_MASK (KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)
/* Forward declaration; defined later in this header, used by kmem_flags_convert(). */
static int spl_fstrans_check(void);
/*
 * Translate a KM_* flag mask into the matching Linux gfp_t value.
 *
 * Every result carries __GFP_NOWARN | __GFP_COMP.  KM_NOSLEEP selects
 * GFP_ATOMIC | __GFP_NORETRY; otherwise GFP_KERNEL is used and, when
 * spl_fstrans_check() reports the current task as marked, __GFP_IO and
 * __GFP_FS are cleared.  KM_PUSHPAGE adds __GFP_HIGH and KM_ZERO adds
 * __GFP_ZERO.
 */
static inline gfp_t
kmem_flags_convert(int flags)
{
	const int nosleep = (flags & KM_NOSLEEP) != 0;
	gfp_t gfp = __GFP_NOWARN | __GFP_COMP;

	gfp |= nosleep ? (GFP_ATOMIC | __GFP_NORETRY) : GFP_KERNEL;

	/* Only sleeping allocations consult the fstrans state. */
	if (!nosleep && spl_fstrans_check())
		gfp &= ~(__GFP_IO | __GFP_FS);

	if (flags & KM_ZERO)
		gfp |= __GFP_ZERO;
	if (flags & KM_PUSHPAGE)
		gfp |= __GFP_HIGH;

	return (gfp);
}
/*
* Cookie produced by spl_fstrans_mark() and consumed by
* spl_fstrans_unmark().
*/
typedef struct {
struct task_struct *fstrans_thread; /* task that created the cookie */
unsigned int saved_flags; /* SPL_FSTRANS bits set before marking */
} fstrans_cookie_t;
/*
* Introduced in Linux 3.9, however this cannot be solely relied on before
* Linux 3.18 as it doesn't turn off __GFP_FS as it should.
*/
#ifdef PF_MEMALLOC_NOIO
#define __SPL_PF_MEMALLOC_NOIO (PF_MEMALLOC_NOIO)
#else
#define __SPL_PF_MEMALLOC_NOIO (0)
#endif
/*
* PF_FSTRANS is removed from Linux 4.12
*/
#ifdef PF_FSTRANS
#define __SPL_PF_FSTRANS (PF_FSTRANS)
#else
#define __SPL_PF_FSTRANS (0)
#endif
/*
* Union of whichever of the two task flags this kernel provides; a flag
* the kernel lacks contributes 0. spl_fstrans_mark() carries a
* BUILD_BUG_ON() for the case where the result is 0.
*/
#define SPL_FSTRANS (__SPL_PF_FSTRANS|__SPL_PF_MEMALLOC_NOIO)
static inline fstrans_cookie_t
spl_fstrans_mark(void)
{
fstrans_cookie_t cookie;
BUILD_BUG_ON(SPL_FSTRANS == 0);
cookie.fstrans_thread = current;
cookie.saved_flags = current->flags & SPL_FSTRANS;
current->flags |= SPL_FSTRANS;
return (cookie);
}
/*
 * Undo spl_fstrans_mark(): put the SPL_FSTRANS bits of the current task
 * back to the values recorded in the cookie.
 *
 * Asserts that the cookie was created by the current task and that all
 * SPL_FSTRANS bits are still set.
 */
static inline void
spl_fstrans_unmark(fstrans_cookie_t cookie)
{
	ASSERT3P(cookie.fstrans_thread, ==, current);
	ASSERT((current->flags & SPL_FSTRANS) == SPL_FSTRANS);

	/* Clear the marker bits, then re-apply whichever were set before. */
	current->flags = (current->flags & ~SPL_FSTRANS) | cookie.saved_flags;
}
/*
 * Return the SPL_FSTRANS bits currently set on the running task
 * (non-zero when any of them is set, 0 otherwise).
 */
static inline int
spl_fstrans_check(void)
{
	int marked = current->flags & SPL_FSTRANS;

	return (marked);
}
/*
 * Test only the PF_FSTRANS portion of the task flags.  Because it
 * ignores the other bit in SPL_FSTRANS, it must not be used to detect
 * whether spl_fstrans_mark() has been called.
 */
static inline int
__spl_pf_fstrans_check(void)
{
	int pf_bits = current->flags & __SPL_PF_FSTRANS;

	return (pf_bits);
}
/*
* Accessors for the kmem_alloc_used counter. The counter is an atomic64_t
* when HAVE_ATOMIC64_T is defined and a plain atomic_t otherwise; the
* macros hide which atomic API is in use.
*/
#ifdef HAVE_ATOMIC64_T
#define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used)
#define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used)
#define kmem_alloc_used_read() atomic64_read(&kmem_alloc_used)
#define kmem_alloc_used_set(size) atomic64_set(&kmem_alloc_used, size)
extern atomic64_t kmem_alloc_used;
extern unsigned long long kmem_alloc_max;
#else /* HAVE_ATOMIC64_T */
#define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used)
#define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used)
#define kmem_alloc_used_read() atomic_read(&kmem_alloc_used)
#define kmem_alloc_used_set(size) atomic_set(&kmem_alloc_used, size)
extern atomic_t kmem_alloc_used;
extern unsigned long long kmem_alloc_max;
#endif /* HAVE_ATOMIC64_T */
/* NOTE(review): presumably module tunables; defined outside this header. */
extern unsigned int spl_kmem_alloc_warn;
extern unsigned int spl_kmem_alloc_max;
/*
* Public allocation entry points. kmem_alloc()/kmem_zalloc() forward the
* caller's __func__ and __LINE__ to the spl_kmem_*() implementations;
* kmem_free() takes the pointer and the size.
*/
#define kmem_alloc(sz, fl) spl_kmem_alloc((sz), (fl), __func__, __LINE__)
#define kmem_zalloc(sz, fl) spl_kmem_zalloc((sz), (fl), __func__, __LINE__)
#define kmem_free(ptr, sz) spl_kmem_free((ptr), (sz))
extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
extern void spl_kmem_free(const void *ptr, size_t sz);
/*
* The following functions are only available for internal use.
*
* The alloc variants take an additional 'node' argument; the _track
* variant also receives the caller's function name and line number.
*/
extern void *spl_kmem_alloc_impl(size_t size, int flags, int node);
extern void *spl_kmem_alloc_debug(size_t size, int flags, int node);
extern void *spl_kmem_alloc_track(size_t size, int flags,
const char *func, int line, int node);
extern void spl_kmem_free_impl(const void *buf, size_t size);
extern void spl_kmem_free_debug(const void *buf, size_t size);
extern void spl_kmem_free_track(const void *buf, size_t size);
/* NOTE(review): presumably subsystem setup/teardown -- confirm in spl-kmem.c. */
extern int spl_kmem_init(void);
extern void spl_kmem_fini(void);
#endif /* _SPL_KMEM_H */

View File

@ -0,0 +1,240 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_KMEM_CACHE_H
#define _SPL_KMEM_CACHE_H
#include <sys/taskq.h>
/*
* Slab allocation interfaces. The SPL slab differs from the standard
* Linux SLAB or SLUB primarily in that each cache may be backed by slabs
* allocated from the physical or virtual memory address space. The virtual
* slabs allow for good behavior when allocating large objects of identical
* size. This slab implementation also supports both constructors and
* destructors which the Linux slab does not.
*/
/*
* Bit positions for the cache flags. The KMC_* masks defined further
* down in this header are built as (1 << KMC_BIT_*). Positions 10-13
* are not assigned.
*/
enum {
KMC_BIT_NOTOUCH = 0, /* Don't update ages */
KMC_BIT_NODEBUG = 1, /* Default behavior */
KMC_BIT_NOMAGAZINE = 2, /* XXX: Unsupported */
KMC_BIT_NOHASH = 3, /* XXX: Unsupported */
KMC_BIT_QCACHE = 4, /* XXX: Unsupported */
KMC_BIT_KMEM = 5, /* Use kmem cache */
KMC_BIT_VMEM = 6, /* Use vmem cache */
KMC_BIT_SLAB = 7, /* Use Linux slab cache */
KMC_BIT_OFFSLAB = 8, /* Objects not on slab */
KMC_BIT_NOEMERGENCY = 9, /* Disable emergency objects */
KMC_BIT_DEADLOCKED = 14, /* Deadlock detected */
KMC_BIT_GROWING = 15, /* Growing in progress */
KMC_BIT_REAPING = 16, /* Reaping in progress */
KMC_BIT_DESTROY = 17, /* Destroy in progress */
KMC_BIT_TOTAL = 18, /* Proc handler helper bit */
KMC_BIT_ALLOC = 19, /* Proc handler helper bit */
KMC_BIT_MAX = 20, /* Proc handler helper bit */
};
/*
* kmem move callback return values; this is the return type of the
* callback accepted by spl_kmem_cache_set_move().
*/
typedef enum kmem_cbrc {
KMEM_CBRC_YES = 0, /* Object moved */
KMEM_CBRC_NO = 1, /* Object not moved */
KMEM_CBRC_LATER = 2, /* Object not moved, try again later */
KMEM_CBRC_DONT_NEED = 3, /* Neither object is needed */
KMEM_CBRC_DONT_KNOW = 4, /* Object unknown */
} kmem_cbrc_t;
/* Flag masks, one per KMC_BIT_* position. */
#define KMC_NOTOUCH (1 << KMC_BIT_NOTOUCH)
#define KMC_NODEBUG (1 << KMC_BIT_NODEBUG)
#define KMC_NOMAGAZINE (1 << KMC_BIT_NOMAGAZINE)
#define KMC_NOHASH (1 << KMC_BIT_NOHASH)
#define KMC_QCACHE (1 << KMC_BIT_QCACHE)
#define KMC_KMEM (1 << KMC_BIT_KMEM)
#define KMC_VMEM (1 << KMC_BIT_VMEM)
#define KMC_SLAB (1 << KMC_BIT_SLAB)
#define KMC_OFFSLAB (1 << KMC_BIT_OFFSLAB)
#define KMC_NOEMERGENCY (1 << KMC_BIT_NOEMERGENCY)
#define KMC_DEADLOCKED (1 << KMC_BIT_DEADLOCKED)
#define KMC_GROWING (1 << KMC_BIT_GROWING)
#define KMC_REAPING (1 << KMC_BIT_REAPING)
#define KMC_DESTROY (1 << KMC_BIT_DESTROY)
#define KMC_TOTAL (1 << KMC_BIT_TOTAL)
#define KMC_ALLOC (1 << KMC_BIT_ALLOC)
#define KMC_MAX (1 << KMC_BIT_MAX)
/* NOTE(review): presumably shrinker/reap tuning values -- confirm at use sites. */
#define KMC_REAP_CHUNK INT_MAX
#define KMC_DEFAULT_SEEKS 1
/* Expiration reasons and reclaim mode bits. */
#define KMC_EXPIRE_AGE 0x1 /* Due to age */
#define KMC_EXPIRE_MEM 0x2 /* Due to low memory */
#define KMC_RECLAIM_ONCE 0x1 /* Force a single shrinker pass */
/*
* Globals defined outside this header.
* NOTE(review): spl_kmem_cache_sem presumably guards spl_kmem_cache_list
* -- confirm in the implementation.
*/
extern unsigned int spl_kmem_cache_expire;
extern struct list_head spl_kmem_cache_list;
extern struct rw_semaphore spl_kmem_cache_sem;
/*
* Magic values. The prefixes match the *_magic fields of the magazine
* (skm), object (sko), slab (sks) and cache (skc) structures below.
*/
#define SKM_MAGIC 0x2e2e2e2e
#define SKO_MAGIC 0x20202020
#define SKS_MAGIC 0x22222222
#define SKC_MAGIC 0x2c2c2c2c
/* Default cache parameters. */
#define SPL_KMEM_CACHE_DELAY 15 /* Minimum slab release age */
#define SPL_KMEM_CACHE_REAP 0 /* Default reap everything */
#define SPL_KMEM_CACHE_OBJ_PER_SLAB 8 /* Target objects per slab */
#define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN 1 /* Minimum objects per slab */
#define SPL_KMEM_CACHE_ALIGN 8 /* Default object alignment */
#ifdef _LP64
#define SPL_KMEM_CACHE_MAX_SIZE 32 /* Max slab size in MB */
#else
#define SPL_KMEM_CACHE_MAX_SIZE 4 /* Max slab size in MB */
#endif
/* Allocation order limits derived from the kernel's MAX_ORDER. */
#define SPL_MAX_ORDER (MAX_ORDER - 3)
#define SPL_MAX_ORDER_NR_PAGES (1 << (SPL_MAX_ORDER - 1))
/* Note: SPL_MAX_KMEM_CACHE_ORDER is only defined in the CONFIG_SLUB case. */
#ifdef CONFIG_SLUB
#define SPL_MAX_KMEM_CACHE_ORDER PAGE_ALLOC_COSTLY_ORDER
#define SPL_MAX_KMEM_ORDER_NR_PAGES (1 << (SPL_MAX_KMEM_CACHE_ORDER - 1))
#else
#define SPL_MAX_KMEM_ORDER_NR_PAGES (KMALLOC_MAX_SIZE >> PAGE_SHIFT)
#endif
/* Placeholders: POINTER_IS_VALID() is always 0, POINTER_INVALIDATE() expands to nothing. */
#define POINTER_IS_VALID(p) 0 /* Unimplemented */
#define POINTER_INVALIDATE(pp) /* Unimplemented */
/*
* Callback types accepted by spl_kmem_cache_create() (constructor,
* destructor, reclaim). Parameter meanings are not spelled out in this
* header.
*/
typedef int (*spl_kmem_ctor_t)(void *, void *, int);
typedef void (*spl_kmem_dtor_t)(void *, void *);
typedef void (*spl_kmem_reclaim_t)(void *);
/*
* Magazine of object pointers. A cache references its magazines through
* spl_kmem_cache_t.skc_mag; the trailing skm_objs[] array holds the
* object pointers.
*/
typedef struct spl_kmem_magazine {
uint32_t skm_magic; /* Sanity magic */
uint32_t skm_avail; /* Available objects */
uint32_t skm_size; /* Magazine size */
uint32_t skm_refill; /* Batch refill size */
struct spl_kmem_cache *skm_cache; /* Owned by cache */
unsigned long skm_age; /* Last cache access */
unsigned int skm_cpu; /* Owned by cpu */
void *skm_objs[0]; /* Object pointers */
} spl_kmem_magazine_t;
/* Per-object bookkeeping record linking a buffer to its owning slab. */
typedef struct spl_kmem_obj {
uint32_t sko_magic; /* Sanity magic */
void *sko_addr; /* Buffer address */
struct spl_kmem_slab *sko_slab; /* Owned by slab */
struct list_head sko_list; /* Free object list linkage */
} spl_kmem_obj_t;
/* Slab record: belongs to one cache and tracks its free objects and usage. */
typedef struct spl_kmem_slab {
uint32_t sks_magic; /* Sanity magic */
uint32_t sks_objs; /* Objects per slab */
struct spl_kmem_cache *sks_cache; /* Owned by cache */
struct list_head sks_list; /* Slab list linkage */
struct list_head sks_free_list; /* Free object list */
unsigned long sks_age; /* Last modify jiffie */
uint32_t sks_ref; /* Ref count used objects */
} spl_kmem_slab_t;
/*
* Allocation request carrying a cache, allocation flags and a task queue
* entry. NOTE(review): presumably used to grow a cache asynchronously via
* a taskq -- confirm in the implementation.
*/
typedef struct spl_kmem_alloc {
struct spl_kmem_cache *ska_cache; /* Owned by cache */
int ska_flags; /* Allocation flags */
taskq_ent_t ska_tqe; /* Task queue entry */
} spl_kmem_alloc_t;
/* Node of a cache's emergency red-black tree (skc_emergency_tree). */
typedef struct spl_kmem_emergency {
struct rb_node ske_node; /* Emergency tree linkage */
unsigned long ske_obj; /* Buffer address */
} spl_kmem_emergency_t;
/*
* Cache descriptor: identity and callbacks, sizing parameters, slab
* lists, locking, and the statistics counters (skc_slab_* / skc_obj_*).
*/
typedef struct spl_kmem_cache {
uint32_t skc_magic; /* Sanity magic */
uint32_t skc_name_size; /* Name length */
char *skc_name; /* Name string */
spl_kmem_magazine_t **skc_mag; /* Per-CPU warm cache */
uint32_t skc_mag_size; /* Magazine size */
uint32_t skc_mag_refill; /* Magazine refill count */
spl_kmem_ctor_t skc_ctor; /* Constructor */
spl_kmem_dtor_t skc_dtor; /* Destructor */
spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
void *skc_private; /* Private data */
void *skc_vmp; /* Unused */
struct kmem_cache *skc_linux_cache; /* Linux slab cache if used */
unsigned long skc_flags; /* Flags */
uint32_t skc_obj_size; /* Object size */
uint32_t skc_obj_align; /* Object alignment */
uint32_t skc_slab_objs; /* Objects per slab */
uint32_t skc_slab_size; /* Slab size */
uint32_t skc_delay; /* Slab reclaim interval */
uint32_t skc_reap; /* Slab reclaim count */
atomic_t skc_ref; /* Ref count callers */
taskqid_t skc_taskqid; /* Slab reclaim task */
struct list_head skc_list; /* List of caches linkage */
struct list_head skc_complete_list; /* Completely alloc'ed */
struct list_head skc_partial_list; /* Partially alloc'ed */
struct rb_root skc_emergency_tree; /* Min sized objects */
spinlock_t skc_lock; /* Cache lock */
spl_wait_queue_head_t skc_waitq; /* Allocation waiters */
uint64_t skc_slab_fail; /* Slab alloc failures */
uint64_t skc_slab_create; /* Slab creates */
uint64_t skc_slab_destroy; /* Slab destroys */
uint64_t skc_slab_total; /* Slab total current */
uint64_t skc_slab_alloc; /* Slab alloc current */
uint64_t skc_slab_max; /* Slab max historic */
uint64_t skc_obj_total; /* Obj total current */
uint64_t skc_obj_alloc; /* Obj alloc current */
uint64_t skc_obj_max; /* Obj max historic */
uint64_t skc_obj_deadlock; /* Obj emergency deadlocks */
uint64_t skc_obj_emergency; /* Obj emergency current */
uint64_t skc_obj_emergency_max; /* Obj emergency max */
} spl_kmem_cache_t;
/* Alias kmem_cache_t to the SPL cache type. */
#define kmem_cache_t spl_kmem_cache_t
/*
* Cache API prototypes; the kmem_cache_*() macros later in this header
* forward to these functions.
*/
extern spl_kmem_cache_t *spl_kmem_cache_create(char *name, size_t size,
size_t align, spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor,
spl_kmem_reclaim_t reclaim, void *priv, void *vmp, int flags);
extern void spl_kmem_cache_set_move(spl_kmem_cache_t *,
kmem_cbrc_t (*)(void *, void *, size_t, void *));
extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
extern void *spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags);
extern void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj);
extern void spl_kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags);
extern void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count);
extern void spl_kmem_reap(void);
/*
 * Wrappers that map the kmem_cache_*() names onto the spl_kmem_cache_*()
 * implementations.  Arguments are forwarded unchanged.
 */
#define kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl) \
	spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl)
#define kmem_cache_set_move(skc, move) spl_kmem_cache_set_move(skc, move)
#define kmem_cache_destroy(skc) spl_kmem_cache_destroy(skc)
#define kmem_cache_alloc(skc, flags) spl_kmem_cache_alloc(skc, flags)
#define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj)
/*
 * Reap using the cache's own skc_reap count.  The argument is
 * parenthesized before the member access so that expressions such as
 * '&cache' or 'caches + i' expand correctly.  Note that 'skc' is
 * evaluated twice; do not pass an expression with side effects.
 */
#define kmem_cache_reap_now(skc) \
	spl_kmem_cache_reap_now(skc, (skc)->skc_reap)
#define kmem_reap() spl_kmem_reap()
/*
* The following functions are only available for internal use.
* NOTE(review): presumably subsystem setup/teardown -- confirm in the
* implementation.
*/
extern int spl_kmem_cache_init(void);
extern void spl_kmem_cache_fini(void);
#endif /* _SPL_KMEM_CACHE_H */

42
include/spl/sys/kobj.h Normal file
View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_KOBJ_H
#define _SPL_KOBJ_H
#include <sys/vnode.h>
/* File handle used by the kobj_*() routines; it wraps a single vnode pointer. */
typedef struct _buf {
vnode_t *vp;
} _buf_t;
typedef struct _buf buf_t;
/*
* File access prototypes, implemented outside this header.
* NOTE(review): return-value conventions are not visible here -- confirm
* in the implementation before relying on them.
*/
extern struct _buf *kobj_open_file(const char *name);
extern void kobj_close_file(struct _buf *file);
extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
unsigned off);
extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
#endif /* _SPL_KOBJ_H */

208
include/spl/sys/kstat.h Normal file
View File

@ -0,0 +1,208 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_KSTAT_H
#define _SPL_KSTAT_H
#include <linux/module.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/proc.h>
/* String buffers in this header are sized from KSTAT_STRLEN. */
#define KSTAT_STRLEN 255
#define KSTAT_RAW_MAX (128*1024)
/*
* For reference valid classes are:
* disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc
*/
/* kstat types; presumably the values stored in kstat_t.ks_type. */
#define KSTAT_TYPE_RAW 0 /* can be anything; ks_ndata >= 1 */
#define KSTAT_TYPE_NAMED 1 /* name/value pair; ks_ndata >= 1 */
#define KSTAT_TYPE_INTR 2 /* interrupt stats; ks_ndata == 1 */
#define KSTAT_TYPE_IO 3 /* I/O stats; ks_ndata == 1 */
#define KSTAT_TYPE_TIMER 4 /* event timer; ks_ndata >= 1 */
#define KSTAT_NUM_TYPES 5
/* Value types; presumably stored in kstat_named_t.data_type. */
#define KSTAT_DATA_CHAR 0
#define KSTAT_DATA_INT32 1
#define KSTAT_DATA_UINT32 2
#define KSTAT_DATA_INT64 3
#define KSTAT_DATA_UINT64 4
#define KSTAT_DATA_LONG 5
#define KSTAT_DATA_ULONG 6
#define KSTAT_DATA_STRING 7
#define KSTAT_NUM_DATAS 8
/* Interrupt kinds; KSTAT_NUM_INTRS sizes kstat_intr_t.intrs[]. */
#define KSTAT_INTR_HARD 0
#define KSTAT_INTR_SOFT 1
#define KSTAT_INTR_WATCHDOG 2
#define KSTAT_INTR_SPURIOUS 3
#define KSTAT_INTR_MULTSVC 4
#define KSTAT_NUM_INTRS 5
/* Flag bits; every flag except KSTAT_FLAG_VIRTUAL is listed as unsupported. */
#define KSTAT_FLAG_VIRTUAL 0x01
#define KSTAT_FLAG_VAR_SIZE 0x02
#define KSTAT_FLAG_WRITABLE 0x04
#define KSTAT_FLAG_PERSISTENT 0x08
#define KSTAT_FLAG_DORMANT 0x10
#define KSTAT_FLAG_UNSUPPORTED \
(KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_WRITABLE | \
KSTAT_FLAG_PERSISTENT | KSTAT_FLAG_DORMANT)
#define KS_MAGIC 0x9d9d9d9d
/* Dynamic updates */
#define KSTAT_READ 0
#define KSTAT_WRITE 1
/* Forward declaration so the callback type below can refer to kstat_t. */
struct kstat_s;
typedef struct kstat_s kstat_t;
typedef int kid_t; /* unique kstat id */
/* NOTE(review): the int argument is presumably KSTAT_READ/KSTAT_WRITE -- confirm. */
typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */
/* Per-module record: name, list linkage, its kstat entries and proc entry. */
typedef struct kstat_module {
char ksm_name[KSTAT_STRLEN+1]; /* module name */
struct list_head ksm_module_list; /* module linkage */
struct list_head ksm_kstat_list; /* list of kstat entries */
struct proc_dir_entry *ksm_proc; /* proc entry */
} kstat_module_t;
/*
* Callback table stored in kstat_t.ks_raw_ops (ops table for raw type);
* installed through __kstat_set_raw_ops().
*/
typedef struct kstat_raw_ops {
int (*headers)(char *buf, size_t size);
int (*data)(char *buf, size_t size, void *data);
void *(*addr)(kstat_t *ksp, loff_t index);
} kstat_raw_ops_t;
/*
* Main kstat record (typedef'd to kstat_t).
* NOTE(review): ks_magic is a signed int while KS_MAGIC (0x9d9d9d9d)
* does not fit in a 32-bit signed int; confirm comparisons against
* KS_MAGIC behave as intended.
*/
struct kstat_s {
int ks_magic; /* magic value */
kid_t ks_kid; /* unique kstat ID */
hrtime_t ks_crtime; /* creation time */
hrtime_t ks_snaptime; /* last access time */
char ks_module[KSTAT_STRLEN+1]; /* provider module name */
int ks_instance; /* provider module instance */
char ks_name[KSTAT_STRLEN+1]; /* kstat name */
char ks_class[KSTAT_STRLEN+1]; /* kstat class */
uchar_t ks_type; /* kstat data type */
uchar_t ks_flags; /* kstat flags */
void *ks_data; /* kstat type-specific data */
uint_t ks_ndata; /* # of data records */
size_t ks_data_size; /* size of kstat data section */
struct proc_dir_entry *ks_proc; /* proc linkage */
kstat_update_t *ks_update; /* dynamic updates */
void *ks_private; /* private data */
kmutex_t ks_private_lock; /* kstat private data lock */
kmutex_t *ks_lock; /* kstat data lock */
struct list_head ks_list; /* kstat linkage */
kstat_module_t *ks_owner; /* kstat module linkage */
kstat_raw_ops_t ks_raw_ops; /* ops table for raw type */
char *ks_raw_buf; /* buf used for raw ops */
size_t ks_raw_bufsize; /* size of raw ops buffer */
};
/*
* Named counter: a name, a data type tag and a value union.
* NOTE(review): 'name' is KSTAT_STRLEN bytes, whereas the other name
* buffers in this header use KSTAT_STRLEN+1; confirm writers leave room
* for the terminating NUL.
*/
typedef struct kstat_named_s {
char name[KSTAT_STRLEN]; /* name of counter */
uchar_t data_type; /* data type */
union {
char c[16]; /* 128-bit int */
int32_t i32; /* 32-bit signed int */
uint32_t ui32; /* 32-bit unsigned int */
int64_t i64; /* 64-bit signed int */
uint64_t ui64; /* 64-bit unsigned int */
long l; /* native signed long */
ulong_t ul; /* native unsigned long */
struct {
union {
char *ptr; /* NULL-term string */
char __pad[8]; /* 64-bit padding */
} addr;
uint32_t len; /* # bytes for strlen + '\0' */
} string;
} value;
} kstat_named_t;
/* Accessors for the string member of the value union. */
#define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr)
#define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len)
/*
* Interrupt counters, one slot per KSTAT_NUM_INTRS.
* NOTE(review): presumably indexed by the KSTAT_INTR_* values -- confirm.
*/
typedef struct kstat_intr {
uint_t intrs[KSTAT_NUM_INTRS];
} kstat_intr_t;
/*
* I/O statistics: byte and operation counts plus wait/run queue timing.
* This is the argument type of the kstat_waitq_*() and kstat_runq_*()
* functions.
*/
typedef struct kstat_io {
u_longlong_t nread; /* number of bytes read */
u_longlong_t nwritten; /* number of bytes written */
uint_t reads; /* number of read operations */
uint_t writes; /* number of write operations */
hrtime_t wtime; /* cumulative wait (pre-service) time */
hrtime_t wlentime; /* cumulative wait len*time product */
hrtime_t wlastupdate; /* last time wait queue changed */
hrtime_t rtime; /* cumulative run (service) time */
hrtime_t rlentime; /* cumulative run length*time product */
hrtime_t rlastupdate; /* last time run queue changed */
uint_t wcnt; /* count of elements in wait state */
uint_t rcnt; /* count of elements in run state */
} kstat_io_t;
/* Event timer statistics: event count plus elapsed/min/max/start/stop times. */
typedef struct kstat_timer {
char name[KSTAT_STRLEN+1]; /* event name */
u_longlong_t num_events; /* number of events */
hrtime_t elapsed_time; /* cumulative elapsed time */
hrtime_t min_time; /* shortest event duration */
hrtime_t max_time; /* longest event duration */
hrtime_t start_time; /* previous event start time */
hrtime_t stop_time; /* previous event stop time */
} kstat_timer_t;
/* NOTE(review): presumably subsystem setup/teardown -- confirm in the implementation. */
int spl_kstat_init(void);
void spl_kstat_fini(void);
/* Implementation entry points; callers use the kstat_*() macros below. */
extern void __kstat_set_raw_ops(kstat_t *ksp,
int (*headers)(char *buf, size_t size),
int (*data)(char *buf, size_t size, void *data),
void* (*addr)(kstat_t *ksp, loff_t index));
extern kstat_t *__kstat_create(const char *ks_module, int ks_instance,
const char *ks_name, const char *ks_class, uchar_t ks_type,
uint_t ks_ndata, uchar_t ks_flags);
extern void __kstat_install(kstat_t *ksp);
extern void __kstat_delete(kstat_t *ksp);
/* Wait/run queue accounting on a kstat_io_t. */
extern void kstat_waitq_enter(kstat_io_t *);
extern void kstat_waitq_exit(kstat_io_t *);
extern void kstat_runq_enter(kstat_io_t *);
extern void kstat_runq_exit(kstat_io_t *);
/* Public names; each forwards its arguments unchanged to the __kstat_*() function. */
#define kstat_set_raw_ops(k, h, d, a) \
__kstat_set_raw_ops(k, h, d, a)
#define kstat_create(m, i, n, c, t, s, f) \
__kstat_create(m, i, n, c, t, s, f)
#define kstat_install(k) __kstat_install(k)
#define kstat_delete(k) __kstat_delete(k)
#endif /* _SPL_KSTAT_H */

208
include/spl/sys/list.h Normal file
View File

@ -0,0 +1,208 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_LIST_H
#define _SPL_LIST_H
#include <sys/types.h>
#include <linux/list.h>
/*
* NOTE: I have implemented the Solaris list API in terms of the native
* linux API. This has certain advantages in terms of leveraging the linux
* list debugging infrastructure, but it also means that the internals of a
* list differ slightly from those on Solaris. This is not a problem as long as
* all callers stick to the published API. The two major differences are:
*
* 1) A list_node_t is mapped to a linux list_head struct which changes
* the name of the list_next/list_prev pointers to next/prev respectively.
*
* 2) A list_node_t which is not attached to a list on Solaris is denoted
* by having its list_next/list_prev pointers set to NULL. Under linux
* the next/prev pointers are set to LIST_POISON1 and LIST_POISON2
* respectively. At this moment this only impacts the implementation
* of the list_link_init() and list_link_active() functions.
*/
/* A list node is the native Linux struct list_head. */
typedef struct list_head list_node_t;
/*
* List descriptor. list_offset is the byte offset of the embedded
* list_node_t inside each object (see list_d2l()/list_object());
* list_size is recorded by list_create() and not otherwise used in the
* functions visible in this header.
*/
typedef struct list {
size_t list_size;
size_t list_offset;
list_node_t list_head;
} list_t;
/*
 * Convert between an object pointer and its embedded list node using the
 * list's recorded offset:
 *   list_d2l(a, obj)     object ("data") pointer -> list_node_t pointer
 *   list_object(a, node) list_node_t pointer     -> object pointer
 *
 * The pointer argument is parenthesized before the cast so that pointer
 * expressions such as 'p + 1' are converted as a whole rather than
 * having the cast bind to 'p' alone.
 */
#define list_d2l(a, obj) ((list_node_t *)(((char *)(obj)) + (a)->list_offset))
#define list_object(a, node) ((void *)(((char *)(node)) - (a)->list_offset))
/* Return non-zero when the list contains no nodes, per list_empty(). */
static inline int
list_is_empty(list_t *list)
{
	const list_node_t *anchor = &list->list_head;

	return (list_empty(anchor));
}
/*
 * Mark a node as not being on any list by setting its next/prev
 * pointers to the LIST_POISON1/LIST_POISON2 values.
 */
static inline void
list_link_init(list_node_t *node)
{
	node->prev = LIST_POISON2;
	node->next = LIST_POISON1;
}
/*
 * Initialize a list header.
 *   size   - stored in list_size
 *   offset - byte offset of the list_node_t within each object
 */
static inline void
list_create(list_t *list, size_t size, size_t offset)
{
	INIT_LIST_HEAD(&list->list_head);
	list->list_offset = offset;
	list->list_size = size;
}
/*
 * Tear down a list header by unlinking its anchor node with list_del().
 */
static inline void
list_destroy(list_t *list)
{
	list_node_t *anchor = &list->list_head;

	list_del(anchor);
}
/*
 * Link `object` in directly after the list's anchor node, i.e. at the front.
 */
static inline void
list_insert_head(list_t *list, void *object)
{
	list_node_t *node = list_d2l(list, object);

	list_add(node, &list->list_head);
}
/*
 * Link `object` in directly before the list's anchor node, i.e. at the end.
 */
static inline void
list_insert_tail(list_t *list, void *object)
{
	list_node_t *node = list_d2l(list, object);

	list_add_tail(node, &list->list_head);
}
/*
 * Insert `nobject` immediately after `object`.  A NULL `object` means
 * "insert at the head of the list".
 */
static inline void
list_insert_after(list_t *list, void *object, void *nobject)
{
	if (object != NULL) {
		list_add(list_d2l(list, nobject), list_d2l(list, object));
		return;
	}

	list_insert_head(list, nobject);
}
/*
 * Insert `nobject` immediately before `object`.  A NULL `object` means
 * "insert at the tail of the list".
 */
static inline void
list_insert_before(list_t *list, void *object, void *nobject)
{
	if (object != NULL) {
		list_add_tail(list_d2l(list, nobject), list_d2l(list, object));
		return;
	}

	list_insert_tail(list, nobject);
}
/*
 * Unlink `object` from the list using the native list_del().
 */
static inline void
list_remove(list_t *list, void *object)
{
	list_node_t *node = list_d2l(list, object);

	list_del(node);
}
/*
 * Unlink and return the first object on the list, or NULL when the anchor
 * points back at itself (empty list).
 */
static inline void *
list_remove_head(list_t *list)
{
	list_node_t *first = list->list_head.next;

	if (first != &list->list_head) {
		list_del(first);
		return (list_object(list, first));
	}

	return (NULL);
}
/*
 * Unlink and return the last object on the list, or NULL when the anchor
 * points back at itself (empty list).
 */
static inline void *
list_remove_tail(list_t *list)
{
	list_node_t *last = list->list_head.prev;

	if (last != &list->list_head) {
		list_del(last);
		return (list_object(list, last));
	}

	return (NULL);
}
/*
 * Return the first object on the list without removing it, or NULL if the
 * list is empty.
 */
static inline void *
list_head(list_t *list)
{
	return (list_is_empty(list) ? NULL :
	    list_object(list, list->list_head.next));
}
/*
 * Return the last object on the list without removing it, or NULL if the
 * list is empty.
 */
static inline void *
list_tail(list_t *list)
{
	return (list_is_empty(list) ? NULL :
	    list_object(list, list->list_head.prev));
}
/*
 * Return the object following `object`, or NULL when `object` is the last
 * one (its successor is the list's anchor node).
 */
static inline void *
list_next(list_t *list, void *object)
{
	list_node_t *succ = list_d2l(list, object)->next;

	if (succ == &list->list_head)
		return (NULL);

	return (list_object(list, succ));
}
/*
 * Return the object preceding `object`, or NULL when `object` is the first
 * one (its predecessor is the list's anchor node).
 */
static inline void *
list_prev(list_t *list, void *object)
{
	list_node_t *pred = list_d2l(list, object)->prev;

	if (pred == &list->list_head)
		return (NULL);

	return (list_object(list, pred));
}
/*
 * Return 1 when neither link pointer holds its poison value, 0 otherwise.
 * A node is therefore reported inactive as soon as either pointer is
 * poisoned.
 */
static inline int
list_link_active(list_node_t *node)
{
	return (!(node->next == LIST_POISON1 || node->prev == LIST_POISON2));
}
/*
 * Hand the contents of `src` to list_splice_init(), using the current last
 * node of `dst` as the splice point.
 */
static inline void
spl_list_move_tail(list_t *dst, list_t *src)
{
	list_node_t *dst_last = dst->list_head.prev;

	list_splice_init(&src->list_head, dst_last);
}
/* Route list_move_tail() callers to the SPL implementation above. */
#define	list_move_tail(dst, src)	spl_list_move_tail(dst, src)
/*
 * Put new_node in the list position currently occupied by old_node, then
 * mark old_node as unlinked via list_link_init().
 */
static inline void
list_link_replace(list_node_t *old_node, list_node_t *new_node)
{
/* Copy old_node's neighbours into new_node first ... */
new_node->next = old_node->next;
new_node->prev = old_node->prev;
/* ... then repoint both neighbours at new_node. */
old_node->prev->next = new_node;
old_node->next->prev = new_node;
list_link_init(old_node);
}
#endif /* _SPL_LIST_H */

32
include/spl/sys/mode.h Normal file
View File

@ -0,0 +1,32 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_MODE_H
#define _SPL_MODE_H
/*
 * IFTOVT()/VTTOIF() forward to vn_mode_to_vtype()/vn_vtype_to_mode(), which
 * are declared elsewhere.  MAKEIMODE(T, M) ORs the VTTOIF(T) bits with the
 * bits of M that lie outside S_IFMT.
 */
#define IFTOVT(mode) vn_mode_to_vtype(mode)
#define VTTOIF(vtype) vn_vtype_to_mode(vtype)
#define MAKEIMODE(T, M) (VTTOIF(T) | ((M) & ~S_IFMT))
#endif /* _SPL_MODE_H */

184
include/spl/sys/mutex.h Normal file
View File

@ -0,0 +1,184 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_MUTEX_H
#define _SPL_MUTEX_H
#include <sys/types.h>
#include <linux/mutex.h>
#include <linux/lockdep.h>
/*
 * Mutex types.  mutex_init() asserts that only MUTEX_DEFAULT or
 * MUTEX_NOLOCKDEP is passed.  With CONFIG_LOCKDEP, a MUTEX_NOLOCKDEP mutex
 * has lockdep switched off around its lock/unlock calls.
 */
typedef enum {
MUTEX_DEFAULT = 0,
MUTEX_SPIN = 1,
MUTEX_ADAPTIVE = 2,
MUTEX_NOLOCKDEP = 3
} kmutex_type_t;
/*
 * SPL mutex: a native Linux mutex plus owner tracking.
 *   m_mutex - the underlying Linux mutex
 *   m_lock  - spinlock held around mutex_unlock() in mutex_exit()
 *   m_owner - set to `current` after acquisition, NULL when released
 *   m_type  - kmutex_type_t, only present with CONFIG_LOCKDEP
 */
typedef struct {
struct mutex m_mutex;
spinlock_t m_lock; /* used for serializing mutex_exit */
kthread_t *m_owner;
#ifdef CONFIG_LOCKDEP
kmutex_type_t m_type;
#endif /* CONFIG_LOCKDEP */
} kmutex_t;
/* Address of the native Linux mutex embedded in a kmutex_t. */
#define MUTEX(mp) (&((mp)->m_mutex))
/*
 * Record the calling task (`current`) as the owner of mp.
 */
static inline void
spl_mutex_set_owner(kmutex_t *mp)
{
	kthread_t *self = current;

	mp->m_owner = self;
}
/*
 * Mark mp as having no owner.
 */
static inline void
spl_mutex_clear_owner(kmutex_t *mp)
{
	kthread_t *nobody = NULL;

	mp->m_owner = nobody;
}
/*
 * Ownership queries.  mutex_owner() reads m_owner through ACCESS_ONCE();
 * mutex_owned() and MUTEX_HELD() are true when that owner is `current`,
 * and MUTEX_NOT_HELD() is the negation.
 */
#define mutex_owner(mp) (ACCESS_ONCE((mp)->m_owner))
#define mutex_owned(mp) (mutex_owner(mp) == current)
#define MUTEX_HELD(mp) mutex_owned(mp)
#define MUTEX_NOT_HELD(mp) (!MUTEX_HELD(mp))
#ifdef CONFIG_LOCKDEP
/* Remember the mutex type so the lockdep helpers below can consult it. */
static inline void
spl_mutex_set_type(kmutex_t *mp, kmutex_type_t type)
{
	mp->m_type = type;
}

/* Call lockdep_off() only for a non-NULL MUTEX_NOLOCKDEP mutex. */
static inline void
spl_mutex_lockdep_off_maybe(kmutex_t *mp)
{
	if (!mp || mp->m_type != MUTEX_NOLOCKDEP)
		return;

	lockdep_off();
}

/* Call lockdep_on() only for a non-NULL MUTEX_NOLOCKDEP mutex. */
static inline void
spl_mutex_lockdep_on_maybe(kmutex_t *mp)
{
	if (!mp || mp->m_type != MUTEX_NOLOCKDEP)
		return;

	lockdep_on();
}
#else /* CONFIG_LOCKDEP */
/* Without CONFIG_LOCKDEP the helpers expand to nothing. */
#define	spl_mutex_set_type(mp, type)
#define	spl_mutex_lockdep_off_maybe(mp)
#define	spl_mutex_lockdep_on_maybe(mp)
#endif /* CONFIG_LOCKDEP */
/*
* The following functions must be a #define and not static inline.
* This ensures that the native linux mutex functions (lock/unlock)
* will be correctly located in the users code which is important
* for the built in kernel lock analysis tools
*/
/*
 * Initialize a kmutex_t.  Each expansion declares its own static
 * lock_class_key, initializes the native mutex and the m_lock spinlock,
 * clears the owner and records the type.  Only MUTEX_DEFAULT and
 * MUTEX_NOLOCKDEP are accepted (asserted).  The lock name is the
 * stringified `name` argument when `name` is non-zero, otherwise the
 * stringified `mp` expression.  `ibc` is not referenced.
 *
 * NOTE(review): this expands to a bare { } block rather than
 * do { } while (0), so it cannot be used as the body of an un-braced
 * if/else.
 */
#undef mutex_init
#define mutex_init(mp, name, type, ibc) \
{ \
static struct lock_class_key __key; \
ASSERT(type == MUTEX_DEFAULT || type == MUTEX_NOLOCKDEP); \
\
__mutex_init(MUTEX(mp), (name) ? (#name) : (#mp), &__key); \
spin_lock_init(&(mp)->m_lock); \
spl_mutex_clear_owner(mp); \
spl_mutex_set_type(mp, type); \
}
/*
 * Destroy a kmutex_t.  The only action is to verify that the mutex has no
 * recorded owner; nothing is released or freed here.
 */
#undef mutex_destroy
#define mutex_destroy(mp) \
{ \
VERIFY3P(mutex_owner(mp), ==, NULL); \
}
/*
 * Try to take the mutex without blocking.  Evaluates to the result of
 * mutex_trylock(); the owner is recorded only when that result is 1.
 * Lockdep is toggled around the attempt for MUTEX_NOLOCKDEP mutexes.
 */
/* BEGIN CSTYLED */
#define mutex_tryenter(mp) \
({ \
int _rc_; \
\
spl_mutex_lockdep_off_maybe(mp); \
if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1) \
spl_mutex_set_owner(mp); \
spl_mutex_lockdep_on_maybe(mp); \
\
_rc_; \
})
/* END CSTYLED */
/*
 * Acquire the mutex, asserting first that the caller does not already own
 * it, and record the caller as owner afterwards.  With
 * CONFIG_DEBUG_LOCK_ALLOC the lock is taken with mutex_lock_nested() and
 * the given subclass; otherwise `subclass` is ignored and plain
 * mutex_lock() is used.  mutex_enter() is the subclass-0 form.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define mutex_enter_nested(mp, subclass) \
{ \
ASSERT3P(mutex_owner(mp), !=, current); \
spl_mutex_lockdep_off_maybe(mp); \
mutex_lock_nested(MUTEX(mp), (subclass)); \
spl_mutex_lockdep_on_maybe(mp); \
spl_mutex_set_owner(mp); \
}
#else /* CONFIG_DEBUG_LOCK_ALLOC */
#define mutex_enter_nested(mp, subclass) \
{ \
ASSERT3P(mutex_owner(mp), !=, current); \
spl_mutex_lockdep_off_maybe(mp); \
mutex_lock(MUTEX(mp)); \
spl_mutex_lockdep_on_maybe(mp); \
spl_mutex_set_owner(mp); \
}
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
#define mutex_enter(mp) mutex_enter_nested((mp), 0)
/*
* The reason for the spinlock:
*
* The Linux mutex is designed with a fast-path/slow-path design such that it
* does not guarantee serialization upon itself, allowing a race where latter
* acquirers finish mutex_unlock before former ones.
*
* The race renders it unsafe to be used for serializing the freeing of an
* object in which the mutex is embedded, where the latter acquirer could go
* on to free the object while the former one is still doing mutex_unlock and
* causing memory corruption.
*
* However, there are many places in ZFS where the mutex is used for
* serializing object freeing, and the code is shared among other OSes without
* this issue. Thus, we need the spinlock to force the serialization on
* mutex_exit().
*
* See http://lwn.net/Articles/575477/ for the information about the race.
*/
/*
 * Release the mutex.  The owner is cleared first, while the mutex is still
 * held; mutex_unlock() then runs with the m_lock spinlock held so that
 * concurrent mutex_exit() calls on the same mutex are serialized.
 */
#define mutex_exit(mp) \
{ \
spl_mutex_clear_owner(mp); \
spin_lock(&(mp)->m_lock); \
spl_mutex_lockdep_off_maybe(mp); \
mutex_unlock(MUTEX(mp)); \
spl_mutex_lockdep_on_maybe(mp); \
spin_unlock(&(mp)->m_lock); \
/* NOTE: do not dereference mp after this point */ \
}
/*
 * Defined outside this header.
 * NOTE(review): presumably the setup/teardown hooks for the SPL mutex
 * subsystem - confirm against the implementation.
 */
int spl_mutex_init(void);
void spl_mutex_fini(void);
#endif /* _SPL_MUTEX_H */

36
include/spl/sys/param.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_PARAM_H
#define _SPL_PARAM_H
#include <asm/page.h>
/*
 * Pages to bytes and back: both shift by PAGE_SHIFT.  The shift is done in
 * the (promoted) type of the argument, so callers must pass a type wide
 * enough for the result.
 */
#define ptob(pages) ((pages) << PAGE_SHIFT)
#define btop(bytes) ((bytes) >> PAGE_SHIFT)
/* Largest uid value: UINT32_MAX. */
#define MAXUID UINT32_MAX
#endif /* _SPL_PARAM_H */

35
include/spl/sys/proc.h Normal file
View File

@ -0,0 +1,35 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_PROC_H
#define _SPL_PROC_H
#include <linux/proc_fs.h>
/*
 * Declarations only; the definitions live outside this header.
 * NOTE(review): proc_spl_kstat is presumably the /proc directory entry
 * used for kstat output, and spl_proc_init()/spl_proc_fini() the matching
 * setup/teardown hooks - confirm against the implementation.
 */
extern struct proc_dir_entry *proc_spl_kstat;
int spl_proc_init(void);
void spl_proc_fini(void);
#endif /* _SPL_PROC_H */

View File

@ -0,0 +1,32 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_PROCESSOR_H
#define _SPL_PROCESSOR_H
/* getcpuid() is an alias for the native smp_processor_id(). */
#define getcpuid() smp_processor_id()
/* Processor identifiers are plain ints. */
typedef int processorid_t;
#endif /* _SPL_PROCESSOR_H */

40
include/spl/sys/random.h Normal file
View File

@ -0,0 +1,40 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_RANDOM_H
#define _SPL_RANDOM_H
#include <linux/module.h>
#include <linux/random.h>
/*
 * Fill `ptr` with `len` bytes obtained from the native get_random_bytes().
 * The length is narrowed to int before the call.  Always returns 0.
 */
static __inline__ int
random_get_bytes(uint8_t *ptr, size_t len)
{
	void *buf = (void *)ptr;
	int nbytes = (int)len;

	get_random_bytes(buf, nbytes);

	return (0);
}
/*
 * Defined outside this header; takes the same (buffer, length) arguments as
 * random_get_bytes().
 */
extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
#endif /* _SPL_RANDOM_H */

273
include/spl/sys/rwlock.h Normal file
View File

@ -0,0 +1,273 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_RWLOCK_H
#define _SPL_RWLOCK_H
#include <sys/types.h>
#include <linux/rwsem.h>
/*
 * Linux kernel compatibility: per-implementation constants selected by
 * CONFIG_PREEMPT_RT_FULL, then CONFIG_RWSEM_GENERIC_SPINLOCK, then the
 * default rwsem.
 * NOTE(review): presumably the RWSEM_COUNT() values for exactly one reader
 * and exactly one writer - confirm against the users of these macros.
 */
#if defined(CONFIG_PREEMPT_RT_FULL)
#define SPL_RWSEM_SINGLE_READER_VALUE (1)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (0)
#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
#define SPL_RWSEM_SINGLE_READER_VALUE (1)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1)
#else
#define SPL_RWSEM_SINGLE_READER_VALUE (RWSEM_ACTIVE_READ_BIAS)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (RWSEM_ACTIVE_WRITE_BIAS)
#endif
/*
 * RWSEM_COUNT(sem) reads the rwsem's reader/writer counter, whose field
 * name and type depend on the kernel configuration.  The argument and the
 * whole expansion are parenthesized in every variant so that an expression
 * argument such as "&x" is dereferenced as a whole.
 */
/* Linux 3.16 changed activity to count for rwsem-spinlock */
#if defined(CONFIG_PREEMPT_RT_FULL)
#define	RWSEM_COUNT(sem)	((sem)->read_depth)
#elif defined(HAVE_RWSEM_ACTIVITY)
#define	RWSEM_COUNT(sem)	((sem)->activity)
/* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */
#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT)
#define	RWSEM_COUNT(sem)	atomic_long_read(&(sem)->count)
#else
#define	RWSEM_COUNT(sem)	((sem)->count)
#endif
/*
 * Spinlock wrappers: the raw_spin_* irqsave primitives are used when
 * RWSEM_SPINLOCK_IS_RAW is defined, the regular spin_* ones otherwise.
 * spl_rwsem_is_locked() forwards to the native rwsem_is_locked().
 */
#if defined(RWSEM_SPINLOCK_IS_RAW)
#define spl_rwsem_lock_irqsave(lk, fl) raw_spin_lock_irqsave(lk, fl)
#define spl_rwsem_unlock_irqrestore(lk, fl) \
raw_spin_unlock_irqrestore(lk, fl)
#define spl_rwsem_trylock_irqsave(lk, fl) raw_spin_trylock_irqsave(lk, fl)
#else
#define spl_rwsem_lock_irqsave(lk, fl) spin_lock_irqsave(lk, fl)
#define spl_rwsem_unlock_irqrestore(lk, fl) spin_unlock_irqrestore(lk, fl)
#define spl_rwsem_trylock_irqsave(lk, fl) spin_trylock_irqsave(lk, fl)
#endif /* RWSEM_SPINLOCK_IS_RAW */
#define spl_rwsem_is_locked(rwsem) rwsem_is_locked(rwsem)
/*
 * rwlock types.  rw_init() asserts that only RW_DEFAULT or RW_NOLOCKDEP is
 * passed.  With CONFIG_LOCKDEP, an RW_NOLOCKDEP lock has lockdep switched
 * off around its operations.
 */
typedef enum {
RW_DRIVER = 2,
RW_DEFAULT = 4,
RW_NOLOCKDEP = 5
} krw_type_t;
/*
 * Access mode passed to rw_enter()/rw_tryenter(); those macros handle
 * RW_READER and RW_WRITER and trip VERIFY(0) for anything else.
 */
typedef enum {
RW_NONE = 0,
RW_WRITER = 1,
RW_READER = 2
} krw_t;
/*
 * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner
 * field, so we don't need our own.
 *
 *   rw_rwlock - the underlying Linux rw_semaphore
 *   rw_owner  - write owner; only present without
 *               CONFIG_RWSEM_SPIN_ON_OWNER
 *   rw_type   - krw_type_t, only present with CONFIG_LOCKDEP
 */
typedef struct {
struct rw_semaphore rw_rwlock;
#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
kthread_t *rw_owner;
#endif
#ifdef CONFIG_LOCKDEP
krw_type_t rw_type;
#endif /* CONFIG_LOCKDEP */
} krwlock_t;
/* Address of the native rw_semaphore embedded in a krwlock_t. */
#define SEM(rwp) (&(rwp)->rw_rwlock)
/*
 * Record the calling task as the write owner of rwp.
 *
 * This is a no-op when CONFIG_RWSEM_SPIN_ON_OWNER is defined: in that case
 * down_write, up_write, downgrade_write and __init_rwsem set/clear the
 * rw_semaphore's own owner field for us.
 */
static inline void
spl_rw_set_owner(krwlock_t *rwp)
{
#if !defined(CONFIG_RWSEM_SPIN_ON_OWNER)
	kthread_t *self = current;

	rwp->rw_owner = self;
#endif
}
/*
 * Forget the write owner of rwp.  Nothing is done when
 * CONFIG_RWSEM_SPIN_ON_OWNER is defined, because krwlock_t then has no
 * rw_owner field of its own.
 */
static inline void
spl_rw_clear_owner(krwlock_t *rwp)
{
#if defined(CONFIG_RWSEM_SPIN_ON_OWNER)
	/* no private owner field to clear */
#else
	rwp->rw_owner = NULL;
#endif
}
/*
 * Return the recorded owner of rwp: the private rw_owner field when it
 * exists, otherwise the rw_semaphore's own owner field.
 */
static inline kthread_t *
rw_owner(krwlock_t *rwp)
{
#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
	return (rwp->rw_owner);
#else
	return (SEM(rwp)->owner);
#endif
}
#ifdef CONFIG_LOCKDEP
/* Remember the lock type so the lockdep helpers below can consult it. */
static inline void
spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
{
	rwp->rw_type = type;
}

/* Call lockdep_off() only for a non-NULL RW_NOLOCKDEP lock. */
static inline void
spl_rw_lockdep_off_maybe(krwlock_t *rwp)
{
	if (!rwp || rwp->rw_type != RW_NOLOCKDEP)
		return;

	lockdep_off();
}

/* Call lockdep_on() only for a non-NULL RW_NOLOCKDEP lock. */
static inline void
spl_rw_lockdep_on_maybe(krwlock_t *rwp)
{
	if (!rwp || rwp->rw_type != RW_NOLOCKDEP)
		return;

	lockdep_on();
}
#else /* CONFIG_LOCKDEP */
/* Without CONFIG_LOCKDEP the helpers expand to nothing. */
#define	spl_rw_set_type(rwp, type)
#define	spl_rw_lockdep_off_maybe(rwp)
#define	spl_rw_lockdep_on_maybe(rwp)
#endif /* CONFIG_LOCKDEP */
/*
 * Return 1 when the semaphore is locked and no writer is recorded as
 * owner, 0 otherwise.
 *
 * Linux 4.8 sets owner to 1 while read held instead of leaving it NULL,
 * so "no writer" is tested as owner <= 1.
 */
static inline int
RW_READ_HELD(krwlock_t *rwp)
{
	if (!spl_rwsem_is_locked(SEM(rwp)))
		return (0);

	return ((unsigned long)rw_owner(rwp) <= 1);
}
/*
 * Return non-zero when the calling task is the recorded owner of rwp.
 */
static inline int
RW_WRITE_HELD(krwlock_t *rwp)
{
	kthread_t *holder = rw_owner(rwp);

	return (holder == current);
}
/*
 * Return the result of the native "is locked" check on the underlying
 * rw_semaphore, regardless of who holds it or in which mode.
 */
static inline int
RW_LOCK_HELD(krwlock_t *rwp)
{
	struct rw_semaphore *sem = SEM(rwp);

	return (spl_rwsem_is_locked(sem));
}
/*
* The following functions must be a #define and not static inline.
* This ensures that the native linux semaphore functions (down/up)
* will be correctly located in the users code which is important
* for the built in kernel lock analysis tools
*/
/* BEGIN CSTYLED */
/*
 * Initialize a krwlock_t.  Each expansion declares its own static
 * lock_class_key, initializes the native rw_semaphore, clears the owner and
 * records the type.  Only RW_DEFAULT and RW_NOLOCKDEP are accepted
 * (asserted).  The lock name is always the stringified `rwp` expression;
 * the `name` and `arg` arguments are not referenced.
 */
#define rw_init(rwp, name, type, arg) \
({ \
static struct lock_class_key __key; \
ASSERT(type == RW_DEFAULT || type == RW_NOLOCKDEP); \
\
__init_rwsem(SEM(rwp), #rwp, &__key); \
spl_rw_clear_owner(rwp); \
spl_rw_set_type(rwp, type); \
})
/*
 * Destroy a krwlock_t.  The only action is to verify that the lock is not
 * held; nothing is released or freed here.
 */
#define rw_destroy(rwp) \
({ \
VERIFY(!RW_LOCK_HELD(rwp)); \
})
/*
 * Try to take the lock without blocking.  Evaluates to the result of
 * down_read_trylock() for RW_READER or down_write_trylock() for RW_WRITER;
 * on a successful write acquisition the owner is recorded.  Any other `rw`
 * value trips VERIFY(0).
 */
#define rw_tryenter(rwp, rw) \
({ \
int _rc_ = 0; \
\
spl_rw_lockdep_off_maybe(rwp); \
switch (rw) { \
case RW_READER: \
_rc_ = down_read_trylock(SEM(rwp)); \
break; \
case RW_WRITER: \
if ((_rc_ = down_write_trylock(SEM(rwp)))) \
spl_rw_set_owner(rwp); \
break; \
default: \
VERIFY(0); \
} \
spl_rw_lockdep_on_maybe(rwp); \
_rc_; \
})
/*
 * Take the lock: down_read() for RW_READER, down_write() for RW_WRITER
 * followed by recording the caller as owner.  Any other `rw` value trips
 * VERIFY(0).
 */
#define rw_enter(rwp, rw) \
({ \
spl_rw_lockdep_off_maybe(rwp); \
switch (rw) { \
case RW_READER: \
down_read(SEM(rwp)); \
break; \
case RW_WRITER: \
down_write(SEM(rwp)); \
spl_rw_set_owner(rwp); \
break; \
default: \
VERIFY(0); \
} \
spl_rw_lockdep_on_maybe(rwp); \
})
/*
 * Release the lock.  If the caller is the recorded write owner, the owner
 * is cleared and up_write() is called; otherwise the lock is asserted to be
 * read held and up_read() is called.
 */
#define rw_exit(rwp) \
({ \
spl_rw_lockdep_off_maybe(rwp); \
if (RW_WRITE_HELD(rwp)) { \
spl_rw_clear_owner(rwp); \
up_write(SEM(rwp)); \
} else { \
ASSERT(RW_READ_HELD(rwp)); \
up_read(SEM(rwp)); \
} \
spl_rw_lockdep_on_maybe(rwp); \
})
/*
 * Convert a write hold into a read hold: the owner is cleared first, then
 * downgrade_write() is called on the underlying rw_semaphore.
 */
#define rw_downgrade(rwp) \
({ \
spl_rw_lockdep_off_maybe(rwp); \
spl_rw_clear_owner(rwp); \
downgrade_write(SEM(rwp)); \
spl_rw_lockdep_on_maybe(rwp); \
})
/*
 * Try to convert a read hold into a write hold.  Evaluates to 1 at once if
 * the caller is already the write owner; otherwise to the result of
 * rwsem_tryupgrade(), recording the caller as owner when that succeeds.
 */
#define rw_tryupgrade(rwp) \
({ \
int _rc_ = 0; \
\
if (RW_WRITE_HELD(rwp)) { \
_rc_ = 1; \
} else { \
spl_rw_lockdep_off_maybe(rwp); \
if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \
spl_rw_set_owner(rwp); \
spl_rw_lockdep_on_maybe(rwp); \
} \
_rc_; \
})
/* END CSTYLED */
/* END CSTYLED */
/*
 * Defined outside this header.  rwsem_tryupgrade() is used by
 * rw_tryupgrade(), which treats a non-zero return as success.
 * NOTE(review): spl_rw_init()/spl_rw_fini() are presumably the setup and
 * teardown hooks for the rwlock subsystem - confirm against the
 * implementation.
 */
int spl_rw_init(void);
void spl_rw_fini(void);
int rwsem_tryupgrade(struct rw_semaphore *rwsem);
#endif /* _SPL_RWLOCK_H */

209
include/spl/sys/shrinker.h Normal file
View File

@ -0,0 +1,209 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_SHRINKER_H
#define _SPL_SHRINKER_H
#include <linux/mm.h>
#include <linux/fs.h>
/*
 * Fallback definition of struct shrink_control, compiled only when
 * HAVE_SHRINK_CONTROL_STRUCT is not defined.  It carries the two values the
 * callback wrappers in this header fill in: gfp_mask and nr_to_scan.
 */
#if !defined(HAVE_SHRINK_CONTROL_STRUCT)
struct shrink_control {
gfp_t gfp_mask;
unsigned long nr_to_scan;
};
#endif /* HAVE_SHRINK_CONTROL_STRUCT */
/*
* Due to frequent changes in the shrinker API the following
* compatibility wrappers should be used. They are as follows:
*
* SPL_SHRINKER_DECLARE is used to declare the shrinker which is
* passed to spl_register_shrinker()/spl_unregister_shrinker(). Use
* shrinker_name to set the shrinker variable name, shrinker_callback
* to set the callback function, and seek_cost to define the cost of
* reclaiming an object.
*
* SPL_SHRINKER_DECLARE(shrinker_name, shrinker_callback, seek_cost);
*
* SPL_SHRINKER_CALLBACK_FWD_DECLARE is used when a forward declaration
* of the shrinker callback function is required. Only the callback
* function needs to be passed.
*
* SPL_SHRINKER_CALLBACK_FWD_DECLARE(shrinker_callback);
*
* SPL_SHRINKER_CALLBACK_WRAPPER is used to declare the callback function
* which is registered with the shrinker. This function will call your
* custom shrinker which must use the following prototype. Notice the
* leading __'s, these must be prepended to the callback_function name.
*
* int __shrinker_callback(struct shrinker *, struct shrink_control *)
* SPL_SHRINKER_CALLBACK_WRAPPER(shrinker_callback);
*
*
* Example:
*
* SPL_SHRINKER_CALLBACK_FWD_DECLARE(my_shrinker_fn);
* SPL_SHRINKER_DECLARE(my_shrinker, my_shrinker_fn, 1);
*
* static int
* __my_shrinker_fn(struct shrinker *shrink, struct shrink_control *sc)
* {
* if (sc->nr_to_scan) {
* ...scan objects in the cache and reclaim them...
* }
*
* ...calculate number of objects in the cache...
*
* return (number of objects in the cache);
* }
* SPL_SHRINKER_CALLBACK_WRAPPER(my_shrinker_fn);
*/
/* Thin aliases for the native register_shrinker()/unregister_shrinker(). */
#define spl_register_shrinker(x) register_shrinker(x)
#define spl_unregister_shrinker(x) unregister_shrinker(x)
/*
* Linux 2.6.23 - 2.6.34 Shrinker API Compatibility.
*/
#if defined(HAVE_2ARGS_OLD_SHRINKER_CALLBACK)
/*
 * Old two-argument callback API.  The generated fn() packs its nr_to_scan
 * and gfp_mask arguments into a local shrink_control and calls __fn() with
 * a NULL shrinker pointer, since this API does not supply one.
 */
#define SPL_SHRINKER_DECLARE(s, x, y) \
static struct shrinker s = { \
.shrink = x, \
.seeks = y \
}
#define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \
static int fn(int nr_to_scan, unsigned int gfp_mask)
#define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \
static int \
fn(int nr_to_scan, unsigned int gfp_mask) \
{ \
struct shrink_control sc; \
\
sc.nr_to_scan = nr_to_scan; \
sc.gfp_mask = gfp_mask; \
\
return (__ ## fn(NULL, &sc)); \
}
/*
* Linux 2.6.35 to 2.6.39 Shrinker API Compatibility.
*/
#elif defined(HAVE_3ARGS_SHRINKER_CALLBACK)
/*
 * Three-argument callback API.  The generated fn() packs nr_to_scan and
 * gfp_mask into a local shrink_control and forwards the shrinker pointer it
 * was given to __fn().
 */
#define SPL_SHRINKER_DECLARE(s, x, y) \
static struct shrinker s = { \
.shrink = x, \
.seeks = y \
}
#define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \
static int fn(struct shrinker *, int, unsigned int)
#define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \
static int \
fn(struct shrinker *shrink, int nr_to_scan, unsigned int gfp_mask) \
{ \
struct shrink_control sc; \
\
sc.nr_to_scan = nr_to_scan; \
sc.gfp_mask = gfp_mask; \
\
return (__ ## fn(shrink, &sc)); \
}
/*
* Linux 3.0 to 3.11 Shrinker API Compatibility.
*/
#elif defined(HAVE_2ARGS_NEW_SHRINKER_CALLBACK)
/*
 * New two-argument callback API.  The native prototype already matches the
 * SPL one, so the generated fn() passes both arguments straight through to
 * __fn().
 */
#define SPL_SHRINKER_DECLARE(s, x, y) \
static struct shrinker s = { \
.shrink = x, \
.seeks = y \
}
#define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \
static int fn(struct shrinker *, struct shrink_control *)
#define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \
static int \
fn(struct shrinker *shrink, struct shrink_control *sc) \
{ \
return (__ ## fn(shrink, sc)); \
}
/*
* Linux 3.12 and later Shrinker API Compatibility.
*/
#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
/*
 * Split count/scan callback API.  Two functions are generated from the one
 * __fn() callback:
 *   fn_count_objects() - forces sc->nr_to_scan to 0 before calling __fn()
 *                        and maps a negative result to 0
 *   fn_scan_objects()  - calls __fn() with sc unchanged and maps a negative
 *                        result to SHRINK_STOP
 * The __fn() result is held in an int before being returned as unsigned
 * long.
 *
 * NOTE(review): both wrappers pass NULL rather than `shrink` to __fn(),
 * unlike the three-argument and new two-argument variants - confirm this
 * is intentional.
 */
#define SPL_SHRINKER_DECLARE(s, x, y) \
static struct shrinker s = { \
.count_objects = x ## _count_objects, \
.scan_objects = x ## _scan_objects, \
.seeks = y \
}
#define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \
static unsigned long fn ## _count_objects(struct shrinker *, \
struct shrink_control *); \
static unsigned long fn ## _scan_objects(struct shrinker *, \
struct shrink_control *)
#define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \
static unsigned long \
fn ## _count_objects(struct shrinker *shrink, struct shrink_control *sc)\
{ \
int __ret__; \
\
sc->nr_to_scan = 0; \
__ret__ = __ ## fn(NULL, sc); \
\
/* Errors may not be returned and must be converted to zeros */ \
return ((__ret__ < 0) ? 0 : __ret__); \
} \
\
static unsigned long \
fn ## _scan_objects(struct shrinker *shrink, struct shrink_control *sc) \
{ \
int __ret__; \
\
__ret__ = __ ## fn(NULL, sc); \
return ((__ret__ < 0) ? SHRINK_STOP : __ret__); \
}
#else
/*
* Linux 2.x to 2.6.22, or a newer shrinker API has been introduced.
*/
#error "Unknown shrinker callback"
#endif
/*
 * spl_shrinker_t is unsigned long under the split callback API and int
 * otherwise.  SHRINK_STOP is defined here as (-1) only for the non-split
 * APIs; the split wrappers above use it without it being defined in this
 * header.
 */
#if defined(HAVE_SPLIT_SHRINKER_CALLBACK)
typedef unsigned long spl_shrinker_t;
#else
typedef int spl_shrinker_t;
#define SHRINK_STOP (-1)
#endif
#endif /* _SPL_SHRINKER_H */

61
include/spl/sys/sid.h Normal file
View File

@ -0,0 +1,61 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_SID_H
#define _SPL_SID_H
/*
 * SID domain record.  kd_name is a separately allocated, NUL-terminated
 * copy of the domain string (see ksid_lookupdomain()/ksiddomain_rele()).
 */
typedef struct ksiddomain {
char *kd_name;
} ksiddomain_t;
/*
 * SID index values; KSID_COUNT is last and so equals the number of
 * preceding entries.
 */
typedef enum ksid_index {
KSID_USER,
KSID_GROUP,
KSID_OWNER,
KSID_COUNT
} ksid_index_t;
/* In this header a ksid_t is a plain int. */
typedef int ksid_t;
/*
 * Allocate a ksiddomain_t whose kd_name is a private copy of `dom`.
 *
 *   dom - NUL-terminated domain string; must not be NULL (it is passed
 *         straight to strlen()).
 *
 * Returns the new record.  Both allocations use KM_SLEEP.  The caller
 * releases the record with ksiddomain_rele().
 */
static inline ksiddomain_t *
ksid_lookupdomain(const char *dom)
{
	ksiddomain_t *kd;
	/* size_t, not int: keep the full strlen() result without narrowing */
	size_t len = strlen(dom);

	kd = kmem_zalloc(sizeof (ksiddomain_t), KM_SLEEP);
	/*
	 * One byte more than the string is allocated and only `len` bytes
	 * are copied, so the final byte is left as the allocator returned
	 * it (kmem_zalloc is expected to hand back zeroed memory, which
	 * provides the terminator).
	 */
	kd->kd_name = kmem_zalloc(len + 1, KM_SLEEP);
	memcpy(kd->kd_name, dom, len);

	return (kd);
}
/*
 * Free a record produced by ksid_lookupdomain(): first the name buffer,
 * whose size is recomputed as strlen(kd_name) + 1, then the record itself.
 */
static inline void
ksiddomain_rele(ksiddomain_t *ksid)
{
	size_t name_size = strlen(ksid->kd_name) + 1;

	kmem_free(ksid->kd_name, name_size);
	kmem_free(ksid, sizeof (ksiddomain_t));
}
#endif /* _SPL_SID_H */

55
include/spl/sys/signal.h Normal file
View File

@ -0,0 +1,55 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_SIGNAL_H
#define _SPL_SIGNAL_H
#include <linux/sched.h>
#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif
/* Values accepted for the `why` argument of issig(). */
#define FORREAL 0 /* Usual side-effects */
#define JUSTLOOKING 1 /* Don't stop the process */
/*
* The "why" argument indicates the allowable side-effects of the call:
*
* FORREAL: Extract the next pending signal from p_sig into p_cursig;
* stop the process if a stop has been requested or if a traced signal
* is pending.
*
* JUSTLOOKING: Don't stop the process, just indicate whether or not
* a signal might be pending (FORREAL is needed to tell for sure).
*/
/*
 * Report whether the current task has a signal pending.  In this
 * implementation `why` is only validated (it must be FORREAL or
 * JUSTLOOKING); both values produce the same signal_pending(current)
 * result.
 */
static __inline__ int
issig(int why)
{
	int pending;

	ASSERT(why == FORREAL || why == JUSTLOOKING);

	pending = signal_pending(current);

	return (pending);
}
#endif /* _SPL_SIGNAL_H */

30
include/spl/sys/stat.h Normal file
View File

@ -0,0 +1,30 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_STAT_H
#define _SPL_STAT_H
#include <linux/stat.h>
#endif /* _SPL_STAT_H */

31
include/spl/sys/strings.h Normal file
View File

@ -0,0 +1,31 @@
/*
* Copyright (C) 2018 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_SYS_STRINGS_H
#define _SPL_SYS_STRINGS_H
#include <linux/string.h>
/*
 * BSD-style memory helpers expressed with the standard mem*() routines.
 *
 * bzero(ptr, size)       - zero size bytes starting at ptr
 * bcopy(src, dest, size) - copy size bytes from src to dest; memmove() is
 *                          used, so the two regions may overlap
 * bcmp(src, dest, size)  - memcmp() result for the two regions (0 if equal)
 *
 * Every argument is parenthesized so the three macros expand consistently.
 */
#define bzero(ptr, size) memset((ptr), 0, (size))
#define bcopy(src, dest, size) memmove((dest), (src), (size))
#define bcmp(src, dest, size) memcmp((src), (dest), (size_t)(size))
#endif /* _SPL_SYS_STRINGS_H */

58
include/spl/sys/sunddi.h Normal file
View File

@ -0,0 +1,58 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_SUNDDI_H
#define _SPL_SUNDDI_H
#include <sys/cred.h>
#include <sys/uio.h>
#include <sys/mutex.h>
#include <sys/u8_textprep.h>
#include <sys/vnode.h>
/* Device id handle; represented as a plain int in this port. */
typedef int ddi_devid_t;
/* Sentinel dev_t values. */
#define DDI_DEV_T_NONE ((dev_t)-1)
#define DDI_DEV_T_ANY ((dev_t)-2)
/*
 * NOTE(review): Solaris appears to spell this DDI_MAJOR_T_UNKNOWN; the
 * existing name is kept so current users keep compiling -- confirm whether
 * an alias is wanted.
 */
#define DI_MAJOR_T_UNKNOWN ((major_t)0)
/* Property lookup flags. */
#define DDI_PROP_DONTPASS 0x0001
#define DDI_PROP_CANSLEEP 0x0002
/* Generic DDI return codes; the negative value is parenthesized. */
#define DDI_SUCCESS 0
#define DDI_FAILURE (-1)
/*
 * Property stubs.  ddi_prop_lookup_string() ignores its first four
 * arguments and stores NULL through x5; ddi_prop_free() and
 * ddi_root_node() expand to a no-op expression.  Arguments and expansions
 * are parenthesized so any expression may be passed for x5.
 */
#define ddi_prop_lookup_string(x1, x2, x3, x4, x5) (*(x5) = NULL)
#define ddi_prop_free(x) ((void)0)
#define ddi_root_node() ((void)0)
/*
 * String to integer conversions, implemented elsewhere in the SPL.
 * NOTE(review): presumably (string, end pointer, base, result) returning 0
 * on success -- confirm against the implementation.
 */
extern int ddi_strtoul(const char *, char **, int, unsigned long *);
extern int ddi_strtol(const char *, char **, int, long *);
extern int ddi_strtoull(const char *, char **, int, unsigned long long *);
extern int ddi_strtoll(const char *, char **, int, long long *);
/* Copy len bytes between from and to; flags is interpreted by the callee. */
extern int ddi_copyin(const void *from, void *to, size_t len, int flags);
extern int ddi_copyout(const void *from, void *to, size_t len, int flags);
#endif /* _SPL_SUNDDI_H */

228
include/spl/sys/sysmacros.h Normal file
View File

@ -0,0 +1,228 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_SYSMACROS_H
#define _SPL_SYSMACROS_H
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <sys/debug.h>
#include <sys/zone.h>
#include <sys/signal.h>
#include <asm/page.h>
#ifdef HAVE_SCHED_RT_HEADER
#include <linux/sched/rt.h>
#endif
/* Define _KERNEL in terms of the Linux __KERNEL__ symbol when it is unset. */
#ifndef _KERNEL
#define _KERNEL __KERNEL__
#endif
/* Boolean constants. */
#define FALSE 0
#define TRUE 1
/*
 * Fixed-width integer limits.  The unsigned minimums are written as literal
 * zeros: neither the Linux kernel headers nor <limits.h> provide UINT_MIN
 * or ULLONG_MIN, so aliasing those names would make UINT32_MIN and
 * UINT64_MIN fail to compile wherever they are used.
 */
#define INT8_MAX (127)
#define INT8_MIN (-128)
#define UINT8_MAX (255)
#define UINT8_MIN (0)
#define INT16_MAX (32767)
#define INT16_MIN (-32768)
#define UINT16_MAX (65535)
#define UINT16_MIN (0)
#define INT32_MAX INT_MAX
#define INT32_MIN INT_MIN
#define UINT32_MAX UINT_MAX
#define UINT32_MIN (0U)
#define INT64_MAX LLONG_MAX
#define INT64_MIN LLONG_MIN
#define UINT64_MAX ULLONG_MAX
#define UINT64_MIN (0ULL)
/* Size and length limits. */
#define NBBY 8
#define MAXMSGLEN 256
#define MAXNAMELEN 256
#define MAXPATHLEN 4096
#define MAXOFFSET_T LLONG_MAX
#define MAXBSIZE 8192
#define DEV_BSIZE 512
#define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */
/* Solaris names mapped onto Linux equivalents or inert placeholders. */
#define proc_pageout NULL
#define curproc current
#define max_ncpus num_possible_cpus()
#define boot_ncpus num_online_cpus()
#define CPU_SEQID smp_processor_id()
#define _NOTE(x)
#define is_system_labeled() 0
#ifndef RLIM64_INFINITY
#define RLIM64_INFINITY (~0ULL)
#endif
/*
 * Linux priority layout:
 *   0 .. MAX_PRIO-1            every process priority
 *   0 .. MAX_RT_PRIO-1         real-time tasks
 *   MAX_RT_PRIO .. MAX_PRIO-1  SCHED_NORMAL tasks
 *
 * Shim tasks are treated as SCHED_NORMAL tasks, so the system class
 * priorities below all fall inside the SCHED_NORMAL range.
 */
#define minclsyspri (MAX_PRIO-1)
#define maxclsyspri (MAX_RT_PRIO)
#define defclsyspri (DEFAULT_PRIO)
/* nice <-> priority conversions, supplied only when not already defined. */
#if !defined(NICE_TO_PRIO)
#define NICE_TO_PRIO(nice) ((nice) + 20 + MAX_RT_PRIO)
#endif
#if !defined(PRIO_TO_NICE)
#define PRIO_TO_NICE(prio) ((prio) - 20 - MAX_RT_PRIO)
#endif
/*
 * Solaris page-size names, mapped onto the Linux ones when missing.
 */
#if !defined(PAGESIZE)
#define PAGESIZE PAGE_SIZE
#endif
#if !defined(PAGESHIFT)
#define PAGESHIFT PAGE_SHIFT
#endif
/*
 * DTrace probes do not exist in the Linux kernel.  Any earlier definition
 * of a probe macro is dropped and replaced by one that expands to a no-op
 * expression; the probe arguments are discarded without being evaluated.
 * (#undef of a name that is not a macro is simply ignored.)
 */
#undef DTRACE_PROBE
#define DTRACE_PROBE(a) ((void)0)

#undef DTRACE_PROBE1
#define DTRACE_PROBE1(a, b, c) ((void)0)

#undef DTRACE_PROBE2
#define DTRACE_PROBE2(a, b, c, d, e) ((void)0)

#undef DTRACE_PROBE3
#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0)

#undef DTRACE_PROBE4
#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0)
/* Missing globals */
/* Both objects are only declared here; they are defined elsewhere. */
extern char spl_version[32];
extern unsigned long spl_hostid;
/* Missing misc functions */
extern uint32_t zone_get_hostid(void *zone);
extern void spl_setup(void);
extern void spl_cleanup(void);
/*
 * Bit-search and device-number helpers forwarded to kernel routines.
 * NOTE(review): highbit()/lowbit() forward to __fls()/__ffs() while
 * highbit64() forwards to fls64(); those kernel helpers appear to number
 * bits differently (0-based vs 1-based) -- confirm callers expect that.
 */
#define highbit(x) __fls(x)
#define lowbit(x) __ffs(x)
#define highbit64(x) fls64(x)
#define makedevice(maj, min) makedev(maj, min)
/*
 * Common arithmetic helpers, each supplied only if not already defined.
 * They are plain expression macros, so an argument may be evaluated more
 * than once; do not pass expressions with side effects.
 */
#if !defined(MIN)
#define MIN(a, b) ((b) > (a) ? (a) : (b))
#endif
#if !defined(MAX)
#define MAX(a, b) ((b) > (a) ? (b) : (a))
#endif
#if !defined(ABS)
#define ABS(a) (0 > (a) ? -(a) : (a))
#endif
/* n divided by d, rounded up. */
#if !defined(DIV_ROUND_UP)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#endif
/* x rounded up to the next multiple of y. */
#if !defined(roundup)
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
#endif
/* Number of y-sized units needed to hold x. */
#if !defined(howmany)
#define howmany(x, y) (((x) + ((y) - 1)) / (y))
#endif
/*
 * Power-of-two helpers needed for the Solaris -> Linux port.  In every
 * macro "align" is expected to be a power of two.
 *
 * P2ALIGN      x with the bits below align cleared
 * P2CROSS      non-zero when x and y differ in a bit at or above align
 * P2ROUNDUP    x rounded up to a multiple of align
 * P2PHASE      the bits of x below align
 * P2NPHASE     the bits of -x below align
 * ISP2         non-zero when x has at most one bit set
 * IS_P2ALIGNED non-zero when v has no bits set below a
 * P2BOUNDARY   non-zero when off and off + len - 1 differ in a bit at or
 *              above align
 */
#define P2ALIGN(x, align) (-(align) & (x))
#define P2CROSS(x, y, align) ((align) - 1 < ((x) ^ (y)))
#define P2ROUNDUP(x, align) ((((align) - 1) | ((x) - 1)) + 1)
#define P2PHASE(x, align) (((align) - 1) & (x))
#define P2NPHASE(x, align) (((align) - 1) & -(x))
#define ISP2(x) ((((x) - 1) & (x)) == 0)
#define IS_P2ALIGNED(v, a) ((((uintptr_t)(a) - 1) & ((uintptr_t)(v))) == 0)
#define P2BOUNDARY(off, len, align) \
	((align) - 1 < ((off) ^ ((off) + (len) - 1)))
/*
 * Typed variants of the P2* macros.  Each operand is cast to the type
 * given as the last argument before any arithmetic is done, so the result
 * is computed in that type regardless of the type of the alignment.  To
 * round a uint64_t x up to a page boundary, either of these works:
 *
 *	P2ROUNDUP(x, (uint64_t)PAGESIZE)
 *	P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
 */
#define P2ALIGN_TYPED(x, align, type) \
	(-(type)(align) & (type)(x))
#define P2PHASE_TYPED(x, align, type) \
	(((type)(align) - 1) & (type)(x))
#define P2NPHASE_TYPED(x, align, type) \
	(((type)(align) - 1) & -(type)(x))
#define P2ROUNDUP_TYPED(x, align, type) \
	((((type)(align) - 1) | ((type)(x) - 1)) + 1)
#define P2END_TYPED(x, align, type) \
	(-(-(type)(align) & ~(type)(x)))
#define P2PHASEUP_TYPED(x, align, phase, type) \
	((type)(phase) - (-(type)(align) & ((type)(phase) - (type)(x))))
#define P2CROSS_TYPED(x, y, align, type) \
	((type)(align) - 1 < ((type)(x) ^ (type)(y)))
#define P2SAMEHIGHBIT_TYPED(x, y, type) \
	(((type)(x) & (type)(y)) > ((type)(x) ^ (type)(y)))
/*
 * Fallback offsetof(s, m): the address of member m in a structure of type
 * s placed at address 0, converted to size_t.  It is only supplied when
 * _KERNEL is defined, _KMEMUSER is not, and no offsetof macro exists yet.
 */
#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof)
/* avoid any possibility of clashing with <stddef.h> version */
#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
#endif
#endif /* _SPL_SYSMACROS_H */

View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_SYSTEMINFO_H
#define _SPL_SYSTEMINFO_H
/*
 * Minimum buffer size needed to hold a decimal or hex hostid string.
 */
#define HW_HOSTID_LEN 11 /* minimum buffer size needed */
/* to hold a decimal or hex */
/* hostid string */
/* Supplemental definitions for Linux. */
#define HW_HOSTID_PATH "/etc/hostid" /* binary configuration file */
#define HW_HOSTID_MASK 0xFFFFFFFF /* significant hostid bits */
#endif /* _SPL_SYSTEMINFO_H */

163
include/spl/sys/taskq.h Normal file
View File

@ -0,0 +1,163 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_TASKQ_H
#define _SPL_TASKQ_H
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/rwlock.h>
#include <sys/wait.h>
/* NOTE(review): presumably the maximum taskq name length -- confirm. */
#define TASKQ_NAMELEN 31
/*
 * TASKQ_* bit flags.
 * NOTE(review): presumably the values accepted by the flags argument of
 * taskq_create() and kept in taskq_t.tq_flags -- confirm against the
 * implementation.
 */
#define TASKQ_PREPOPULATE 0x00000001
#define TASKQ_CPR_SAFE 0x00000002
#define TASKQ_DYNAMIC 0x00000004
#define TASKQ_THREADS_CPU_PCT 0x00000008
#define TASKQ_DC_BATCH 0x00000010
#define TASKQ_ACTIVE 0x80000000
/*
 * Flags for taskq_dispatch.  TQ_SLEEP/TQ_NOSLEEP should be same as
 * KM_SLEEP/KM_NOSLEEP.  TQ_NOQUEUE/TQ_NOALLOC are set particularly
 * large so as not to conflict with already used GFP_* defines.
 */
#define TQ_SLEEP 0x00000000
#define TQ_NOSLEEP 0x00000001
#define TQ_PUSHPAGE 0x00000002
#define TQ_NOQUEUE 0x01000000
#define TQ_NOALLOC 0x02000000
#define TQ_NEW 0x04000000
#define TQ_FRONT 0x08000000
/*
 * Reserved taskqid values.
 */
#define TASKQID_INVALID ((taskqid_t)0)
#define TASKQID_INITIAL ((taskqid_t)1)
/*
 * Lock class used when taking a taskq's tq_lock (stored in
 * taskq_t.tq_lock_class).
 *
 * spin_lock(lock) and spin_lock_nested(lock,0) are equivalent,
 * so TQ_LOCK_DYNAMIC must not evaluate to 0
 */
typedef enum tq_lock_role {
TQ_LOCK_GENERAL = 0,
TQ_LOCK_DYNAMIC = 1,
} tq_lock_role_t;
/* Identifier type for dispatched tasks; see the TASKQID_* reserved values. */
typedef unsigned long taskqid_t;
/* Signature of a task callback: takes one opaque pointer, returns nothing. */
typedef void (task_func_t)(void *);
/*
 * A task queue.  tq_lock protects the structure (see its field comment);
 * the list heads track threads and taskq_ent_t entries in their various
 * states, and the two wait queues are used for new work and for waiters.
 */
typedef struct taskq {
spinlock_t tq_lock; /* protects taskq_t */
char *tq_name; /* taskq name */
int tq_instance; /* instance of tq_name */
struct list_head tq_thread_list; /* list of all threads */
struct list_head tq_active_list; /* list of active threads */
int tq_nactive; /* # of active threads */
int tq_nthreads; /* # of existing threads */
int tq_nspawn; /* # of threads being spawned */
int tq_maxthreads; /* # of threads maximum */
int tq_pri; /* priority */
int tq_minalloc; /* min taskq_ent_t pool size */
int tq_maxalloc; /* max taskq_ent_t pool size */
int tq_nalloc; /* cur taskq_ent_t pool size */
uint_t tq_flags; /* flags */
taskqid_t tq_next_id; /* next pend/work id */
taskqid_t tq_lowest_id; /* lowest pend/work id */
struct list_head tq_free_list; /* free taskq_ent_t's */
struct list_head tq_pend_list; /* pending taskq_ent_t's */
struct list_head tq_prio_list; /* priority taskq_ent_t's */
struct list_head tq_delay_list; /* delayed taskq_ent_t's */
struct list_head tq_taskqs; /* all taskq_t's */
spl_wait_queue_head_t tq_work_waitq; /* new work waitq */
spl_wait_queue_head_t tq_wait_waitq; /* wait waitq */
tq_lock_role_t tq_lock_class; /* class when taking tq_lock */
} taskq_t;
/*
 * One task entry.  tqent_func and tqent_arg hold the callback and the
 * opaque pointer it receives; tqent_id is the entry's taskqid_t and
 * tqent_taskq points at a taskq_t.
 * NOTE(review): the roles of the lock, wait queue, timer and birth fields
 * are set by the implementation, which is not visible here -- confirm
 * before relying on them.
 */
typedef struct taskq_ent {
spinlock_t tqent_lock;
spl_wait_queue_head_t tqent_waitq;
struct timer_list tqent_timer;
struct list_head tqent_list;
taskqid_t tqent_id;
task_func_t *tqent_func;
void *tqent_arg;
taskq_t *tqent_taskq;
uintptr_t tqent_flags;
unsigned long tqent_birth;
} taskq_ent_t;
/* NOTE(review): presumably bit values for tqent_flags -- confirm. */
#define TQENT_FLAG_PREALLOC 0x1
#define TQENT_FLAG_CANCEL 0x2
/*
 * Per-thread bookkeeping for a taskq: the kernel task (tqt_thread), a
 * taskq_t pointer (tqt_tq), a task entry pointer (tqt_task) and two list
 * linkages.
 * NOTE(review): the linkages presumably hook into taskq_t.tq_thread_list
 * and tq_active_list -- confirm against the implementation.
 */
typedef struct taskq_thread {
struct list_head tqt_thread_list;
struct list_head tqt_active_list;
struct task_struct *tqt_thread;
taskq_t *tqt_tq;
taskqid_t tqt_id;
taskq_ent_t *tqt_task;
uintptr_t tqt_flags;
} taskq_thread_t;
/* Global system-wide dynamic task queue available for all consumers */
extern taskq_t *system_taskq;
/* Global dynamic task queue for long delay */
extern taskq_t *system_delay_taskq;
/* List of all taskqs */
extern struct list_head tq_list;
extern struct rw_semaphore tq_list_sem;
/*
 * Task queue API; the functions are implemented elsewhere in the SPL.
 * The dispatch routines take (queue, callback, callback argument, flags);
 * taskq_dispatch_delay() adds a clock_t and taskq_dispatch_ent() a
 * caller-supplied taskq_ent_t.
 */
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
uint_t, clock_t);
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
taskq_ent_t *);
extern int taskq_empty_ent(taskq_ent_t *);
extern void taskq_init_ent(taskq_ent_t *);
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait_id(taskq_t *, taskqid_t);
extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
extern void taskq_wait(taskq_t *);
extern int taskq_cancel_id(taskq_t *, taskqid_t);
extern int taskq_member(taskq_t *, kthread_t *);
/*
 * Compatibility wrappers around taskq_create().  taskq_create_proc()
 * drops its proc argument; taskq_create_sysdc() drops proc and dc and
 * always passes maxclsyspri as the priority.
 */
#define taskq_create_proc(name, nthreads, pri, min, max, proc, flags) \
taskq_create(name, nthreads, pri, min, max, flags)
#define taskq_create_sysdc(name, nthreads, min, max, proc, dc, flags) \
taskq_create(name, nthreads, maxclsyspri, min, max, flags)
/* Subsystem setup and teardown entry points. */
int spl_taskq_init(void);
void spl_taskq_fini(void);
#endif /* _SPL_TASKQ_H */

69
include/spl/sys/thread.h Normal file
View File

@ -0,0 +1,69 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_THREAD_H
#define _SPL_THREAD_H
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tsd.h>
/*
 * Thread interfaces
 */
#define TP_MAGIC 0x53535353
/* Solaris thread state names mapped onto Linux task state constants. */
#define TS_SLEEP TASK_INTERRUPTIBLE
#define TS_RUN TASK_RUNNING
#define TS_ZOMB EXIT_ZOMBIE
#define TS_STOPPED TASK_STOPPED
/* Entry point of a thread: takes one opaque pointer, returns nothing. */
typedef void (*thread_func_t)(void *);
/*
 * thread_create() forwards to __thread_create(), casting func to
 * thread_func_t and passing the spelling of func (via #func) as the
 * thread name.  func is parenthesized so the cast applies to the whole
 * argument expression.
 */
/* BEGIN CSTYLED */
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
	__thread_create(stk, stksize, (thread_func_t)(func), \
	#func, arg, len, pp, state, pri)
/* END CSTYLED */
#define thread_exit() __thread_exit()
/* thread_join() is not supported: it expands to VERIFY(0). */
#define thread_join(t) VERIFY(0)
#define curthread current
/* Name and pid of the current task; parenthesized for safe composition. */
#define getcomm() (current->comm)
#define getpid() (current->pid)
/*
 * Backing functions for the thread_create()/thread_exit() macros; they
 * are implemented elsewhere in the SPL.
 */
extern kthread_t *__thread_create(caddr_t stk, size_t stksize,
thread_func_t func, const char *name, void *args, size_t len, proc_t *pp,
int state, pri_t pri);
extern void __thread_exit(void);
/*
 * NOTE(review): presumably a wrapper around the kernel's kthread_create()
 * taking a printf-style name format -- confirm against the implementation.
 */
extern struct task_struct *spl_kthread_create(int (*func)(void *),
void *data, const char namefmt[], ...);
/* Declared here, defined elsewhere. */
extern proc_t p0;
#endif /* _SPL_THREAD_H */

82
include/spl/sys/time.h Normal file
View File

@ -0,0 +1,82 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_TIME_H
#define _SPL_TIME_H
#include <linux/module.h>
#include <linux/time.h>
#include <sys/types.h>
#include <sys/timer.h>
/* TIME_MAX/TIME_MIN use the 64-bit limits under CONFIG_64BIT, else 32-bit. */
#if defined(CONFIG_64BIT)
#define TIME_MAX INT64_MAX
#define TIME_MIN INT64_MIN
#else
#define TIME_MAX INT32_MAX
#define TIME_MIN INT32_MIN
#endif
/* Number of each unit in one second. */
#define SEC 1
#define MILLISEC 1000
#define MICROSEC 1000000
#define NANOSEC 1000000000
/*
 * Unit conversions.  The *2NSEC forms cast their argument to hrtime_t
 * before multiplying; the NSEC2* forms divide in the argument's own type.
 */
#define MSEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MILLISEC))
#define NSEC2MSEC(n) ((n) / (NANOSEC / MILLISEC))
#define USEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MICROSEC))
#define NSEC2USEC(n) ((n) / (NANOSEC / MICROSEC))
#define NSEC2SEC(n) ((n) / (NANOSEC / SEC))
#define SEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / SEC))
/* The kernel's HZ under the Solaris name; one copy per including file. */
static const int hz = HZ;
/* Non-zero when ts->tv_sec lies outside [TIME_MIN, TIME_MAX]. */
#define TIMESPEC_OVERFLOW(ts) \
((ts)->tv_sec < TIME_MIN || (ts)->tv_sec > TIME_MAX)
/*
 * Store the value returned by current_kernel_time() in *now.
 */
static inline void
gethrestime(timestruc_t *now)
{
	timestruc_t wall = current_kernel_time();

	*now = wall;
}
static inline time_t
gethrestime_sec(void)
{
struct timespec ts;
ts = current_kernel_time();
return (ts.tv_sec);
}
static inline hrtime_t
gethrtime(void)
{
struct timespec now;
getrawmonotonic(&now);
return (((hrtime_t)now.tv_sec * NSEC_PER_SEC) + now.tv_nsec);
}
#endif /* _SPL_TIME_H */

75
include/spl/sys/timer.h Normal file
View File

@ -0,0 +1,75 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_TIMER_H
#define _SPL_TIMER_H
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/time.h>
#include <linux/timer.h>
/*
 * Tick counters: the kernel jiffies value cast to clock_t, and
 * get_jiffies_64() cast to int64_t, each available under two names.
 */
#define lbolt ((clock_t)jiffies)
#define lbolt64 ((int64_t)get_jiffies_64())
#define ddi_get_lbolt() ((clock_t)jiffies)
#define ddi_get_lbolt64() ((int64_t)get_jiffies_64())
/*
 * Tick comparisons.  ddi_time_before(a, b) is true when (a) - (b) is
 * negative; typecheck() requires both operands to be clock_t.  The other
 * three forms are derived from it by swapping operands or negating.
 */
#define ddi_time_before(a, b) (typecheck(clock_t, a) && \
typecheck(clock_t, b) && \
((a) - (b) < 0))
#define ddi_time_after(a, b) ddi_time_before(b, a)
#define ddi_time_before_eq(a, b) (!ddi_time_after(a, b))
#define ddi_time_after_eq(a, b) ddi_time_before_eq(b, a)
/* The same comparisons for int64_t tick values. */
#define ddi_time_before64(a, b) (typecheck(int64_t, a) && \
typecheck(int64_t, b) && \
((a) - (b) < 0))
#define ddi_time_after64(a, b) ddi_time_before64(b, a)
#define ddi_time_before_eq64(a, b) (!ddi_time_after64(a, b))
#define ddi_time_after_eq64(a, b) ddi_time_before_eq64(b, a)
/* delay() forwards its tick count to schedule_timeout_uninterruptible(). */
#define delay(ticks) schedule_timeout_uninterruptible(ticks)
/* usleep_range() introduced in 2.6.36 */
#ifndef HAVE_USLEEP_RANGE
/*
 * Fallback used when the kernel does not provide usleep_range().
 *
 * min - shortest acceptable delay, in microseconds
 * max - longest acceptable delay, in microseconds; not used by this
 *       fallback
 *
 * Delays of at least MAX_UDELAY_MS milliseconds sleep with msleep();
 * shorter ones busy-wait with udelay().  The threshold is converted to
 * microseconds before comparing, because min is in microseconds while
 * MAX_UDELAY_MS is in milliseconds.  The millisecond count passed to
 * msleep() is rounded up so the request is never shorter than min.
 */
static inline void
usleep_range(unsigned long min, unsigned long max)
{
	if (min >= MAX_UDELAY_MS * USEC_PER_MSEC) {
		unsigned int min_ms = min / USEC_PER_MSEC;

		/* Round up without risking overflow in min + 999. */
		if (min % USEC_PER_MSEC != 0)
			min_ms++;
		msleep(min_ms);
	} else {
		udelay(min);
	}
}
#endif /* HAVE_USLEEP_RANGE */
/*
 * Convert a duration to kernel ticks.  Seconds are scaled by HZ;
 * milliseconds and microseconds go through the kernel's jiffies helpers;
 * nanoseconds are first reduced to microseconds.  The ns argument is
 * parenthesized so an expression such as (a + b) is divided as a whole.
 */
#define SEC_TO_TICK(sec) ((sec) * HZ)
#define MSEC_TO_TICK(ms) msecs_to_jiffies(ms)
#define USEC_TO_TICK(us) usecs_to_jiffies(us)
#define NSEC_TO_TICK(ns) usecs_to_jiffies((ns) / NSEC_PER_USEC)
#endif /* _SPL_TIMER_H */

46
include/spl/sys/tsd.h Normal file
View File

@ -0,0 +1,46 @@
/*
* Copyright (C) 2010 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_TSD_H
#define _SPL_TSD_H
#include <sys/types.h>
/*
 * Thread specific data (TSD) constants.
 * NOTE(review): DTOR_PID and PID_KEY sit one past PID_MAX_LIMIT and
 * TSD_KEYS_MAX respectively, presumably so they cannot collide with a real
 * pid or key -- confirm against the implementation.
 */
#define TSD_HASH_TABLE_BITS_DEFAULT 9
#define TSD_KEYS_MAX 32768
#define DTOR_PID (PID_MAX_LIMIT+1)
#define PID_KEY (TSD_KEYS_MAX+1)
/* Destructor callback type: takes one opaque pointer, returns nothing. */
typedef void (*dtor_func_t)(void *);
/* TSD API; the functions are implemented elsewhere in the SPL. */
extern int tsd_set(uint_t, void *);
extern void *tsd_get(uint_t);
extern void *tsd_get_by_thread(uint_t, kthread_t *);
extern void tsd_create(uint_t *, dtor_func_t);
extern void tsd_destroy(uint_t *);
extern void tsd_exit(void);
/* Subsystem setup and teardown entry points. */
int spl_tsd_init(void);
void spl_tsd_fini(void);
#endif /* _SPL_TSD_H */

70
include/spl/sys/types.h Normal file
View File

@ -0,0 +1,70 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_TYPES_H
#define _SPL_TYPES_H
#include <linux/types.h>
/* Supply the long long limits when no earlier header has defined them. */
#ifndef ULLONG_MAX
#define ULLONG_MAX (~0ULL)
#endif
#ifndef LLONG_MAX
#define LLONG_MAX ((long long)(~0ULL>>1))
#endif
/* Solaris boolean type. */
typedef enum {
	B_FALSE = 0,
	B_TRUE = 1
} boolean_t;
/* Solaris shorthand names for the basic integer types. */
typedef unsigned char uchar_t;
typedef unsigned short ushort_t;
typedef unsigned int uint_t;
typedef unsigned long ulong_t;
typedef unsigned long long u_longlong_t;
typedef long long longlong_t;
/*
 * intptr_t is the signed pointer-sized integer type, so it is declared as
 * long rather than unsigned long; the unsigned counterpart is uintptr_t.
 */
typedef long intptr_t;
typedef unsigned long long rlim64_t;
/* Solaris thread and process handles both map onto the Linux task_struct. */
typedef struct task_struct kthread_t;
typedef struct task_struct proc_t;
/* Solaris time structure names map onto struct timespec. */
typedef struct timespec timestruc_t;
typedef struct timespec timespec_t;
/* Remaining Solaris scalar types. */
typedef longlong_t hrtime_t;
typedef int id_t;
typedef short pri_t;
typedef short index_t;
typedef longlong_t offset_t;
typedef u_longlong_t u_offset_t;
typedef ulong_t pgcnt_t;
typedef int major_t;
typedef int minor_t;
#endif /* _SPL_TYPES_H */

35
include/spl/sys/types32.h Normal file
View File

@ -0,0 +1,35 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_TYPES32_H
#define _SPL_TYPES32_H
#include <sys/types.h>
/* Fixed 32-bit counterparts of the address, time and size types. */
typedef uint32_t caddr32_t;
typedef int32_t daddr32_t;
typedef int32_t time32_t;
typedef uint32_t size32_t;
#endif /* _SPL_TYPES32_H */

106
include/spl/sys/uio.h Normal file
View File

@ -0,0 +1,106 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_UIO_H
#define _SPL_UIO_H
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <asm/uaccess.h>
#include <sys/types.h>
/* Solaris name for the Linux struct iovec. */
typedef struct iovec iovec_t;
/* Direction of a uio transfer. */
typedef enum uio_rw {
UIO_READ = 0,
UIO_WRITE = 1,
} uio_rw_t;
/*
 * Kind of memory a uio describes; this is the type of uio_t.uio_segflg.
 * NOTE(review): UIO_BVEC presumably means the uio uses uio_bvec instead of
 * uio_iov -- confirm against the users.
 */
typedef enum uio_seg {
UIO_USERSPACE = 0,
UIO_SYSSPACE = 1,
UIO_USERISPACE = 2,
UIO_BVEC = 3,
} uio_seg_t;
/*
 * Describes one I/O request.  uio_iov and uio_bvec share storage in an
 * anonymous union, so only one of them is meaningful at a time.
 * NOTE(review): presumably uio_iovcnt counts the vector entries,
 * uio_resid is the number of bytes left and uio_skip is an offset into the
 * first entry -- confirm against the implementation.
 */
typedef struct uio {
union {
const struct iovec *uio_iov;
const struct bio_vec *uio_bvec;
};
int uio_iovcnt;
offset_t uio_loffset;
uio_seg_t uio_segflg;
uint16_t uio_fmode;
uint16_t uio_extflg;
offset_t uio_limit;
ssize_t uio_resid;
size_t uio_skip;
} uio_t;
/* Pairs a uio with an opaque private pointer. */
typedef struct aio_req {
uio_t *aio_uio;
void *aio_private;
} aio_req_t;
/* Discriminator kept in xuio_t.xu_type. */
typedef enum xuio_type {
UIOTYPE_ASYNCIO,
UIOTYPE_ZEROCOPY,
} xuio_type_t;
/* Number of uioa_page_t slots in xuio_t's xu_a_locked array. */
#define UIOA_IOV_MAX 16
typedef struct uioa_page_s {
int uioa_pfncnt;
void **uioa_ppp;
caddr_t uioa_base;
size_t uioa_len;
} uioa_page_t;
/*
 * Extended uio: a uio_t followed by a type tag and a union holding either
 * the xu_aio state or the xu_zc state.
 * NOTE(review): presumably xu_type selects which union member is valid
 * (UIOTYPE_ASYNCIO -> xu_aio, UIOTYPE_ZEROCOPY -> xu_zc) -- confirm.
 */
typedef struct xuio {
uio_t xu_uio;
enum xuio_type xu_type;
union {
struct {
uint32_t xu_a_state;
ssize_t xu_a_mbytes;
uioa_page_t *xu_a_lcur;
void **xu_a_lppp;
void *xu_a_hwst[4];
uioa_page_t xu_a_locked[UIOA_IOV_MAX];
} xu_aio;
struct {
int xu_zc_rw;
void *xu_zc_priv;
} xu_zc;
} xu_ext;
} xuio_t;
/*
 * Accessors for the xu_zc members of the structure that xuio points to.
 * Both expand to lvalues, so they can be read or assigned.  The argument
 * and the expansion are parenthesized so any pointer expression works.
 */
#define XUIO_XUZC_PRIV(xuio) ((xuio)->xu_ext.xu_zc.xu_zc_priv)
#define XUIO_XUZC_RW(xuio) ((xuio)->xu_ext.xu_zc.xu_zc_rw)
#endif /* _SPL_UIO_H */

42
include/spl/sys/user.h Normal file
View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2015 Cluster Inc.
* Produced at ClusterHQ Inc (cf, DISCLAIMER).
* Written by Richard Yao <richard.yao@clusterhq.com>.
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_USER_H
#define _SPL_USER_H
/*
 * We have uf_info_t for areleasef(). We implement areleasef() using a global
 * linked list of all open file descriptors with the task structs referenced,
 * so accessing the correct descriptor from areleasef() only requires knowing
 * about the Linux task_struct. Since this is internal to our compatibility
 * layer, we make it an opaque type.
 *
 * XXX: If the descriptor changes under us and we do not do a getf() between
 * the change and using it, we would get an incorrect reference.
 */
struct uf_info;
typedef struct uf_info uf_info_t;
/*
 * Reinterpret x as a uf_info_t pointer.  The argument is parenthesized so
 * the cast applies to the whole expression rather than its first operand.
 */
#define P_FINFO(x) ((uf_info_t *)(x))
#endif /* _SPL_USER_H */

51
include/spl/sys/vfs.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_ZFS_H
#define _SPL_ZFS_H
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/seq_file.h>
/* Size in bytes of the data area carried inside a fid_t. */
#define MAXFIDSZ 64
/*
 * File identifier: a length field followed by a buffer of MAXFIDSZ bytes,
 * overlaid with a long through the union.
 * NOTE(review): fid_pad presumably exists only to give the structure the
 * alignment of a long -- confirm.
 */
typedef struct spl_fid {
union {
long fid_pad;
struct {
ushort_t len; /* length of data in bytes */
char data[MAXFIDSZ]; /* data (variable len) */
} _fid;
} un;
} fid_t;
/* Shorthand for the members nested inside the union of a fid_t. */
#define fid_len un._fid.len
#define fid_data un._fid.data
#endif /* SPL_ZFS_H */

109
include/spl/sys/vmem.h Normal file
View File

@ -0,0 +1,109 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_VMEM_H
#define _SPL_VMEM_H
#include <sys/kmem.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
/*
 * vmem_t is an empty structure; it only provides a distinct type for the
 * arena pointers declared below, which are defined outside this header.
 */
typedef struct vmem { } vmem_t;
extern vmem_t *heap_arena;
extern vmem_t *zio_alloc_arena;
extern vmem_t *zio_arena;
/*
 * NOTE(review): typemask presumably takes the VMEM_ALLOC / VMEM_FREE bits
 * defined below -- confirm against the implementation.
 */
extern size_t vmem_size(vmem_t *vmp, int typemask);
/*
 * Memory allocation interfaces
 */
#define VMEM_ALLOC 0x01
#define VMEM_FREE 0x02
/*
 * Fallback used only when VMALLOC_TOTAL is not already defined: the span
 * between VMALLOC_START and VMALLOC_END.
 */
#ifndef VMALLOC_TOTAL
#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
#endif
/*
* vmem_* is an interface to a low level arena-based memory allocator on
* Illumos that is used to allocate virtual address space. The kmem SLAB
* allocator allocates slabs from it. Then the generic allocation functions
* kmem_{alloc,zalloc,free}() are layered on top of SLAB allocators.
*
* On Linux, the primary means of doing allocations is via kmalloc(), which
* is similarly layered on top of something called the buddy allocator. The
* buddy allocator is not available to kernel modules, it uses physical
* memory addresses rather than virtual memory addresses and is prone to
* fragmentation.
*
* Linux sets aside a relatively small address space for in-kernel virtual
* memory from which allocations can be done using vmalloc(). It might seem
* like a good idea to use vmalloc() to implement something similar to
* Illumos' allocator. However, this has the following problems:
*
* 1. Page directory table allocations are hard coded to use GFP_KERNEL.
* Consequently, any KM_PUSHPAGE or KM_NOSLEEP allocations done using
* vmalloc() will not have proper semantics.
*
* 2. Address space exhaustion is a real issue on 32-bit platforms where
* only a few 100MB are available. The kernel will handle it by spinning
* when it runs out of address space.
*
* 3. All vmalloc() allocations and frees are protected by a single global
* lock which serializes all allocations.
*
* 4. Accessing /proc/meminfo and /proc/vmallocinfo will iterate the entire
* list. The former will sum the allocations while the latter will print
* them to user space in a way that user space can keep the lock held
* indefinitely. When the total number of mapped allocations is large
* (several 100,000) a large amount of time will be spent waiting on locks.
*
* 5. Linux has a wait_on_bit() locking primitive that assumes physical
* memory is used, it simply does not work on virtual memory. Certain
* Linux structures (e.g. the superblock) use them and might be embedded
* into a structure from Illumos. This makes using Linux virtual memory
* unsafe in certain situations.
*
* It follows that we cannot obtain identical semantics to those on Illumos.
* Consequently, we implement the kmem_{alloc,zalloc,free}() functions in
* such a way that they can be used as drop-in replacements for small vmem_*
* allocations (8MB in size or smaller) and map vmem_{alloc,zalloc,free}()
* to them.
*/
/*
 * Allocation entry points.  vmem_alloc() and vmem_zalloc() forward to the
 * spl_vmem_*() functions and additionally pass the name of the calling
 * function and the line number of the call; vmem_free() forwards its two
 * arguments unchanged.
 */
#define vmem_alloc(sz, fl) spl_vmem_alloc((sz), (fl), __func__, __LINE__)
#define vmem_zalloc(sz, fl) spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
#define vmem_free(ptr, sz) spl_vmem_free((ptr), (sz))
/* Expands to a no-op expression; the argument is never evaluated. */
#define vmem_qcache_reap(ptr) ((void)0)
/* Backing functions for the macros above; implemented outside this header. */
extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
extern void spl_vmem_free(const void *ptr, size_t sz);
/* NOTE(review): presumably the subsystem setup/teardown hooks -- confirm. */
int spl_vmem_init(void);
void spl_vmem_fini(void);
#endif /* _SPL_VMEM_H */

84
include/spl/sys/vmsystm.h Normal file
View File

@ -0,0 +1,84 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_VMSYSTM_H
#define _SPL_VMSYSTM_H
#include <linux/mmzone.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <sys/types.h>
#include <asm/uaccess.h>
/* Solaris-style names mapped directly onto Linux kernel primitives. */
#define membar_producer() smp_wmb()
#define physmem totalram_pages
/*
 * freemem is the sum of the free page count and the inactive file,
 * inactive anonymous and reclaimable slab page counters.
 */
#define freemem (nr_free_pages() + \
global_page_state(NR_INACTIVE_FILE) + \
global_page_state(NR_INACTIVE_ANON) + \
global_page_state(NR_SLAB_RECLAIMABLE))
/*
 * Note the argument order: these macros take (from, to, size) while the
 * Linux helpers they expand to take (to, from, size).  The result is
 * whatever copy_from_user()/copy_to_user() return.
 */
#define xcopyin(from, to, size) copy_from_user(to, from, size)
#define xcopyout(from, to, size) copy_to_user(to, from, size)
/*
 * Copy len bytes from from to to using xcopyin().
 *
 * Returns 0 when xcopyin() returns zero and -1 for any non-zero result.
 */
static __inline__ int
copyin(const void *from, void *to, size_t len)
{
	return (xcopyin(from, to, len) ? -1 : 0);
}
/*
 * Copy len bytes from from to to using xcopyout().
 *
 * Any non-zero result from xcopyout() is collapsed to -1; a zero result is
 * returned as 0.
 */
static __inline__ int
copyout(const void *from, void *to, size_t len)
{
	int failed = (xcopyout(from, to, len) != 0);

	return (failed ? -1 : 0);
}
/*
 * Copy a string of at most len - 1 bytes from from into to.
 *
 * to is zero filled before the copy and only len - 1 bytes are requested,
 * so the final byte of to is always NUL and the result is always
 * terminated.
 *
 * from	source address, handed to copyin()
 * to	destination buffer of at least len bytes
 * len	size of the destination buffer in bytes
 * done	optional; when not NULL receives the length of the string now in
 *	to, counting the terminating NUL
 *
 * Returns -ENAMETOOLONG when len is 0, otherwise 0.  As before, a copyin()
 * failure is not reported to the caller.
 * NOTE(review): reporting it could reject a short string that sits just
 * before an unmapped page, since len - 1 bytes are always requested --
 * confirm before tightening.
 */
static __inline__ int
copyinstr(const void *from, void *to, size_t len, size_t *done)
{
	if (len == 0)
		return (-ENAMETOOLONG);

	/* XXX: Should return ENAMETOOLONG if 'strlen(from) > len' */

	memset(to, 0, len);
	(void) copyin(from, to, len - 1);

	/*
	 * Report the number of bytes in the resulting string rather than
	 * copyin()'s 0 / -1 status, which is not a byte count.
	 */
	if (done != NULL)
		*done = strlen((const char *)to) + 1;

	return (0);
}
#endif /* SPL_VMSYSTM_H */

204
include/spl/sys/vnode.h Normal file
View File

@ -0,0 +1,204 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_VNODE_H
#define _SPL_VNODE_H
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/buffer_head.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fs_struct.h>
#include <linux/mount.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/user.h>
/*
* Prior to linux-2.6.33 only O_DSYNC semantics were implemented and
* they used the O_SYNC flag. As of linux-2.6.33 this behavior
* was properly split into O_SYNC and O_DSYNC respectively.
*/
/* Fall back to O_SYNC when the kernel headers provide no O_DSYNC. */
#ifndef O_DSYNC
#define O_DSYNC O_SYNC
#endif
/*
 * Solaris-style F* open flags.  FREAD and FWRITE are plain numeric values;
 * the rest alias the Linux O_* flag of the matching name, except FRSYNC,
 * which is mapped to O_SYNC.
 */
#define FREAD 1
#define FWRITE 2
#define FCREAT O_CREAT
#define FTRUNC O_TRUNC
#define FOFFMAX O_LARGEFILE
#define FSYNC O_SYNC
#define FDSYNC O_DSYNC
#define FRSYNC O_SYNC
#define FEXCL O_EXCL
#define FDIRECT O_DIRECT
#define FAPPEND O_APPEND
/* The remaining values are fixed numbers rather than O_* aliases. */
#define FNODSYNC 0x10000 /* fsync pseudo flag */
#define FNOFOLLOW 0x20000 /* don't follow symlinks */
#define F_FREESP 11 /* Free file space */
/*
* The vnode AT_ flags are mapped to the Linux ATTR_* flags.
* This allows them to be used safely with an iattr structure.
* The AT_XVATTR flag has been added and mapped to the upper
* bit range to avoid conflicting with the standard Linux set.
*/
/*
 * AT_UID and AT_GID are undefined first so that any earlier definition of
 * those names is replaced by the ATTR_* mapping below.
 */
#undef AT_UID
#undef AT_GID

#define AT_MODE		ATTR_MODE
#define AT_UID		ATTR_UID
#define AT_GID		ATTR_GID
#define AT_SIZE		ATTR_SIZE
#define AT_ATIME	ATTR_ATIME
#define AT_MTIME	ATTR_MTIME
#define AT_CTIME	ATTR_CTIME

/*
 * The XVATTR flag occupies bit 31.  The constant is unsigned because left
 * shifting a signed 1 into the sign bit of an int is undefined behavior;
 * as an unsigned value it also widens to 0x80000000 rather than being
 * sign extended.
 */
#define ATTR_XVATTR	(1U << 31)
#define AT_XVATTR	ATTR_XVATTR

/* Every ATTR_* bit named above plus ATTR_FILE; ATTR_XVATTR is not included. */
#define ATTR_IATTR_MASK	(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_SIZE | \
			ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_FILE)
/*
 * Miscellaneous flag constants.  Their consumers are not visible in this
 * header.
 * NOTE(review): values repeat across the differently prefixed groups, so
 * each prefix is presumably its own flag namespace -- confirm against the
 * callers.
 */
#define CRCREAT 0x01
#define RMFILE 0x02
#define B_INVAL 0x01
#define B_TRUNC 0x02
#define LOOKUP_DIR 0x01
#define LOOKUP_XATTR 0x02
#define CREATE_XATTR_DIR 0x04
#define ATTR_NOACLCHECK 0x20
/*
 * vnode type codes.  Every enumerator carries an explicit value, running
 * from 0 (VNON) through 11 (VBAD).
 */
enum vtype {
	VNON	= 0,
	VREG	= 1,
	VDIR	= 2,
	VBLK	= 3,
	VCHR	= 4,
	VLNK	= 5,
	VFIFO	= 6,
	VDOOR	= 7,
	VPROC	= 8,
	VSOCK	= 9,
	VPORT	= 10,
	VBAD	= 11
};

typedef enum vtype vtype_t;
/*
 * Attribute record used by the vnode interfaces declared in this header
 * (see vn_getattr()).  va_mask is the attribute bit-mask.
 * NOTE(review): va_mask presumably holds the AT_* bits and selects which of
 * the other fields are meaningful -- confirm against the callers.
 */
typedef struct vattr {
enum vtype va_type; /* vnode type */
uint_t va_mask; /* attribute bit-mask */
ushort_t va_mode; /* access mode */
uid_t va_uid; /* owner uid */
gid_t va_gid; /* owner gid */
long va_fsid; /* file system id */
long va_nodeid; /* node number */
uint32_t va_nlink; /* number of links */
uint64_t va_size; /* file size */
struct timespec va_atime; /* last access */
struct timespec va_mtime; /* last modification */
struct timespec va_ctime; /* last change */
dev_t va_rdev; /* device */
uint64_t va_nblocks; /* space used */
uint32_t va_blksize; /* block size */
uint32_t va_seq; /* sequence */
struct dentry *va_dentry; /* dentry to wire */
} vattr_t;
/*
 * Compatibility vnode.  It carries a pointer to a Linux struct file
 * (v_file) together with Solaris-style bookkeeping fields; instances are
 * obtained with vn_alloc() and released with vn_free().
 */
typedef struct vnode {
struct file *v_file; /* linux file struct */
kmutex_t v_lock; /* protects vnode fields */
uint_t v_flag; /* vnode flags */
uint_t v_count; /* reference count */
void *v_data; /* private data for fs */
struct vfs *v_vfsp; /* ptr to containing VFS */
struct stdata *v_stream; /* associated stream */
enum vtype v_type; /* vnode type */
dev_t v_rdev; /* device (VCHR, VBLK) */
gfp_t v_gfp_mask; /* original mapping gfp mask */
} vnode_t;
/*
 * Per-descriptor record tying a Linux fd and the task it belongs to to a
 * vnode.  Records are linked together through f_list.
 * NOTE(review): presumably these are the objects handed out by vn_getf()
 * and dropped by vn_releasef() -- confirm against the implementation.
 */
typedef struct vn_file {
int f_fd; /* linux fd for lookup */
struct task_struct *f_task; /* linux task this fd belongs to */
struct file *f_file; /* linux file struct */
atomic_t f_ref; /* ref count */
kmutex_t f_lock; /* struct lock */
loff_t f_offset; /* offset */
vnode_t *f_vnode; /* vnode */
struct list_head f_list; /* list referenced file_t's */
} file_t;
/*
 * vnode interface; all of these are implemented outside this header.
 * NOTE(review): the parameters named x1..x7 are presumably placeholders
 * kept so the signatures line up with their Solaris counterparts --
 * confirm against the implementation before relying on them.
 */
/* Allocation and release of vnode_t objects. */
extern vnode_t *vn_alloc(int flag);
void vn_free(vnode_t *vp);
/* Conversions between mode_t values and vtype_t codes. */
extern vtype_t vn_mode_to_vtype(mode_t);
extern mode_t vn_vtype_to_mode(vtype_t);
/* Open, read/write, close and related operations on a vnode. */
extern int vn_open(const char *path, uio_seg_t seg, int flags, int mode,
vnode_t **vpp, int x1, void *x2);
extern int vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd);
extern int vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len,
offset_t off, uio_seg_t seg, int x1, rlim64_t x2,
void *x3, ssize_t *residp);
extern int vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4);
extern int vn_seek(vnode_t *vp, offset_t o, offset_t *op, void *ct);
extern int vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4);
extern int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4);
extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
offset_t offset, void *x6, void *x7);
/* Descriptor based lookup and release; vn_getf() returns a file_t. */
extern file_t *vn_getf(int fd);
extern void vn_releasef(int fd);
extern void vn_areleasef(int fd, uf_info_t *fip);
extern int vn_set_pwd(const char *filename);
/* NOTE(review): presumably the subsystem setup/teardown hooks -- confirm. */
int spl_vn_init(void);
void spl_vn_fini(void);
/* Solaris VOP_* names aliased onto the vn_*() functions declared above. */
#define VOP_CLOSE vn_close
#define VOP_SEEK vn_seek
#define VOP_GETATTR vn_getattr
#define VOP_FSYNC vn_fsync
#define VOP_SPACE vn_space
/* Expands to a no-op expression; none of the arguments are evaluated. */
#define VOP_PUTPAGE(vp, o, s, f, x1, x2) ((void)0)
/* Always expands to 0, whatever vnode is passed. */
#define vn_is_readonly(vp) 0
/* Descriptor helpers aliased onto the vn_*() implementations. */
#define getf vn_getf
#define releasef vn_releasef
#define areleasef vn_areleasef
/* Defined outside this header. */
extern vnode_t *rootdir;
#endif /* SPL_VNODE_H */

55
include/spl/sys/wait.h Normal file
View File

@ -0,0 +1,55 @@
/*
* Copyright (C) 2007-2014 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_WAIT_H
#define _SPL_WAIT_H
#include <linux/sched.h>
#include <linux/wait.h>
/*
 * spl_wait_on_bit() hides the two wait_on_bit() calling conventions.  When
 * HAVE_WAIT_ON_BIT_ACTION is defined an action callback is passed as the
 * third argument; otherwise wait_on_bit() is called with the three
 * arguments as given.
 */
#ifdef HAVE_WAIT_ON_BIT_ACTION
/* Action callback handed to wait_on_bit(): calls schedule(), returns 0. */
static inline int
spl_bit_wait(void *word)
{
	schedule();
	return (0);
}

#define spl_wait_on_bit(word, bit, mode) \
	wait_on_bit(word, bit, spl_bit_wait, mode)
#else
#define spl_wait_on_bit(word, bit, mode) wait_on_bit(word, bit, mode)
#endif /* HAVE_WAIT_ON_BIT_ACTION */
/*
 * Wait queue type aliases.  The head type is the same in both
 * configurations; only the entry type depends on whether the kernel
 * provides wait_queue_entry_t.
 */
typedef wait_queue_head_t spl_wait_queue_head_t;

#ifdef HAVE_WAIT_QUEUE_ENTRY_T
typedef wait_queue_entry_t spl_wait_queue_entry_t;
#else
typedef wait_queue_t spl_wait_queue_entry_t;
#endif
#endif /* SPL_WAIT_H */

78
include/spl/sys/zmod.h Normal file
View File

@ -0,0 +1,78 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
*
* z_compress_level/z_uncompress are nearly identical copies of the
* compress2/uncompress functions provided by the official zlib package
* available at http://zlib.net/. The only changes made were to slightly
* adapt the functions called to match the linux kernel implementation
* of zlib. The full zlib license follows:
*
* zlib.h -- interface of the 'zlib' general purpose compression library
* version 1.2.5, April 19th, 2010
*
* Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*
* Jean-loup Gailly
* Mark Adler
*/
#ifndef _SPL_ZMOD_H
#define _SPL_ZMOD_H
#include <sys/types.h>
#include <linux/zlib.h>
/*
 * zlib_deflate_workspacesize() is called either with no arguments or with
 * two.  Callers always supply (wb, ml); the arguments are dropped unless
 * HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE is defined.
 */
#ifndef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
#define spl_zlib_deflate_workspacesize(wb, ml) \
	zlib_deflate_workspacesize()
#else
#define spl_zlib_deflate_workspacesize(wb, ml) \
	zlib_deflate_workspacesize(wb, ml)
#endif /* HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE */
/*
 * Compression entry points, implemented outside this header.  The header
 * comment at the top of this file describes them as near copies of zlib's
 * compress2() and uncompress().
 * NOTE(review): destLen is presumably in/out (buffer capacity on entry,
 * bytes produced on return) as in zlib -- confirm against the
 * implementation.
 */
extern int z_compress_level(void *dest, size_t *destLen, const void *source,
size_t sourceLen, int level);
extern int z_uncompress(void *dest, size_t *destLen, const void *source,
size_t sourceLen);
/* NOTE(review): presumably the subsystem setup/teardown hooks -- confirm. */
int spl_zlib_init(void);
void spl_zlib_fini(void);
#endif /* SPL_ZMOD_H */

36
include/spl/sys/zone.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPL_ZONE_H
#define _SPL_ZONE_H
#include <sys/byteorder.h>
/*
 * Zone stubs.  Every macro expands to a constant and ignores its
 * arguments: the zone id is always GLOBAL_ZONEID, and both the dataset
 * visibility check and the global zone check always yield 1.
 */
#define GLOBAL_ZONEID 0
#define zone_dataset_visible(x, y) (1)
#define crgetzoneid(x) (GLOBAL_ZONEID)
#define INGLOBALZONE(z) (1)
#endif /* SPL_ZONE_H */

View File

@ -0,0 +1,357 @@
'\" te
.\"
.\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
.\"
.TH SPL-MODULE-PARAMETERS 5 "Oct 28, 2017"
.SH NAME
spl\-module\-parameters \- SPL module parameters
.SH DESCRIPTION
.sp
.LP
Description of the different parameters to the SPL module.
.SS "Module parameters"
.sp
.LP
.sp
.ne 2
.na
\fBspl_kmem_cache_expire\fR (uint)
.ad
.RS 12n
Cache expiration is part of default Illumos cache behavior. The idea is
that objects in magazines which have not been recently accessed should be
returned to the slabs periodically. This is known as cache aging and
when enabled objects will be typically returned after 15 seconds.
.sp
On the other hand Linux slabs are designed to never move objects back to
the slabs unless there is memory pressure. This is possible because under
Linux the cache will be notified when memory is low and objects can be
released.
.sp
By default only the Linux method is enabled. It has been shown to improve
responsiveness on low memory systems and not negatively impact the performance
of systems with more memory. This policy may be changed by setting the
\fBspl_kmem_cache_expire\fR bit mask as follows, both policies may be enabled
concurrently.
.sp
0x01 - Aging (Illumos), 0x02 - Low memory (Linux)
.sp
Default value: \fB0x02\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_kmem_threads\fR (uint)
.ad
.RS 12n
The number of threads created for the spl_kmem_cache task queue. This task
queue is responsible for allocating new slabs for use by the kmem caches.
For the majority of systems and workloads only a small number of threads are
required.
.sp
Default value: \fB4\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_reclaim\fR (uint)
.ad
.RS 12n
When this is set it prevents Linux from being able to rapidly reclaim all the
memory held by the kmem caches. This may be useful in circumstances where
it's preferable that Linux reclaim memory from some other subsystem first.
Setting this will increase the likelihood of out of memory events on a memory
constrained system.
.sp
Default value: \fB0\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_obj_per_slab\fR (uint)
.ad
.RS 12n
The preferred number of objects per slab in the cache. In general, a larger
value will increase the caches memory footprint while decreasing the time
required to perform an allocation. Conversely, a smaller value will minimize
the footprint and improve cache reclaim time but individual allocations may
take longer.
.sp
Default value: \fB8\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_obj_per_slab_min\fR (uint)
.ad
.RS 12n
The minimum number of objects allowed per slab. Normally slabs will contain
\fBspl_kmem_cache_obj_per_slab\fR objects but for caches that contain very
large objects it's desirable to only have a few, or even just one, object per
slab.
.sp
Default value: \fB1\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_max_size\fR (uint)
.ad
.RS 12n
The maximum size of a kmem cache slab in MiB. This effectively limits
the maximum cache object size to \fBspl_kmem_cache_max_size\fR /
\fBspl_kmem_cache_obj_per_slab\fR. Caches may not be created with
objects sized larger than this limit.
.sp
Default value: \fB32 (64-bit) or 4 (32-bit)\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_slab_limit\fR (uint)
.ad
.RS 12n
For small objects the Linux slab allocator should be used to make the most
efficient use of the memory. However, large objects are not supported by
the Linux slab and therefore the SPL implementation is preferred. This
value is used to determine the cutoff between a small and large object.
.sp
Objects of \fBspl_kmem_cache_slab_limit\fR or smaller will be allocated
using the Linux slab allocator, large objects use the SPL allocator. A
cutoff of 16K was determined to be optimal for architectures using 4K pages.
.sp
Default value: \fB16,384\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_kmem_limit\fR (uint)
.ad
.RS 12n
Depending on the size of a cache object it may be backed by kmalloc()'d
or vmalloc()'d memory. This is because the size of the required allocation
greatly impacts the best way to allocate the memory.
.sp
When objects are small and only a small number of memory pages need to be
allocated, ideally just one, then kmalloc() is very efficient. However,
when allocating multiple pages with kmalloc() it gets increasingly expensive
because the pages must be physically contiguous.
.sp
For this reason we shift to vmalloc() for slabs of large objects which
removes the need for contiguous pages. We cannot use vmalloc() in
all cases because there is significant locking overhead involved. This
function takes a single global lock over the entire virtual address range
which serializes all allocations. Using slightly different allocation
functions for small and large objects allows us to handle a wide range of
object sizes.
.sp
The \fBspl_kmem_cache_kmem_limit\fR value is used to determine this cutoff
size. One quarter the PAGE_SIZE is used as the default value because
\fBspl_kmem_cache_obj_per_slab\fR defaults to 16. This means that at
most we will need to allocate four contiguous pages.
.sp
Default value: \fBPAGE_SIZE/4\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_alloc_warn\fR (uint)
.ad
.RS 12n
As a general rule kmem_alloc() allocations should be small, preferably
just a few pages since they must be physically contiguous. Therefore, a
rate limited warning will be printed to the console for any kmem_alloc()
which exceeds a reasonable threshold.
.sp
The default warning threshold is set to eight pages but capped at 32K to
accommodate systems using large pages. This value was selected to be small
enough to ensure the largest allocations are quickly noticed and fixed,
but large enough to avoid logging any warnings when an allocation size is
larger than optimal but not a serious concern. Since this value is tunable,
developers are encouraged to set it lower when testing so any new largish
allocations are quickly caught. These warnings may be disabled by setting
the threshold to zero.
.sp
Default value: \fB32,768\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_alloc_max\fR (uint)
.ad
.RS 12n
Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
Allocations which are marginally smaller than this limit may succeed but
should still be avoided due to the expense of locating a contiguous range
of free pages. Therefore, a maximum kmem size with a reasonable safety
margin of 4x is set. Kmem_alloc() allocations larger than this maximum
will quickly fail. Vmem_alloc() allocations less than or equal to this
value will use kmalloc(), but shift to vmalloc() when exceeding this value.
.sp
Default value: \fBKMALLOC_MAX_SIZE/4\fR
.RE
.sp
.ne 2
.na
\fBspl_kmem_cache_magazine_size\fR (uint)
.ad
.RS 12n
Cache magazines are an optimization designed to minimize the cost of
allocating memory. They do this by keeping a per-cpu cache of recently
freed objects, which can then be reallocated without taking a lock. This
can improve performance on highly contended caches. However, because
objects in magazines will prevent otherwise empty slabs from being
immediately released this may not be ideal for low memory machines.
.sp
For this reason \fBspl_kmem_cache_magazine_size\fR can be used to set a
maximum magazine size. When this value is set to 0 the magazine size will
be automatically determined based on the object size. Otherwise magazines
will be limited to 2-256 objects per magazine (i.e. per cpu). Magazines
may never be entirely disabled in this implementation.
.sp
Default value: \fB0\fR
.RE
.sp
.ne 2
.na
\fBspl_hostid\fR (ulong)
.ad
.RS 12n
The system hostid, when set this can be used to uniquely identify a system.
By default this value is set to zero which indicates the hostid is disabled.
It can be explicitly enabled by placing a unique non-zero value in
\fB/etc/hostid\fR.
.sp
Default value: \fB0\fR
.RE
.sp
.ne 2
.na
\fBspl_hostid_path\fR (charp)
.ad
.RS 12n
The expected path to locate the system hostid when specified. This value
may be overridden for non-standard configurations.
.sp
Default value: \fB/etc/hostid\fR
.RE
.sp
.ne 2
.na
\fBspl_panic_halt\fR (uint)
.ad
.RS 12n
Cause a kernel panic on assertion failures. When not enabled, the thread is
halted to facilitate further debugging.
.sp
Set to a non-zero value to enable.
.sp
Default value: \fB0\fR
.RE
.sp
.ne 2
.na
\fBspl_taskq_kick\fR (uint)
.ad
.RS 12n
Kick stuck taskq to spawn threads. When writing a non-zero value to it, it will
scan all the taskqs. If any of them have a pending task more than 5 seconds old,
it will kick it to spawn more threads. This can be used if you find a rare
deadlock occurs because one or more taskqs didn't spawn a thread when it should.
.sp
Default value: \fB0\fR
.RE
.sp
.ne 2
.na
\fBspl_taskq_thread_bind\fR (int)
.ad
.RS 12n
Bind taskq threads to specific CPUs. When enabled all taskq threads will
be distributed evenly over the available CPUs. By default, this behavior
is disabled to allow the Linux scheduler the maximum flexibility to determine
where a thread should run.
.sp
Default value: \fB0\fR
.RE
.sp
.ne 2
.na
\fBspl_taskq_thread_dynamic\fR (int)
.ad
.RS 12n
Allow dynamic taskqs. When enabled taskqs which set the TASKQ_DYNAMIC flag
will by default create only a single thread. New threads will be created on
demand up to a maximum allowed number to facilitate the completion of
outstanding tasks. Threads which are no longer needed will be promptly
destroyed. By default this behavior is enabled but it can be disabled to
aid performance analysis or troubleshooting.
.sp
Default value: \fB1\fR
.RE
.sp
.ne 2
.na
\fBspl_taskq_thread_priority\fR (int)
.ad
.RS 12n
Allow newly created taskq threads to set a non-default scheduler priority.
When enabled the priority specified when a taskq is created will be applied
to all threads created by that taskq. When disabled all threads will use
the default Linux kernel thread priority. By default, this behavior is
enabled.
.sp
Default value: \fB1\fR
.RE
.sp
.ne 2
.na
\fBspl_taskq_thread_sequential\fR (int)
.ad
.RS 12n
The number of items a taskq worker thread must handle without interruption
before requesting a new worker thread be spawned. This is used to control
how quickly taskqs ramp up the number of threads processing the queue.
Because Linux thread creation and destruction are relatively inexpensive a
small default value has been selected. This means that normally threads will
be created aggressively which is desirable. Increasing this value will
result in a slower thread creation rate which may be preferable for some
configurations.
.sp
Default value: \fB4\fR
.RE
.sp
.ne 2
.na
\fBspl_max_show_tasks\fR (uint)
.ad
.RS 12n
The maximum number of tasks per pending list in each taskq shown in
/proc/spl/{taskq,taskq-all}. Write 0 to turn off the limit. The proc file will
walk the lists with lock held, reading it could cause a lock up if the list
grows too large without limiting the output. "(truncated)" will be shown if the
list is larger than the limit.
.sp
Default value: \fB512\fR
.RE

View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

View File

@ -0,0 +1 @@
COMPATIBILITY LAYER FOR OPENZFS ON LINUX

36
module/spl/spl-atomic.c Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Atomic Implementation.
*/
#include <sys/atomic.h>
#ifdef ATOMIC_SPINLOCK
/*
 * Global atomic lock declarations.
 *
 * Two spinlocks are defined and exported to other modules, but only when
 * ATOMIC_SPINLOCK is defined.  Presumably they serialize the 32-bit and
 * 64-bit spinlock-based atomic operations declared in <sys/atomic.h> --
 * confirm against that header.
 */
DEFINE_SPINLOCK(atomic32_lock);
DEFINE_SPINLOCK(atomic64_lock);
EXPORT_SYMBOL(atomic32_lock);
EXPORT_SYMBOL(atomic64_lock);
#endif /* ATOMIC_SPINLOCK */

410
module/spl/spl-condvar.c Normal file
View File

@ -0,0 +1,410 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Condition Variables Implementation.
*/
#include <sys/condvar.h>
#include <sys/time.h>
#include <linux/hrtimer.h>
/*
 * Prepare a condition variable for use.
 *
 * Only the default form is accepted: 'name' and 'arg' must be NULL and
 * 'type' must be CV_DEFAULT (each is checked with ASSERT).  On return the
 * cv carries CV_MAGIC, both wait queues are initialized, there are no
 * waiters, one reference is held, and no mutex is associated yet.
 */
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	/* No mutex is bound until the first waiter arrives. */
	cvp->cv_mutex = NULL;

	/* The initial reference is the one dropped by __cv_destroy(). */
	atomic_set(&cvp->cv_refs, 1);
	atomic_set(&cvp->cv_waiters, 0);

	init_waitqueue_head(&cvp->cv_destroy);
	init_waitqueue_head(&cvp->cv_event);

	cvp->cv_magic = CV_MAGIC;
}
EXPORT_SYMBOL(__cv_init);
/*
 * Wake-up predicate used by __cv_destroy().
 *
 * Returns 1 once the cv has neither waiters nor references left, and 0
 * while either counter is still non-zero.
 */
static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	/* Still in use by a waiter or by a holder of a reference. */
	if (atomic_read(&cvp->cv_waiters) || atomic_read(&cvp->cv_refs))
		return (0);

	ASSERT(cvp->cv_mutex == NULL);
	ASSERT(!waitqueue_active(&cvp->cv_event));

	return (1);
}
/*
 * Tear down a condition variable.
 *
 * Marks the cv as CV_DESTROY, drops one reference (the one set by
 * __cv_init()), and then blocks until cv_destroy_wakeup() reports that no
 * waiters and no references remain.
 */
void
__cv_destroy(kcondvar_t *cvp)
{
ASSERT(cvp);
ASSERT(cvp->cv_magic == CV_MAGIC);
cvp->cv_magic = CV_DESTROY;
atomic_dec(&cvp->cv_refs);
/*
 * Block until all waiters are woken and references dropped.  The wait
 * uses a timeout of 1, so the condition is re-evaluated by the loop
 * even if no wake_up() on cv_destroy is ever delivered.
 */
while (cv_destroy_wakeup(cvp) == 0)
wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);
ASSERT3P(cvp->cv_mutex, ==, NULL);
ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);
/*
 * Common body of the untimed waits.
 *
 * The caller must hold 'mp' (checked with ASSERT).  The mutex is dropped
 * before sleeping and re-acquired before returning.  'state' is the task
 * state handed to prepare_to_wait_exclusive(); a non-zero 'io' selects
 * io_schedule() instead of schedule().
 */
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
DEFINE_WAIT(wait);
kmutex_t *m;
ASSERT(cvp);
ASSERT(mp);
ASSERT(cvp->cv_magic == CV_MAGIC);
ASSERT(mutex_owned(mp));
/* Hold a reference; __cv_destroy() waits for cv_refs to reach zero. */
atomic_inc(&cvp->cv_refs);
/* Record 'mp' as the cv's mutex if none is recorded yet. */
m = ACCESS_ONCE(cvp->cv_mutex);
if (!m)
m = xchg(&cvp->cv_mutex, mp);
/* Ensure the same mutex is used by all callers */
ASSERT(m == NULL || m == mp);
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
atomic_inc(&cvp->cv_waiters);
/*
 * Mutex should be dropped after prepare_to_wait() this
 * ensures we're linked in to the waiters list and avoids the
 * race where 'cvp->cv_waiters > 0' but the list is empty.
 */
mutex_exit(mp);
if (io)
io_schedule();
else
schedule();
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
/*
 * This is set without any lock, so it's racy. But this is
 * just for debug anyway, so make it best-effort
 */
cvp->cv_mutex = NULL;
/* Let a thread blocked in __cv_destroy() re-check its condition. */
wake_up(&cvp->cv_destroy);
}
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
/*
 * Hold mutex after we release the cvp, otherwise we could deadlock
 * with a thread holding the mutex and calling cv_destroy.
 */
mutex_enter(mp);
}
/*
 * Wait on 'cvp' in TASK_UNINTERRUPTIBLE state using the regular
 * scheduler entry point (io == 0 in cv_wait_common()).
 */
void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	const int io = 0;

	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, io);
}
EXPORT_SYMBOL(__cv_wait);
/*
 * Wait on 'cvp' in TASK_UNINTERRUPTIBLE state, sleeping through
 * io_schedule() (io == 1 in cv_wait_common()).
 */
void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	const int io = 1;

	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, io);
}
EXPORT_SYMBOL(__cv_wait_io);
/*
 * Wait on 'cvp' in TASK_INTERRUPTIBLE state using the regular scheduler
 * entry point.  The function returns void, so the caller is not told
 * why the wait ended.
 */
void
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	const int io = 0;

	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, io);
}
EXPORT_SYMBOL(__cv_wait_sig);
#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else
/* Timer callback: 'data' carries the task to wake, cast to an integer. */
static void
__cv_wakeup(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}

/*
 * Fallback used when HAVE_IO_SCHEDULE_TIMEOUT is not defined.
 *
 * Arms a one-shot timer 'time_left' jiffies from now that wakes the
 * current task, sleeps in io_schedule(), then cancels the timer.  The
 * caller is expected to have set the task state already.
 *
 * Returns the number of jiffies remaining until the requested expiry,
 * or 0 if the expiry has already passed.
 */
static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct timer_list timer;

	/*
	 * The timer lives on this stack frame, so use the on-stack setup
	 * variant; it both initializes the timer and annotates it for the
	 * timer debug-object tracker.
	 */
	setup_timer_on_stack(&timer, __cv_wakeup, (unsigned long)current);
	timer.expires = expire_time;
	add_timer(&timer);

	io_schedule();

	/* Make sure the callback is not running before the frame goes away. */
	del_timer_sync(&timer);
	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif
/*
 * Common body of the jiffies-based timed waits.
 *
 * 'expire_time' argument is an absolute time in jiffies.  The caller must
 * hold 'mp' (checked with ASSERT); it is dropped while sleeping and
 * re-acquired before returning.  'state' is the task state handed to
 * prepare_to_wait_exclusive(); a non-zero 'io' selects
 * spl_io_schedule_timeout() instead of schedule_timeout().
 *
 * Return value is the time left as reported by the schedule call when it
 * is positive, or -1 if the timeout occurred.  If 'expire_time' is not in
 * the future on entry, -1 is returned without sleeping.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
int state, int io)
{
DEFINE_WAIT(wait);
kmutex_t *m;
clock_t time_left;
ASSERT(cvp);
ASSERT(mp);
ASSERT(cvp->cv_magic == CV_MAGIC);
ASSERT(mutex_owned(mp));
/* XXX - Does not handle jiffie wrap properly */
time_left = expire_time - jiffies;
if (time_left <= 0)
return (-1);
/* Hold a reference; __cv_destroy() waits for cv_refs to reach zero. */
atomic_inc(&cvp->cv_refs);
/* Record 'mp' as the cv's mutex if none is recorded yet. */
m = ACCESS_ONCE(cvp->cv_mutex);
if (!m)
m = xchg(&cvp->cv_mutex, mp);
/* Ensure the same mutex is used by all callers */
ASSERT(m == NULL || m == mp);
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
atomic_inc(&cvp->cv_waiters);
/*
 * Mutex should be dropped after prepare_to_wait() this
 * ensures we're linked in to the waiters list and avoids the
 * race where 'cvp->cv_waiters > 0' but the list is empty.
 */
mutex_exit(mp);
if (io)
time_left = spl_io_schedule_timeout(time_left);
else
time_left = schedule_timeout(time_left);
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
/*
 * This is set without any lock, so it's racy. But this is
 * just for debug anyway, so make it best-effort
 */
cvp->cv_mutex = NULL;
/* Let a thread blocked in __cv_destroy() re-check its condition. */
wake_up(&cvp->cv_destroy);
}
finish_wait(&cvp->cv_event, &wait);
atomic_dec(&cvp->cv_refs);
/*
 * Hold mutex after we release the cvp, otherwise we could deadlock
 * with a thread holding the mutex and calling cv_destroy.
 */
mutex_enter(mp);
return (time_left > 0 ? time_left : -1);
}
/*
 * Timed wait in TASK_UNINTERRUPTIBLE state using schedule_timeout().
 * 'exp_time' and the return value are as for __cv_timedwait_common().
 */
clock_t
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	clock_t rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_UNINTERRUPTIBLE, 0);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait);
/*
 * Timed wait in TASK_UNINTERRUPTIBLE state that sleeps through
 * spl_io_schedule_timeout().  'exp_time' and the return value are as for
 * __cv_timedwait_common().
 */
clock_t
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	clock_t rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_UNINTERRUPTIBLE, 1);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_io);
/*
 * Timed wait in TASK_INTERRUPTIBLE state using schedule_timeout().
 * 'exp_time' and the return value are as for __cv_timedwait_common().
 */
clock_t
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	clock_t rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);
/*
 * High-resolution timed wait.
 *
 * 'expire_time' argument is an absolute clock time in nanoseconds, on the
 * same clock as gethrtime().  The caller must hold 'mp' (checked with
 * ASSERT); it is dropped while sleeping and re-acquired before returning.
 * 'state' is the task state handed to prepare_to_wait_exclusive().
 *
 * Return value is the time left (expire_time - now) converted to ticks,
 * or -1 if the timeout occurred.  A positive remainder shorter than one
 * tick is reported as 1 rather than 0, so the result is always either
 * positive or -1.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	clock_t ticks_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	/* Hold a reference; __cv_destroy() waits for cv_refs to reach zero. */
	atomic_inc(&cvp->cv_refs);

	/* Record 'mp' as the cv's mutex if none is recorded yet. */
	m = ACCESS_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * Mutex should be dropped after prepare_to_wait() this
	 * ensures we're linked in to the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	/*
	 * Allow a 100 us range to give kernel an opportunity to coalesce
	 * interrupts
	 */
	ktime_left = ktime_set(0, time_left);
	schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC,
	    HRTIMER_MODE_REL);

	/* No more waiters a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort
		 */
		cvp->cv_mutex = NULL;
		/* Let a thread blocked in __cv_destroy() re-check. */
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Hold mutex after we release the cvp, otherwise we could deadlock
	 * with a thread holding the mutex and calling cv_destroy.
	 */
	mutex_enter(mp);

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	/*
	 * NSEC_TO_TICK() of a remainder shorter than one tick would yield 0,
	 * which is neither "time left" nor the -1 timeout marker.  Round such
	 * a remainder up to a single tick so callers never see 0.
	 */
	ticks_left = NSEC_TO_TICK(time_left);

	return (ticks_left > 0 ? ticks_left : 1);
}
/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 *
 * Converts ('tim', 'res', 'flag') into an absolute expiration in
 * nanoseconds and hands it to __cv_timedwait_hires():
 *   - when 'res' is greater than 1, 'tim' is aligned down to a multiple
 *     of 'res', after first adding 'res - 1' if CALLOUT_FLAG_ROUNDUP is
 *     set (i.e. aligned up instead);
 *   - unless CALLOUT_FLAG_ABSOLUTE is set, the current gethrtime() value
 *     is added to turn the relative time into an absolute one.
 */
static clock_t
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	hrtime_t expire = tim;

	/* Align expiration to the specified resolution. */
	if (res > 1) {
		if (flag & CALLOUT_FLAG_ROUNDUP)
			expire += res - 1;
		expire -= expire % res;
	}

	if ((flag & CALLOUT_FLAG_ABSOLUTE) == 0)
		expire += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, expire, state));
}
/*
 * High-resolution timed wait in TASK_UNINTERRUPTIBLE state.  Arguments
 * and return value are as for cv_timedwait_hires_common().
 */
clock_t
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	const int state = TASK_UNINTERRUPTIBLE;

	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, state));
}
EXPORT_SYMBOL(cv_timedwait_hires);
/*
 * High-resolution timed wait in TASK_INTERRUPTIBLE state.  Arguments and
 * return value are as for cv_timedwait_hires_common().
 */
clock_t
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	const int state = TASK_INTERRUPTIBLE;

	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, state));
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);
/*
 * Wake one thread waiting on 'cvp', if there is any.
 *
 * A reference is held for the duration of the call so that
 * __cv_destroy(), which waits for cv_refs to reach zero, cannot complete
 * while the wake-up is in progress.
 */
void
__cv_signal(kcondvar_t *cvp)
{
ASSERT(cvp);
ASSERT(cvp->cv_magic == CV_MAGIC);
atomic_inc(&cvp->cv_refs);
/*
 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one
 * waiter will be set runnable with each call to wake_up().
 * Additionally wake_up() holds a spin_lock associated with
 * the wait queue to ensure we don't race waking up processes.
 */
if (atomic_read(&cvp->cv_waiters) > 0)
wake_up(&cvp->cv_event);
atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);
/*
 * Wake every thread waiting on 'cvp', if there are any.
 *
 * A reference is held for the duration of the call so that
 * __cv_destroy(), which waits for cv_refs to reach zero, cannot complete
 * while the wake-up is in progress.
 */
void
__cv_broadcast(kcondvar_t *cvp)
{
ASSERT(cvp);
ASSERT(cvp->cv_magic == CV_MAGIC);
atomic_inc(&cvp->cv_refs);
/*
 * wake_up_all() will wake up all waiters, even those which
 * have the WQ_FLAG_EXCLUSIVE flag set.
 */
if (atomic_read(&cvp->cv_waiters) > 0)
wake_up_all(&cvp->cv_event);
atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);

200
module/spl/spl-cred.c Normal file
View File

@ -0,0 +1,200 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Credential Implementation.
*/
#include <sys/cred.h>
/*
 * Binary search 'group_info' for the group 'grp'.
 *
 * Returns 1 if the group is present and 0 otherwise; a NULL 'group_info'
 * is treated as an empty list.  The search relies on the entries being
 * sorted in ascending gid order -- presumably guaranteed by whoever
 * populated the group_info; verify against the kernel's groups_sort().
 *
 * The two gids are compared directly instead of being subtracted.  A
 * difference computed in unsigned arithmetic and stored in an int has
 * the wrong sign when the gids are more than 2^31 apart, which would
 * steer the search into the wrong half of the list.
 */
static int
#ifdef HAVE_KUIDGID_T
cr_groups_search(const struct group_info *group_info, kgid_t grp)
#else
cr_groups_search(const struct group_info *group_info, gid_t grp)
#endif
{
	unsigned int left, right, mid;
	gid_t want, have;

	if (!group_info)
		return (0);

	want = KGID_TO_SGID(grp);
	left = 0;
	right = group_info->ngroups;
	while (left < right) {
		/* Written this way so the midpoint cannot overflow. */
		mid = left + (right - left) / 2;
		have = KGID_TO_SGID(GROUP_AT(group_info, mid));

		if (want > have)
			left = mid + 1;
		else if (want < have)
			right = mid;
		else
			return (1);
	}

	return (0);
}
/*
 * Take a reference on the credential via get_cred().  The pointer that
 * get_cred() returns is deliberately discarded.
 */
void
crhold(cred_t *cr)
{
	const cred_t *held = cr;

	(void) get_cred(held);
}
/* Drop a reference on the credential via put_cred(). */
void
crfree(cred_t *cr)
{
	const cred_t *held = cr;

	put_cred(held);
}
/*
 * Report how many supplemental groups the credential carries.
 *
 * Without HAVE_GROUP_INFO_GID the count is capped at NGROUPS_PER_BLOCK
 * (with a one-time kernel warning when the cap is applied), so that it
 * matches what crgetgroups() can hand back.
 */
int
crgetngroups(const cred_t *cr)
{
	int ngroups = cr->group_info->ngroups;

#ifndef HAVE_GROUP_INFO_GID
	/*
	 * For Linux <= 4.8,
	 * crgetgroups() only returns gi->blocks[0], which contains only
	 * the first NGROUPS_PER_BLOCK groups.
	 */
	if (ngroups > NGROUPS_PER_BLOCK) {
		WARN_ON_ONCE(1);
		ngroups = NGROUPS_PER_BLOCK;
	}
#endif

	return (ngroups);
}
/*
 * Hand back the credential's array of supplemental gids.  The returned
 * address is safe to use as long as the caller has taken a reference
 * with crhold().
 *
 * Linux 4.9 API change: group_info switched from a 2d array reached via
 * ->blocks to a 1d array reached via ->gid.  In the ->blocks case only
 * the first block is returned, or NULL when there are no blocks.
 */
gid_t *
crgetgroups(const cred_t *cr)
{
	struct group_info *gi = cr->group_info;

#ifdef HAVE_GROUP_INFO_GID
	return (KGIDP_TO_SGIDP(gi->gid));
#else
	if (gi->nblocks <= 0)
		return (NULL);

	return (KGIDP_TO_SGIDP(gi->blocks[0]));
#endif
}
/*
 * Report whether 'gid' is one of the supplemental groups of 'cr'.
 * Returns 1 when it is and 0 when it is not, as cr_groups_search() does.
 */
int
groupmember(gid_t gid, const cred_t *cr)
{
	return (cr_groups_search(cr->group_info, SGID_TO_KGID(gid)));
}
/* Effective user id: cr->euid converted with KUID_TO_SUID(). */
uid_t
crgetuid(const cred_t *cr)
{
	uid_t euid = KUID_TO_SUID(cr->euid);

	return (euid);
}
/* Real user id: cr->uid converted with KUID_TO_SUID(). */
uid_t
crgetruid(const cred_t *cr)
{
	uid_t ruid = KUID_TO_SUID(cr->uid);

	return (ruid);
}
/* Saved user id: cr->suid converted with KUID_TO_SUID(). */
uid_t
crgetsuid(const cred_t *cr)
{
	uid_t suid = KUID_TO_SUID(cr->suid);

	return (suid);
}
/* Filesystem user id: cr->fsuid converted with KUID_TO_SUID(). */
uid_t
crgetfsuid(const cred_t *cr)
{
	uid_t fsuid = KUID_TO_SUID(cr->fsuid);

	return (fsuid);
}
/* Effective group id: cr->egid converted with KGID_TO_SGID(). */
gid_t
crgetgid(const cred_t *cr)
{
	gid_t egid = KGID_TO_SGID(cr->egid);

	return (egid);
}
/* Real group id: cr->gid converted with KGID_TO_SGID(). */
gid_t
crgetrgid(const cred_t *cr)
{
	gid_t rgid = KGID_TO_SGID(cr->gid);

	return (rgid);
}
/* Saved group id: cr->sgid converted with KGID_TO_SGID(). */
gid_t
crgetsgid(const cred_t *cr)
{
	gid_t sgid = KGID_TO_SGID(cr->sgid);

	return (sgid);
}
/* Filesystem group id: cr->fsgid converted with KGID_TO_SGID(). */
gid_t
crgetfsgid(const cred_t *cr)
{
	gid_t fsgid = KGID_TO_SGID(cr->fsgid);

	return (fsgid);
}
/*
 * Credential interfaces exported to other kernel modules.  The static
 * helper cr_groups_search() is intentionally not in this list.
 */
EXPORT_SYMBOL(crhold);
EXPORT_SYMBOL(crfree);
EXPORT_SYMBOL(crgetuid);
EXPORT_SYMBOL(crgetruid);
EXPORT_SYMBOL(crgetsuid);
EXPORT_SYMBOL(crgetfsuid);
EXPORT_SYMBOL(crgetgid);
EXPORT_SYMBOL(crgetrgid);
EXPORT_SYMBOL(crgetsgid);
EXPORT_SYMBOL(crgetfsgid);
EXPORT_SYMBOL(crgetngroups);
EXPORT_SYMBOL(crgetgroups);
EXPORT_SYMBOL(groupmember);

133
module/spl/spl-err.c Normal file
View File

@ -0,0 +1,133 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Error Implementation.
*/
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <linux/ratelimit.h>
/*
 * It is often useful to have an SPL panic actually crash the node so that
 * the event is noticed and a crash dump can be collected for later
 * analysis.  When spl_panic_halt is non-zero, spl_panic() calls panic()
 * with the formatted message.  The default of zero keeps the existing
 * behavior: spl_panic() dumps the stack and halts only the calling thread.
 */
/* BEGIN CSTYLED */
unsigned int spl_panic_halt;
module_param(spl_panic_halt, uint, 0644);
MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures");
/* END CSTYLED */
/*
 * Stack traces are rate limited to at most 5 per 60 seconds so that debug
 * code cannot be used to flood the console (denial-of-service).
 */
DEFINE_RATELIMIT_STATE(dumpstack_ratelimit_state, 60 * HZ, 5);

/*
 * Print the pid and the stack of the current task, unless the rate limit
 * above has been exceeded, in which case nothing is printed.
 */
void
spl_dumpstack(void)
{
	if (!__ratelimit(&dumpstack_ratelimit_state))
		return;

	printk("Showing stack for process %d\n", current->pid);
	dump_stack();
}
EXPORT_SYMBOL(spl_dumpstack);
/*
 * Report a fatal SPL error.  The formatted message and the location
 * (file basename, line, function) are logged at KERN_EMERG.  If the
 * spl_panic_halt tunable is set the kernel is panicked; otherwise the
 * stack is dumped and the calling thread is parked forever in an
 * uninterruptible sleep so it can be inspected.
 *
 * file, func, line: source location of the failure.
 * fmt, ...:         printf-style message.
 *
 * Never returns in practice; the int return type only exists so the call
 * can be used inside expressions.
 */
int
spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
{
	char msg[MAXMSGLEN];
	const char *slash, *shortname;
	va_list args;

	/* Strip any leading directory components from the file name. */
	slash = strrchr(file, '/');
	shortname = (slash != NULL) ? slash + 1 : file;

	va_start(args, fmt);
	(void) vsnprintf(msg, sizeof (msg), fmt, args);
	va_end(args);

	printk(KERN_EMERG "%s", msg);
	printk(KERN_EMERG "PANIC at %s:%d:%s()\n", shortname, line, func);

	if (spl_panic_halt)
		panic("%s", msg);

	spl_dumpstack();

	/* Halt the thread to facilitate further debugging */
	set_current_state(TASK_UNINTERRUPTIBLE);
	for (;;)
		schedule();

	/* Unreachable */
	return (1);
}
EXPORT_SYMBOL(spl_panic);
/*
 * Format a message and emit it according to the severity ce:
 *
 *   CE_IGNORE - formatted but not printed
 *   CE_CONT   - printed as-is, no prefix and no added newline
 *   CE_NOTE   - printed at KERN_NOTICE with a "NOTICE: " prefix
 *   CE_WARN   - printed at KERN_WARNING with a "WARNING: " prefix
 *   CE_PANIC  - printed at KERN_EMERG with a "PANIC: " prefix, then the
 *               stack is dumped and the calling thread is halted forever
 *
 * Any other value of ce is silently ignored.
 */
void
vcmn_err(int ce, const char *fmt, va_list ap)
{
	char text[MAXMSGLEN];

	vsnprintf(text, MAXMSGLEN - 1, fmt, ap);

	if (ce == CE_CONT) {
		printk("%s", text);
	} else if (ce == CE_NOTE) {
		printk(KERN_NOTICE "NOTICE: %s\n", text);
	} else if (ce == CE_WARN) {
		printk(KERN_WARNING "WARNING: %s\n", text);
	} else if (ce == CE_PANIC) {
		printk(KERN_EMERG "PANIC: %s\n", text);
		spl_dumpstack();

		/* Halt the thread to facilitate further debugging */
		set_current_state(TASK_UNINTERRUPTIBLE);
		for (;;)
			schedule();
	}
} /* vcmn_err() */
EXPORT_SYMBOL(vcmn_err);
/*
 * Variadic front end for vcmn_err(): packages the trailing arguments into
 * a va_list and forwards them together with the severity and format.
 */
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
} /* cmn_err() */
EXPORT_SYMBOL(cmn_err);

775
module/spl/spl-generic.c Normal file
View File

@ -0,0 +1,775 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Generic Implementation.
*/
#include <sys/sysmacros.h>
#include <sys/systeminfo.h>
#include <sys/vmsystm.h>
#include <sys/kobj.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/taskq.h>
#include <sys/tsd.h>
#include <sys/zmod.h>
#include <sys/debug.h>
#include <sys/proc.h>
#include <sys/kstat.h>
#include <sys/file.h>
#include <linux/ctype.h>
#include <sys/disp.h>
#include <sys/random.h>
#include <sys/strings.h>
#include <linux/kmod.h>
/* Version string assembled at build time from the SPL_META_* macros. */
char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE;
EXPORT_SYMBOL(spl_version);
/* BEGIN CSTYLED */
/*
 * Optional hostid override.  zone_get_hostid() returns this value (masked
 * with HW_HOSTID_MASK) whenever it is non-zero, and only consults the
 * hostid file when it is zero.
 */
unsigned long spl_hostid = 0;
EXPORT_SYMBOL(spl_hostid);
module_param(spl_hostid, ulong, 0644);
MODULE_PARM_DESC(spl_hostid, "The system hostid.");
/* END CSTYLED */
/* Global proc_t instance; zeroed by spl_init() when the module loads. */
proc_t p0;
EXPORT_SYMBOL(p0);
/*
* Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
*
* "Further scramblings of Marsaglia's xorshift generators"
* http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
*
* random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
* is to provide bytes containing random numbers. It is mapped to /dev/urandom
* on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's
* random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so
* we can implement it using a fast PRNG that we seed using Linux' actual
* equivalent to random_get_pseudo_bytes(). We do this by providing each CPU
* with an independent seed so that all calls to random_get_pseudo_bytes() are
* free of atomic instructions.
*
* A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
* to generate words larger than 128 bits will paradoxically be limited to
* `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
* 128-bit words and selecting the first will implicitly select the second. If
* a caller finds this behavior undesirable, random_get_bytes() should be used
* instead.
*
* XXX: Linux interrupt handlers that trigger within the critical section
* formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
* see the same numbers. Nothing in the code currently calls this in an
* interrupt handler, so this is considered to be okay. If that becomes a
* problem, we could create a set of per-cpu variables for interrupt handlers
* and use them when in_interrupt() from linux/preempt_mask.h evaluates to
* true.
*/
/*
 * Per-CPU generator state: two 64-bit words per CPU, seeded by
 * spl_random_init() and advanced by random_get_pseudo_bytes().
 */
static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy);
/*
* spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
* file:
*
* http://xorshift.di.unimi.it/xorshift128plus.c
*/
/*
 * Advance the two-word generator state s[] by one step and return the
 * next 64-bit output.  The shift constants (23, 18, 5) are those of the
 * referenced xorshift128plus.c.
 */
static inline uint64_t
spl_rand_next(uint64_t *s)
{
	const uint64_t carried = s[1];
	uint64_t mixed = s[0];

	mixed ^= mixed << 23;					/* a */
	mixed ^= carried ^ (mixed >> 18) ^ (carried >> 5);	/* b, c */

	s[0] = carried;
	s[1] = mixed;

	return (mixed + carried);
}
/*
 * Jump the generator state s[] ahead using the JUMP polynomial from the
 * referenced xorshift128plus.c.  For every bit of JUMP (low bit of the
 * first word first) the current state is XOR-accumulated when the bit is
 * set, and the generator is stepped once regardless.  The accumulated
 * value becomes the new state.
 */
static inline void
spl_rand_jump(uint64_t *s)
{
	static const uint64_t JUMP[] =
	    { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
	const unsigned int nbits = 64 * (sizeof (JUMP) / sizeof (*JUMP));
	uint64_t acc0 = 0;
	uint64_t acc1 = 0;
	unsigned int bit;

	for (bit = 0; bit < nbits; bit++) {
		if ((JUMP[bit / 64] >> (bit % 64)) & 1ULL) {
			acc0 ^= s[0];
			acc1 ^= s[1];
		}
		(void) spl_rand_next(s);
	}

	s[0] = acc0;
	s[1] = acc1;
}
/*
 * Fill ptr[0..len) with pseudo-random bytes drawn from this CPU's
 * generator state.  The state is copied into a local, advanced once per
 * (up to) eight output bytes, and written back before the per-CPU
 * variable is released.  Within each generated word the bytes are emitted
 * from the highest array index used down to index 0.
 *
 * Always returns 0.
 */
int
random_get_pseudo_bytes(uint8_t *ptr, size_t len)
{
	uint64_t *percpu, state[2];

	ASSERT(ptr);

	percpu = get_cpu_var(spl_pseudo_entropy);
	state[0] = percpu[0];
	state[1] = percpu[1];

	while (len > 0) {
		union {
			uint64_t ui64;
			uint8_t byte[sizeof (uint64_t)];
		} word;
		int chunk = MIN(len, sizeof (uint64_t));

		len -= chunk;
		word.ui64 = spl_rand_next(state);

		for (chunk--; chunk >= 0; chunk--)
			*ptr++ = word.byte[chunk];
	}

	percpu[0] = state[0];
	percpu[1] = state[1];
	put_cpu_var(spl_pseudo_entropy);

	return (0);
}
EXPORT_SYMBOL(random_get_pseudo_bytes);
#if BITS_PER_LONG == 32
/*
* Support 64/64 => 64 division on a 32-bit platform. While the kernel
* provides a div64_u64() function for this we do not use it because the
* implementation is flawed. There are cases which return incorrect
* results as late as linux-2.6.35. Until this is fixed upstream the
* spl must provide its own implementation.
*
* This implementation is a slightly modified version of the algorithm
* proposed by the book 'Hacker's Delight'. The original source can be
* found here and is available for use without restriction.
*
* http://www.hackersdelight.org/HDcode/newCode/divDouble.c
*/
/*
 * Count the leading zero bits of a 64-bit value; returns 64 for zero.
 *
 * Binary search over the bit width: whenever the top `width` bits are all
 * zero they are counted and shifted out, then the window is halved.
 */
static int
nlz64(uint64_t x)
{
	int zeros = 0;
	int width;

	if (x == 0)
		return (64);

	for (width = 32; width > 0; width >>= 1) {
		if ((x >> (64 - width)) == 0) {
			zeros += width;
			x <<= width;
		}
	}

	return (zeros);
}
/*
 * Divide a 64-bit value by a 32-bit value and return the quotient.
 * Newer kernels have a div_u64() function but we define our own to
 * simplify portability between kernel versions.
 */
static inline uint64_t
__div_u64(uint64_t u, uint32_t v)
{
	uint64_t quotient = u;

	/* do_div() leaves the quotient in its first argument. */
	(void) do_div(quotient, v);

	return (quotient);
}
/*
 * Implementation of 64-bit unsigned division for 32-bit machines.
 *
 * Divisor below 2**32, two subcases:
 *   (1) if the upper half of the dividend is smaller than the divisor the
 *       quotient fits in 32 bits and a single do_div() is sufficient;
 *   (2) otherwise two divisions are done using the grade school method.
 *
 * Divisor of 2**32 or more: the divisor is normalized so its top bit is
 * set, an estimate of the quotient is taken from a single 64/32 division,
 * and the estimate is then corrected by at most one.
 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t hi, lo, rem, qhi, qlo, vnorm, q;
	int shift;

	if ((v >> 32) != 0) {
		shift = nlz64(v);		/* 0 <= shift <= 31 */
		vnorm = (v << shift) >> 32;	/* normalized, MSB is 1 */
		qhi = __div_u64(u >> 1, vnorm);	/* u halved: no overflow */
		q = (qhi << shift) >> 31;	/* undo both scalings */

		/* Make the estimate correct or too small by one ... */
		if (q != 0)
			q -= 1;
		/* ... then fix it up if it was too small. */
		if ((u - q * v) >= v)
			q += 1;

		return (q);
	}

	/* The quotient fits in 32 bits: one division is enough. */
	if ((u >> 32) < v)
		return (__div_u64(u, v));

	/* Long division on the two 32-bit halves of u. */
	hi = u >> 32;
	lo = u & 0xFFFFFFFF;
	qhi = __div_u64(hi, v);			/* first quotient digit */
	rem = hi - qhi * v;			/* first remainder, < v */
	lo += (rem << 32);
	qlo = __div_u64(lo, v);			/* second quotient digit */

	return ((qhi << 32) + qlo);
}
EXPORT_SYMBOL(__udivdi3);
/* BEGIN CSTYLED */
/*
 * Absolute value of a signed 64-bit quantity, returned as uint64_t.
 *
 * The argument is evaluated exactly once and the temporaries carry an
 * abs64_ prefix so they cannot capture a caller's variable (a temporary
 * named `t` would make abs64(t) initialise itself from itself).  The sign
 * mask is all ones for a negative input and zero otherwise, so
 * (v ^ mask) - mask negates negative inputs and leaves others unchanged.
 */
#ifndef abs64
#define abs64(x) ({							\
	__typeof__(x) abs64_v_ = (x);					\
	uint64_t abs64_m_ = abs64_v_ >> 63;				\
	(abs64_v_ ^ abs64_m_) - abs64_m_;				\
})
#endif
/* END CSTYLED */
/*
 * Implementation of 64-bit signed division for 32-bit machines.
 *
 * The magnitudes are divided with __udivdi3() and the quotient is negated
 * when the operands have different signs.
 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	/* All ones when the signs differ, zero when they match. */
	const int64_t sign = (u ^ v) >> 63;
	int64_t mag;

	mag = __udivdi3(abs64(u), abs64(v));

	return ((mag ^ sign) - sign);
}
EXPORT_SYMBOL(__divdi3);
/*
 * Implementation of 64-bit unsigned modulo for 32-bit machines:
 * remainder = dividend - divisor * (dividend / divisor).
 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	const uint64_t quotient = __udivdi3(dividend, divisor);

	return (dividend - (divisor * quotient));
}
EXPORT_SYMBOL(__umoddi3);
/*
 * Implementation of 64-bit unsigned division/modulo for 32-bit machines.
 * Returns n / d and, when r is not NULL, stores n % d through it.
 */
uint64_t
__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
{
	const uint64_t quotient = __udivdi3(n, d);

	if (r != NULL)
		*r = n - d * quotient;

	return (quotient);
}
EXPORT_SYMBOL(__udivmoddi4);
/*
 * Implementation of 64-bit signed division/modulo for 32-bit machines.
 *
 * Both operands are made non-negative, divided with __udivmoddi4(), and
 * the signs are then restored: the quotient is negated when the operand
 * signs differ, the remainder when the dividend was negative.  The
 * remainder is stored through r when r is not NULL.
 */
int64_t
__divmoddi4(int64_t n, int64_t d, int64_t *r)
{
	const boolean_t neg_n = (n < 0) ? B_TRUE : B_FALSE;
	const boolean_t neg_d = (d < 0) ? B_TRUE : B_FALSE;
	int64_t quot, rem;

	if (neg_n)
		n = -n;
	if (neg_d)
		d = -d;

	quot = __udivmoddi4(n, d, (uint64_t *)&rem);

	if (neg_n != neg_d)
		quot = -quot;
	if (neg_n)
		rem = -rem;
	if (r)
		*r = rem;

	return (quot);
}
EXPORT_SYMBOL(__divmoddi4);
#if defined(__arm) || defined(__arm__)
/*
* Implementation of 64-bit (un)signed division for 32-bit arm machines.
*
* Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned)
* long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
* and the remainder in {r2, r3}. The return type is specifically left
* set to 'void' to ensure the compiler does not overwrite these registers
* during the return. All results are in registers as per ABI
*/
/*
 * Unsigned 64-bit divide-with-remainder entry point for 32-bit ARM.
 * The quotient is left in {r0, r1} and the remainder in {r2, r3}.
 */
void
__aeabi_uldivmod(uint64_t u, uint64_t v)
{
uint64_t res;
uint64_t mod;
/* Compute both results before any value is pinned to a register. */
res = __udivdi3(u, v);
mod = __umoddi3(u, v);
{
/* Low and high 32-bit halves of each result, bound to r0-r3. */
register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
register uint32_t r1 asm("r1") = (res >> 32);
register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
register uint32_t r3 asm("r3") = (mod >> 32);
/*
 * Empty asm statement naming all four registers as both inputs and
 * outputs; presumably this keeps the compiler from discarding the
 * assignments above before the function returns.
 */
/* BEGIN CSTYLED */
asm volatile(""
: "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */
: "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
/* END CSTYLED */
return; /* r0; */
}
}
EXPORT_SYMBOL(__aeabi_uldivmod);
/*
 * Signed 64-bit divide-with-remainder entry point for 32-bit ARM.
 * The quotient is left in {r0, r1} and the remainder in {r2, r3}.
 *
 * The remainder is derived from the signed quotient (u - res * v) so that
 * it is correct for negative operands; running the unsigned modulo on the
 * raw bit patterns of negative values gives an unrelated result (for
 * example -7 / 2 must leave a remainder of -1, not 1).  The arithmetic is
 * done on unsigned values so wraparound is well defined; the resulting
 * bit pattern is the two's complement signed remainder.
 */
void
__aeabi_ldivmod(int64_t u, int64_t v)
{
	int64_t res;
	uint64_t mod;

	res = __divdi3(u, v);
	mod = (uint64_t)u - ((uint64_t)res * (uint64_t)v);

	{
		/* Low and high 32-bit halves of each result, bound to r0-r3. */
		register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
		register uint32_t r1 asm("r1") = (res >> 32);
		register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
		register uint32_t r3 asm("r3") = (mod >> 32);

		/* BEGIN CSTYLED */
		asm volatile(""
		    : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
		    : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
		/* END CSTYLED */

		return; /* r0; */
	}
}
EXPORT_SYMBOL(__aeabi_ldivmod);
#endif /* __arm || __arm__ */
#endif /* BITS_PER_LONG */
/*
* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
* ddi_strtol(9F) man page. I have not verified the behavior of these
* functions against their Solaris counterparts. It is possible that I
* may have misinterpreted the man page or the man page is incorrect.
*/
int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);

/*
 * define_ddi_strtoux(type, valtype) emits ddi_strtou<type>(), which parses
 * an unsigned integer of type valtype from str.
 *
 *   str     - NUL terminated input; an empty string yields EINVAL.
 *   endptr  - if not NULL, receives the address of the first character
 *             that was not consumed.
 *   base    - numeric base, or 0 to auto-detect: "0x"/"0X" followed by a
 *             hex digit selects 16, "0" followed by an octal digit ('0'
 *             through '7') selects 8, any other leading "0" yields EINVAL,
 *             and everything else is decimal.
 *   result  - receives the parsed value (0 when no digit was consumed).
 *
 * Returns 0 on success, EINVAL for unparsable input, or ERANGE when the
 * running value wraps around.  Error codes are positive.
 */
#define	define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	valtype last_value, value = 0;					\
	char *ptr = (char *)str;					\
	int digit;							\
									\
	if (strlen(ptr) == 0)						\
		return (EINVAL);					\
									\
	/* Auto-detect base based on prefix */				\
	if (!base) {							\
		if (str[0] == '0') {					\
			if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \
				base = 16; /* hex */			\
				ptr += 2;				\
			} else if (str[1] >= '0' && str[1] <= '7') {	\
				base = 8; /* octal */			\
				ptr += 1;				\
			} else {					\
				return (EINVAL);			\
			}						\
		} else {						\
			base = 10; /* decimal */			\
		}							\
	}								\
									\
	while (1) {							\
		if (isdigit(*ptr))					\
			digit = *ptr - '0';				\
		else if (isalpha(*ptr))					\
			digit = tolower(*ptr) - 'a' + 10;		\
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		last_value = value;					\
		value = value * base + digit;				\
		if (last_value > value) /* Overflow */			\
			return (ERANGE);				\
									\
		ptr++;							\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = ptr;						\
									\
	return (0);							\
}

/*
 * define_ddi_strtox(type, valtype) emits the signed ddi_strto<type>() on
 * top of the unsigned parser of the same width.  A single leading '-' is
 * accepted; the remainder is parsed as unsigned and the value negated.
 * When nothing follows the '-' that can be parsed, the end pointer is
 * moved back to the '-' itself so that no input is reported as consumed.
 *
 * endptr may be NULL: the end position is tracked in a local and copied
 * out only when the caller asked for it.  On error the caller's endptr
 * and result are left as the unsigned parser left them.
 */
#define	define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	char *end = NULL;						\
	int rc;								\
									\
	/* result has the same width as its unsigned counterpart. */	\
	if (*str != '-')						\
		return (ddi_strtou##type(str, endptr, base,		\
		    (void *)result));					\
									\
	rc = ddi_strtou##type(str + 1, &end, base, (void *)result);	\
	if (rc)								\
		return (rc);						\
									\
	if (end == str + 1)						\
		end = (char *)str;					\
	else								\
		*result = -*result;					\
									\
	if (endptr)							\
		*endptr = end;						\
									\
	return (0);							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
/* String-to-integer helpers exported for use by other modules. */
EXPORT_SYMBOL(ddi_strtoul);
EXPORT_SYMBOL(ddi_strtol);
EXPORT_SYMBOL(ddi_strtoll);
EXPORT_SYMBOL(ddi_strtoull);
/*
 * Copy len bytes from `from` into the kernel buffer `to`.
 *
 * When FKIOCTL is set in flags the request is a fake ioctl() issued by
 * the kernel, so `from` is a kernel address and a plain memcpy() is used.
 * Otherwise the copy is delegated to copyin() and its result is returned.
 */
int
ddi_copyin(const void *from, void *to, size_t len, int flags)
{
	if (!(flags & FKIOCTL))
		return (copyin(from, to, len));

	memcpy(to, from, len);

	return (0);
}
EXPORT_SYMBOL(ddi_copyin);
/*
 * Copy len bytes from the kernel buffer `from` out to `to`.
 *
 * When FKIOCTL is set in flags the request is a fake ioctl() issued by
 * the kernel, so `to` is a kernel address and a plain memcpy() is used.
 * Otherwise the copy is delegated to copyout() and its result is returned.
 */
int
ddi_copyout(const void *from, void *to, size_t len, int flags)
{
	if (!(flags & FKIOCTL))
		return (copyout(from, to, len));

	memcpy(to, from, len);

	return (0);
}
EXPORT_SYMBOL(ddi_copyout);
/*
* Read the unique system identifier from the /etc/hostid file.
*
* The behavior of /usr/bin/hostid on Linux systems with the
* regular eglibc and coreutils is:
*
* 1. Generate the value if the /etc/hostid file does not exist
* or if the /etc/hostid file is less than four bytes in size.
*
* 2. If the /etc/hostid file is at least 4 bytes, then return
* the first four bytes [0..3] in native endian order.
*
* 3. Always ignore bytes [4..] if they exist in the file.
*
* Only the first four bytes are significant, even on systems that
* have a 64-bit word size.
*
* See:
*
* eglibc: sysdeps/unix/sysv/linux/gethostid.c
* coreutils: src/hostid.c
*
* Notes:
*
* The /etc/hostid file on Solaris is a text file that often reads:
*
* # DO NOT EDIT
* "0123456789"
*
* Directly copying this file to Linux results in a constant
* hostid of 4f442023 because the default comment constitutes
* the first four bytes of the file.
*
*/
/*
 * Path of the file hostid_read() opens to obtain the hostid.  Defaults to
 * HW_HOSTID_PATH; exposed as a module parameter with mode 0444.
 */
char *spl_hostid_path = HW_HOSTID_PATH;
module_param(spl_hostid_path, charp, 0444);
MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
/*
 * Read the hostid from the file named by spl_hostid_path.
 *
 * On success the first four bytes of the file, masked with
 * HW_HOSTID_MASK, are stored in *hostid and 0 is returned.  Otherwise a
 * positive error code is returned and *hostid is left untouched:
 *
 *   ENOENT - the file could not be opened
 *   EINVAL - the file is smaller than the hostid
 *   EIO    - the read failed
 *   other  - whatever kobj_get_filesize() reported
 */
static int
hostid_read(uint32_t *hostid)
{
	struct _buf *fp;
	uint64_t fsize;
	uint32_t raw = 0;
	int rc;

	fp = kobj_open_file(spl_hostid_path);
	if (fp == (struct _buf *)-1)
		return (ENOENT);

	rc = kobj_get_filesize(fp, &fsize);
	if (rc)
		goto out;

	if (fsize < sizeof (HW_HOSTID_MASK)) {
		rc = EINVAL;
		goto out;
	}

	/*
	 * Read directly into the variable like eglibc does.
	 * Short reads are okay; native behavior is preserved.
	 */
	if (kobj_read_file(fp, (char *)&raw, sizeof (raw), 0) < 0) {
		rc = EIO;
		goto out;
	}

	/* Mask down to 32 bits like coreutils does. */
	*hostid = (raw & HW_HOSTID_MASK);
	rc = 0;
out:
	kobj_close_file(fp);

	return (rc);
}
/*
* Return the system hostid. Preferentially use the spl_hostid module option
* when set, otherwise use the value in the /etc/hostid file.
*/
uint32_t
zone_get_hostid(void *zone)
{
uint32_t hostid;
ASSERT3P(zone, ==, NULL);
if (spl_hostid != 0)
return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));
if (hostid_read(&hostid) == 0)
return (hostid);
return (0);
}
EXPORT_SYMBOL(zone_get_hostid);
/*
 * Bring up the kmem layer followed by the vmem layer.  If the vmem layer
 * fails the kmem layer is torn down again so that nothing is left half
 * initialized.  Returns 0 on success or the first error encountered.
 */
static int
spl_kvmem_init(void)
{
	int err;

	err = spl_kmem_init();
	if (err)
		return (err);

	err = spl_vmem_init();
	if (err)
		spl_kmem_fini();

	return (err);
}
/*
 * Seed the per-CPU pseudo random number generators.
 *
 * 128 bits are requested from the system random number generator.  In
 * the improbable case that the result is all zero the seed is derived
 * from jiffies instead, or from a fixed string when jiffies is zero as
 * well, and the substituted seed is logged.  Each possible CPU then
 * receives the state produced by one further spl_rand_jump() of the seed,
 * so every CPU starts from a different point of the sequence.
 */
static void __init
spl_random_init(void)
{
	uint64_t seed[2];
	int cpu;

	get_random_bytes(seed, sizeof (seed));

	if (seed[0] == 0 && seed[1] == 0) {
		if (jiffies == 0) {
			(void) memcpy(seed, "improbable seed", sizeof (seed));
		} else {
			seed[0] = jiffies;
			seed[1] = ~0 - jiffies;
		}

		printk("SPL: get_random_bytes() returned 0 "
		    "when generating random seed. Setting initial seed to "
		    "0x%016llx%016llx.", cpu_to_be64(seed[0]),
		    cpu_to_be64(seed[1]));
	}

	for_each_possible_cpu(cpu) {
		uint64_t *state = per_cpu(spl_pseudo_entropy, cpu);

		spl_rand_jump(seed);
		state[0] = seed[0];
		state[1] = seed[1];
	}
}
/*
 * Tear down the memory layers in the reverse of the order used by
 * spl_kvmem_init(): vmem first, then kmem.
 */
static void
spl_kvmem_fini(void)
{
	spl_vmem_fini();
	spl_kmem_fini();
}
/*
 * Module entry point.  Clears p0, seeds the random number generator and
 * then initializes each subsystem in turn.  When a step fails every
 * subsystem that was already brought up is torn down in reverse order
 * through the outN labels and the error code of the failing step is
 * returned.
 */
static int __init
spl_init(void)
{
	int rc;

	bzero(&p0, sizeof (proc_t));
	spl_random_init();

	rc = spl_kvmem_init();
	if (rc)
		goto out1;

	rc = spl_mutex_init();
	if (rc)
		goto out2;

	rc = spl_rw_init();
	if (rc)
		goto out3;

	rc = spl_tsd_init();
	if (rc)
		goto out4;

	rc = spl_taskq_init();
	if (rc)
		goto out5;

	rc = spl_kmem_cache_init();
	if (rc)
		goto out6;

	rc = spl_vn_init();
	if (rc)
		goto out7;

	rc = spl_proc_init();
	if (rc)
		goto out8;

	rc = spl_kstat_init();
	if (rc)
		goto out9;

	rc = spl_zlib_init();
	if (rc)
		goto out10;

	printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION,
	    SPL_META_RELEASE, SPL_DEBUG_STR);

	return (rc);

	/* Each label undoes the step that precedes the one that failed. */
out10:
	spl_kstat_fini();
out9:
	spl_proc_fini();
out8:
	spl_vn_fini();
out7:
	spl_kmem_cache_fini();
out6:
	spl_taskq_fini();
out5:
	spl_tsd_fini();
out4:
	spl_rw_fini();
out3:
	spl_mutex_fini();
out2:
	spl_kvmem_fini();
out1:
	printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer "
	    "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE,
	    SPL_DEBUG_STR, rc);

	return (rc);
}
/*
 * Module exit point.  Logs the unload and shuts the subsystems down in
 * the reverse of the order in which spl_init() brought them up.
 */
static void __exit
spl_fini(void)
{
	printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n",
	    SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR);

	spl_zlib_fini();
	spl_kstat_fini();
	spl_proc_fini();
	spl_vn_fini();
	spl_kmem_cache_fini();
	spl_taskq_fini();
	spl_tsd_fini();
	spl_rw_fini();
	spl_mutex_fini();
	spl_kvmem_fini();
}
/* Register the load/unload entry points and the module metadata. */
module_init(spl_init);
module_exit(spl_fini);
MODULE_DESCRIPTION("Solaris Porting Layer");
MODULE_AUTHOR(SPL_META_AUTHOR);
MODULE_LICENSE(SPL_META_LICENSE);
MODULE_VERSION(SPL_META_VERSION "-" SPL_META_RELEASE);

1769
module/spl/spl-kmem-cache.c Normal file

File diff suppressed because it is too large Load Diff

567
module/spl/spl-kmem.c Normal file
View File

@ -0,0 +1,567 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <linux/mm.h>
#include <linux/ratelimit.h>
/*
* As a general rule kmem_alloc() allocations should be small, preferably
* just a few pages since they must by physically contiguous. Therefore, a
* rate limited warning will be printed to the console for any kmem_alloc()
* which exceeds a reasonable threshold.
*
* The default warning threshold is set to sixteen pages but capped at 64K to
* accommodate systems using large pages. This value was selected to be small
* enough to ensure the largest allocations are quickly noticed and fixed.
* But large enough to avoid logging any warnings when an allocation size is
* larger than optimal but not a serious concern. Since this value is tunable,
* developers are encouraged to set it lower when testing so any new largish
* allocations are quickly caught. These warnings may be disabled by setting
* the threshold to zero.
*/
/* BEGIN CSTYLED */
/*
 * Size in bytes above which a kmem_alloc() triggers a rate limited
 * warning; a value of zero disables the warning.
 */
unsigned int spl_kmem_alloc_warn = MIN(16 * PAGE_SIZE, 64 * 1024);
module_param(spl_kmem_alloc_warn, uint, 0644);
MODULE_PARM_DESC(spl_kmem_alloc_warn,
"Warning threshold in bytes for a kmem_alloc()");
EXPORT_SYMBOL(spl_kmem_alloc_warn);
/*
 * Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
 * Allocations which are marginally smaller than this limit may succeed but
 * should still be avoided due to the expense of locating a contiguous range
 * of free pages. Therefore, a maximum kmem size with a reasonable safety
 * margin of 4x is set. Kmem_alloc() allocations larger than this maximum
 * will quickly fail. Vmem_alloc() allocations less than or equal to this
 * value will use kmalloc(), but shift to vmalloc() when exceeding this value.
 */
unsigned int spl_kmem_alloc_max = (KMALLOC_MAX_SIZE >> 2);
module_param(spl_kmem_alloc_max, uint, 0644);
MODULE_PARM_DESC(spl_kmem_alloc_max,
"Maximum size in bytes for a kmem_alloc()");
EXPORT_SYMBOL(spl_kmem_alloc_max);
/* END CSTYLED */
/* Report whether kmem debugging is active; this implementation always says no. */
int
kmem_debugging(void)
{
	const int enabled = 0;

	return (enabled);
}
EXPORT_SYMBOL(kmem_debugging);
/*
 * Format into a newly allocated string using a va_list.  The allocation
 * is retried until it succeeds, so the result is never NULL.  The
 * caller's va_list is copied for every attempt because each formatting
 * pass consumes it.
 */
char *
kmem_vasprintf(const char *fmt, va_list ap)
{
	char *out = NULL;

	while (out == NULL) {
		va_list attempt;

		va_copy(attempt, ap);
		out = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, attempt);
		va_end(attempt);
	}

	return (out);
}
EXPORT_SYMBOL(kmem_vasprintf);
/*
 * Format into a newly allocated string.  The allocation is retried until
 * it succeeds, so the result is never NULL.  The argument list is
 * restarted for every attempt.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	char *out = NULL;
	va_list args;

	while (out == NULL) {
		va_start(args, fmt);
		out = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, args);
		va_end(args);
	}

	return (out);
}
EXPORT_SYMBOL(kmem_asprintf);
/*
 * Duplicate a NUL terminated string into memory obtained from kmalloc()
 * with the given KM_* flags.  Returns the copy, or NULL if the allocation
 * failed.
 *
 * The length is kept in a size_t, the type strlen() returns and kmalloc()
 * takes, rather than being narrowed through an int.
 */
static char *
__strdup(const char *str, int flags)
{
	size_t len = strlen(str) + 1;	/* includes the terminating NUL */
	char *copy;

	copy = kmalloc(len, kmem_flags_convert(flags));
	if (copy != NULL)
		memcpy(copy, str, len);

	return (copy);
}
/* Duplicate a string using a KM_SLEEP allocation. */
char *
strdup(const char *str)
{
	char *copy = __strdup(str, KM_SLEEP);

	return (copy);
}
EXPORT_SYMBOL(strdup);
/* Release a string with kfree(). */
void
strfree(char *str)
{
	kfree(str);
}
EXPORT_SYMBOL(strfree);
/*
 * Limit the number of large allocation stack traces dumped to not more than
 * 5 every 60 seconds to prevent denial-of-service attacks from debug code.
 * spl_kmem_alloc_impl() consults this state for both its large-allocation
 * warning and its possible-deadlock warning.
 */
DEFINE_RATELIMIT_STATE(kmem_alloc_ratelimit_state, 60 * HZ, 5);
/*
 * General purpose unified implementation of kmem_alloc().  It is an
 * amalgamation of Linux and Illumos allocator design.  It should never be
 * exported, so that code using kmem_alloc()/kmem_zalloc() remains
 * relatively portable.  Consumers may only reach this function through
 * wrappers that enforce the common flags.
 *
 *   size  - number of bytes requested
 *   flags - KM_* flags; KM_VMEM permits falling back to __vmalloc() and
 *           KM_NOSLEEP permits returning NULL on failure
 *   node  - NUMA node handed to kmalloc_node()
 *
 * Returns the allocation.  NULL is returned only when KM_NOSLEEP is set
 * and the allocation failed, or when the request is larger than
 * spl_kmem_alloc_max and KM_VMEM is not set.  In all other cases the
 * allocation is retried until it succeeds.
 */
inline void *
spl_kmem_alloc_impl(size_t size, int flags, int node)
{
	const gfp_t gfp = kmem_flags_convert(flags);
	const int vmem_ok = ((flags & KM_VMEM) != 0);
	int force_vmem = 0;
	void *buf;

	/*
	 * Log abnormally large allocations and rate limit the console
	 * output.  Allocations larger than spl_kmem_alloc_warn should be
	 * performed through the vmem_alloc()/vmem_zalloc() interfaces.
	 */
	if (spl_kmem_alloc_warn > 0 && size > spl_kmem_alloc_warn &&
	    !vmem_ok && __ratelimit(&kmem_alloc_ratelimit_state)) {
		printk(KERN_WARNING
		    "Large kmem_alloc(%lu, 0x%x), please file an issue at:\n"
		    "https://github.com/zfsonlinux/zfs/issues/new\n",
		    (unsigned long)size, flags);
		dump_stack();
	}

	/*
	 * Retry in a loop because kmalloc_node() can fail when GFP_KERNEL
	 * is used, unlike kmem_alloc() with KM_SLEEP on Illumos.
	 */
	for (;;) {
		/*
		 * kmalloc_node() is only used for requests no larger than
		 * spl_kmem_alloc_max.  Anything bigger must fail for
		 * kmem_alloc()/kmem_zalloc() callers, while vmem_alloc()
		 * and vmem_zalloc() callers are served by __vmalloc().
		 * Use of __vmalloc() is nevertheless discouraged in
		 * general: it must take a global lock, and contention on
		 * that lock can significantly impact performance.
		 */
		if (size <= spl_kmem_alloc_max && !force_vmem) {
			buf = kmalloc_node(size, gfp, node);
		} else if (vmem_ok) {
			buf = __vmalloc(size, gfp, PAGE_KERNEL);
		} else {
			return (NULL);
		}

		if (likely(buf) || (flags & KM_NOSLEEP))
			return (buf);

		/*
		 * For vmem_alloc() and vmem_zalloc() callers retry
		 * immediately using __vmalloc(), which is unlikely to fail.
		 */
		if (vmem_ok && !force_vmem) {
			force_vmem = 1;
			continue;
		}

		if (unlikely(__ratelimit(&kmem_alloc_ratelimit_state))) {
			printk(KERN_WARNING
			    "Possible memory allocation deadlock: "
			    "size=%lu lflags=0x%x",
			    (unsigned long)size, gfp);
			dump_stack();
		}

		/*
		 * Use cond_resched() instead of congestion_wait() to avoid
		 * deadlocking systems where there are no block devices.
		 */
		cond_resched();
	}
}
/*
 * Release a buffer obtained from spl_kmem_alloc_impl().  The address
 * itself determines which allocator it came from: vmalloc addresses are
 * handed to vfree(), everything else to kfree().  The size is unused.
 */
inline void
spl_kmem_free_impl(const void *buf, size_t size)
{
	if (!is_vmalloc_addr(buf)) {
		kfree(buf);
		return;
	}

	vfree(buf);
}
/*
* Memory allocation and accounting for kmem_* style allocations. When
* DEBUG_KMEM is enabled the total memory allocated will be tracked and
* any memory leaked will be reported during module unload.
*
* ./configure --enable-debug-kmem
*/
#ifdef DEBUG_KMEM
/*
 * Shim layer memory accounting.
 *
 * kmem_alloc_used is the running total of bytes currently allocated and
 * kmem_alloc_max the largest total observed so far.  The running total is
 * a 64-bit atomic when HAVE_ATOMIC64_T is defined and a plain atomic_t
 * otherwise; kmem_alloc_max has the same definition in both cases.
 */
#ifdef HAVE_ATOMIC64_T
atomic64_t kmem_alloc_used = ATOMIC64_INIT(0);
unsigned long long kmem_alloc_max = 0;
#else /* HAVE_ATOMIC64_T */
atomic_t kmem_alloc_used = ATOMIC_INIT(0);
unsigned long long kmem_alloc_max = 0;
#endif /* HAVE_ATOMIC64_T */
EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
/*
 * Allocate through spl_kmem_alloc_impl() and, on success, add the size
 * to the running total and raise the high-water mark when the new total
 * exceeds it.  A failed allocation leaves the accounting untouched.
 */
inline void *
spl_kmem_alloc_debug(size_t size, int flags, int node)
{
	void *buf = spl_kmem_alloc_impl(size, flags, node);

	if (buf == NULL)
		return (NULL);

	kmem_alloc_used_add(size);
	if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
		kmem_alloc_max = kmem_alloc_used_read();

	return (buf);
}
/*
 * Subtract the size from the running total, then release the buffer
 * through spl_kmem_free_impl().
 */
inline void
spl_kmem_free_debug(const void *ptr, size_t size)
{
	kmem_alloc_used_sub(size);
	spl_kmem_free_impl(ptr, size);
}
/*
* When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked
* but also the location of every alloc and free. When the SPL module is
* unloaded a list of all leaked addresses and where they were allocated
* will be dumped to the console. Enabling this feature has a significant
* impact on performance but it makes finding memory leaks straight forward.
*
* Not surprisingly with debugging enabled the xmem_locks are very highly
* contended particularly on xfree(). If we want to run with this detailed
* debugging enabled for anything other than debugging we need to minimize
* the contention by moving to a lock per xmem_table entry model.
*
* ./configure --enable-debug-kmem-tracking
*/
#ifdef DEBUG_KMEM_TRACKING
#include <linux/hash.h>
#include <linux/ctype.h>
/* The tracking hash table has 2^KMEM_HASH_BITS buckets. */
#define KMEM_HASH_BITS 10
#define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS)
/*
 * One record per tracked allocation.  A record is linked both into the
 * hash bucket selected by its address and into the list of all records.
 */
typedef struct kmem_debug {
struct hlist_node kd_hlist; /* Hash node linkage */
struct list_head kd_list; /* List of all allocations */
void *kd_addr; /* Allocation pointer */
size_t kd_size; /* Allocation size */
const char *kd_func; /* Allocation function */
int kd_line; /* Allocation line */
} kmem_debug_t;
/* kmem_lock is held while kmem_table and kmem_list are modified. */
static spinlock_t kmem_lock;
static struct hlist_head kmem_table[KMEM_TABLE_SIZE];
static struct list_head kmem_list;
/*
 * Find the tracking record for addr in the hash table and unlink it from
 * both its hash bucket and the list of all allocations.  The lookup and
 * the unlink happen under the lock with interrupts disabled.
 *
 *   lock  - spinlock protecting the table
 *   table - hash table of tracking records
 *   bits  - number of hash bits used to index the table
 *   addr  - allocation address to look up
 *
 * Returns the unlinked record, or NULL when addr is not tracked.
 */
static kmem_debug_t *
kmem_del_init(spinlock_t *lock, struct hlist_head *table,
    int bits, const void *addr)
{
	struct hlist_head *bucket;
	struct hlist_node *pos;
	struct kmem_debug *entry;
	struct kmem_debug *match = NULL;
	unsigned long irq_flags;

	spin_lock_irqsave(lock, irq_flags);

	bucket = &table[hash_ptr((void *)addr, bits)];
	hlist_for_each(pos, bucket) {
		entry = list_entry(pos, struct kmem_debug, kd_hlist);
		if (entry->kd_addr != addr)
			continue;

		hlist_del_init(&entry->kd_hlist);
		list_del_init(&entry->kd_list);
		match = entry;
		break;
	}

	spin_unlock_irqrestore(lock, irq_flags);

	return (match);
}
/*
 * Allocate memory and record where the allocation was made.
 *
 * A tracking record and a private copy of the function name are
 * allocated first, then the buffer itself through
 * spl_kmem_alloc_debug().  If any of the three allocations fails,
 * whatever was obtained so far is released and NULL is returned.  On
 * success the record is added to the hash table and to the list of all
 * allocations under kmem_lock.
 */
inline void *
spl_kmem_alloc_track(size_t size, int flags,
    const char *func, int line, int node)
{
	kmem_debug_t *rec;
	unsigned long irq_flags;
	void *buf;

	rec = kmalloc(sizeof (*rec), kmem_flags_convert(flags));
	if (rec == NULL)
		return (NULL);

	rec->kd_func = __strdup(func, flags);
	if (rec->kd_func == NULL)
		goto free_rec;

	buf = spl_kmem_alloc_debug(size, flags, node);
	if (buf == NULL)
		goto free_func;

	INIT_HLIST_NODE(&rec->kd_hlist);
	INIT_LIST_HEAD(&rec->kd_list);
	rec->kd_addr = buf;
	rec->kd_size = size;
	rec->kd_line = line;

	spin_lock_irqsave(&kmem_lock, irq_flags);
	hlist_add_head(&rec->kd_hlist,
	    &kmem_table[hash_ptr(buf, KMEM_HASH_BITS)]);
	list_add_tail(&rec->kd_list, &kmem_list);
	spin_unlock_irqrestore(&kmem_lock, irq_flags);

	return (buf);

free_func:
	kfree(rec->kd_func);
free_rec:
	kfree(rec);

	return (NULL);
}
/*
 * Release a tracked allocation.  The tracking record is removed from the
 * hash table, checked against the size supplied by the caller, and freed
 * together with its copy of the function name before the buffer itself
 * is released.  A NULL pointer was never tracked and is ignored.
 */
inline void
spl_kmem_free_track(const void *ptr, size_t size)
{
	kmem_debug_t *rec;

	if (ptr != NULL) {
		/* Must exist in hash due to kmem_alloc() */
		rec = kmem_del_init(&kmem_lock, kmem_table,
		    KMEM_HASH_BITS, ptr);
		ASSERT3P(rec, !=, NULL);
		ASSERT3S(rec->kd_size, ==, size);

		kfree(rec->kd_func);
		kfree(rec);

		spl_kmem_free_debug(ptr, size);
	}
}
#endif /* DEBUG_KMEM_TRACKING */
#endif /* DEBUG_KMEM */
/*
* Public kmem_alloc(), kmem_zalloc() and kmem_free() interfaces.
*/
/*
 * Allocate 'size' bytes.  Only flags within KM_PUBLIC_MASK are accepted.
 * The build configuration selects the backend: the tracking allocator,
 * the accounting (debug) allocator, or the plain implementation.
 */
void *
spl_kmem_alloc(size_t size, int flags, const char *func, int line)
{
	void *buf;

	ASSERT0(flags & ~KM_PUBLIC_MASK);

#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
	buf = spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE);
#elif defined(DEBUG_KMEM)
	buf = spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE);
#else
	buf = spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE);
#endif

	return (buf);
}
EXPORT_SYMBOL(spl_kmem_alloc);
/*
 * Same as spl_kmem_alloc() except that KM_ZERO is added to the flags
 * before the request is passed to the configured backend.
 */
void *
spl_kmem_zalloc(size_t size, int flags, const char *func, int line)
{
	void *buf;

	ASSERT0(flags & ~KM_PUBLIC_MASK);

	flags |= KM_ZERO;

#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
	buf = spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE);
#elif defined(DEBUG_KMEM)
	buf = spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE);
#else
	buf = spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE);
#endif

	return (buf);
}
EXPORT_SYMBOL(spl_kmem_zalloc);
/*
 * Free a buffer of 'size' bytes through the backend that matches the
 * one spl_kmem_alloc() uses for this build configuration.
 */
void
spl_kmem_free(const void *buf, size_t size)
{
#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
	spl_kmem_free_track(buf, size);
#elif defined(DEBUG_KMEM)
	spl_kmem_free_debug(buf, size);
#else
	spl_kmem_free_impl(buf, size);
#endif
}
EXPORT_SYMBOL(spl_kmem_free);
#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
/*
 * Render the first bytes of a tracked allocation into 'str' for the
 * leak report.
 *
 * kd  - tracking record describing the allocation
 * str - output buffer, at least 17 bytes long
 * len - size of 'str' in bytes
 * min - a non-printable byte at an index greater than 'min' still
 *       leaves the leading printable run shown as ascii
 *
 * If the leading bytes are printable they are returned as text.
 * Otherwise 'str' holds a hex dump of the bytes at even offsets 0..14.
 * Offsets that lie beyond the allocation are shown as 00 rather than
 * being read, so a short allocation never causes an out-of-bounds read.
 *
 * Returns 'str'.
 */
static char *
spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
	const uint8_t *data = (const uint8_t *)kd->kd_addr;
	int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
	uint8_t hex[8];
	int i, flag = 1;

	ASSERT(str != NULL && len >= 17);
	memset(str, 0, len);

	/*
	 * Check for a fully printable string, and while we are at
	 * it place the printable characters in the passed buffer.
	 */
	for (i = 0; i < size; i++) {
		str[i] = ((char *)(kd->kd_addr))[i];
		if (isprint(str[i]))
			continue;

		/* Do not leave the non-printable byte in the ascii text. */
		str[i] = '\0';

		/*
		 * Minimum number of printable characters found
		 * to make it worthwhile to print this as ascii.
		 */
		if (i <= min)
			flag = 0;

		break;
	}

	if (!flag) {
		/* Sample every other byte, staying inside the allocation. */
		for (i = 0; i < 8; i++) {
			if ((size_t)(2 * i) < (size_t)kd->kd_size)
				hex[i] = data[2 * i];
			else
				hex[i] = 0;
		}

		sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
		    hex[0], hex[1], hex[2], hex[3],
		    hex[4], hex[5], hex[6], hex[7]);
	}

	return (str);
}
/*
 * Prepare the allocation tracking state: initialize 'lock' and 'list'
 * and reset the first 'size' buckets of kmem_table.  Always returns 0.
 */
static int
spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
{
	int bucket = 0;

	spin_lock_init(lock);
	INIT_LIST_HEAD(list);

	while (bucket < size) {
		INIT_HLIST_HEAD(&kmem_table[bucket]);
		bucket++;
	}

	return (0);
}
/*
 * Report every allocation still present on 'list'.  A header line is
 * printed when the list is not empty, followed by one line per record
 * with its address, size, leading data, and allocating function:line.
 * The list is walked with 'lock' held and interrupts disabled.
 */
static void
spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
{
	unsigned long irq_state;
	kmem_debug_t *rec;
	char data[17];

	spin_lock_irqsave(lock, irq_state);

	if (!list_empty(list)) {
		printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address",
		    "size", "data", "func", "line");
	}

	list_for_each_entry(rec, list, kd_list) {
		char *text = spl_sprintf_addr(rec, data, 17, 8);

		printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", rec->kd_addr,
		    (int)rec->kd_size, text, rec->kd_func, rec->kd_line);
	}

	spin_unlock_irqrestore(lock, irq_state);
}
#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
/*
 * Initialize the kmem layer.  With DEBUG_KMEM the usage counter is
 * reset; with DEBUG_KMEM_TRACKING the tracking hash, list and lock are
 * set up as well.  Always returns 0.
 */
int
spl_kmem_init(void)
{
#if defined(DEBUG_KMEM)
	kmem_alloc_used_set(0);

#if defined(DEBUG_KMEM_TRACKING)
	spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
#endif /* DEBUG_KMEM_TRACKING */
#endif /* DEBUG_KMEM */

	return (0);
}
/*
 * Tear down the kmem layer.  In debug builds any memory still accounted
 * as allocated is reported as leaked.
 */
void
spl_kmem_fini(void)
{
#if defined(DEBUG_KMEM)
	/*
	 * Report unreclaimed memory.  With tracking enabled each leaked
	 * address is listed with its size and the first few bytes stored
	 * there to help debugging.  Speed is not a concern because this
	 * runs at module unload time.
	 */
	if (kmem_alloc_used_read() != 0) {
		printk(KERN_WARNING "kmem leaked %ld/%llu bytes\n",
		    (unsigned long)kmem_alloc_used_read(), kmem_alloc_max);
	}

#if defined(DEBUG_KMEM_TRACKING)
	spl_kmem_fini_tracking(&kmem_list, &kmem_lock);
#endif /* DEBUG_KMEM_TRACKING */
#endif /* DEBUG_KMEM */
}

86
module/spl/spl-kobj.c Normal file
View File

@ -0,0 +1,86 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Kobj Implementation.
*/
#include <sys/kobj.h>
struct _buf *
kobj_open_file(const char *name)
{
struct _buf *file;
vnode_t *vp;
int rc;
file = kmalloc(sizeof (_buf_t), kmem_flags_convert(KM_SLEEP));
if (file == NULL)
return ((_buf_t *)-1UL);
if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) {
kfree(file);
return ((_buf_t *)-1UL);
}
file->vp = vp;
return (file);
} /* kobj_open_file() */
EXPORT_SYMBOL(kobj_open_file);
void
kobj_close_file(struct _buf *file)
{
VOP_CLOSE(file->vp, 0, 0, 0, 0, 0);
kfree(file);
} /* kobj_close_file() */
EXPORT_SYMBOL(kobj_close_file);
int
kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
{
ssize_t resid;
if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off,
UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
return (-1);
return (size - resid);
} /* kobj_read_file() */
EXPORT_SYMBOL(kobj_read_file);
/*
 * Store the size of 'file' in '*size'.  Returns 0 on success, otherwise
 * the error from VOP_GETATTR(), in which case '*size' is left untouched.
 */
int
kobj_get_filesize(struct _buf *file, uint64_t *size)
{
	vattr_t attrs;
	int error;

	error = VOP_GETATTR(file->vp, &attrs, 0, 0, NULL);
	if (error == 0)
		*size = attrs.va_size;

	return (error);
}
EXPORT_SYMBOL(kobj_get_filesize);

733
module/spl/spl-kstat.c Normal file
View File

@ -0,0 +1,733 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Kstat Implementation.
*/
#include <linux/seq_file.h>
#include <sys/kstat.h>
#include <sys/vmem.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
/* When HAVE_PDE_DATA is not defined, read the data pointer through PDE(). */
#ifndef HAVE_PDE_DATA
#define PDE_DATA(x) (PDE(x)->data)
#endif
/* Taken by __kstat_create(), __kstat_install() and __kstat_delete(). */
static kmutex_t kstat_module_lock;
/* List of kstat_module_t entries, linked through ksm_module_list. */
static struct list_head kstat_module_list;
/* Next kid; __kstat_create() copies it to ks_kid and then increments it. */
static kid_t kstat_id;
/*
 * Replace the raw output buffer with one twice the size, capped at
 * KSTAT_RAW_MAX.  The old contents are discarded.  Returns 0 on
 * success, or ENOMEM when the buffer is already at its maximum size.
 */
static int
kstat_resize_raw(kstat_t *ksp)
{
	if (ksp->ks_raw_bufsize != KSTAT_RAW_MAX) {
		vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);

		ksp->ks_raw_bufsize =
		    MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
		ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);

		return (0);
	}

	return (ENOMEM);
}
/*
 * Account for a request entering the wait queue.  If the queue was not
 * empty, the time since the last update is added to the wait-time
 * totals, weighted by the previous queue length for wlentime.
 */
void
kstat_waitq_enter(kstat_io_t *kiop)
{
	hrtime_t now = gethrtime();
	hrtime_t elapsed = now - kiop->wlastupdate;
	ulong_t queued = kiop->wcnt;

	kiop->wlastupdate = now;
	kiop->wcnt++;

	if (queued == 0)
		return;

	kiop->wlentime += elapsed * queued;
	kiop->wtime += elapsed;
}
EXPORT_SYMBOL(kstat_waitq_enter);
/*
 * Account for a request leaving the wait queue.  The queue must not be
 * empty.  The time since the last update is added to the wait-time
 * totals, weighted by the queue length before the removal for wlentime.
 */
void
kstat_waitq_exit(kstat_io_t *kiop)
{
	hrtime_t now = gethrtime();
	hrtime_t elapsed = now - kiop->wlastupdate;
	ulong_t queued = kiop->wcnt;

	kiop->wlastupdate = now;
	kiop->wcnt--;

	ASSERT((int)queued > 0);

	kiop->wlentime += elapsed * queued;
	kiop->wtime += elapsed;
}
EXPORT_SYMBOL(kstat_waitq_exit);
/*
 * Account for a request entering the run queue.  If the queue was not
 * empty, the time since the last update is added to the run-time
 * totals, weighted by the previous queue length for rlentime.
 */
void
kstat_runq_enter(kstat_io_t *kiop)
{
	hrtime_t now = gethrtime();
	hrtime_t elapsed = now - kiop->rlastupdate;
	ulong_t running = kiop->rcnt;

	kiop->rlastupdate = now;
	kiop->rcnt++;

	if (running == 0)
		return;

	kiop->rlentime += elapsed * running;
	kiop->rtime += elapsed;
}
EXPORT_SYMBOL(kstat_runq_enter);
/*
 * Account for a request leaving the run queue.  The queue must not be
 * empty.  The time since the last update is added to the run-time
 * totals, weighted by the queue length before the removal for rlentime.
 */
void
kstat_runq_exit(kstat_io_t *kiop)
{
	hrtime_t now = gethrtime();
	hrtime_t elapsed = now - kiop->rlastupdate;
	ulong_t running = kiop->rcnt;

	kiop->rlastupdate = now;
	kiop->rcnt--;

	ASSERT((int)running > 0);

	kiop->rlentime += elapsed * running;
	kiop->rtime += elapsed;
}
EXPORT_SYMBOL(kstat_runq_exit);
static int
kstat_seq_show_headers(struct seq_file *f)
{
kstat_t *ksp = (kstat_t *)f->private;
int rc = 0;
ASSERT(ksp->ks_magic == KS_MAGIC);
seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n",
ksp->ks_kid, ksp->ks_type, ksp->ks_flags,
ksp->ks_ndata, (int)ksp->ks_data_size,
ksp->ks_crtime, ksp->ks_snaptime);
switch (ksp->ks_type) {
case KSTAT_TYPE_RAW:
restart:
if (ksp->ks_raw_ops.headers) {
rc = ksp->ks_raw_ops.headers(
ksp->ks_raw_buf, ksp->ks_raw_bufsize);
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart;
if (!rc)
seq_puts(f, ksp->ks_raw_buf);
} else {
seq_printf(f, "raw data\n");
}
break;
case KSTAT_TYPE_NAMED:
seq_printf(f, "%-31s %-4s %s\n",
"name", "type", "data");
break;
case KSTAT_TYPE_INTR:
seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n",
"hard", "soft", "watchdog",
"spurious", "multsvc");
break;
case KSTAT_TYPE_IO:
seq_printf(f,
"%-8s %-8s %-8s %-8s %-8s %-8s "
"%-8s %-8s %-8s %-8s %-8s %-8s\n",
"nread", "nwritten", "reads", "writes",
"wtime", "wlentime", "wupdate",
"rtime", "rlentime", "rupdate",
"wcnt", "rcnt");
break;
case KSTAT_TYPE_TIMER:
seq_printf(f,
"%-31s %-8s "
"%-8s %-8s %-8s %-8s %-8s\n",
"name", "events", "elapsed",
"min", "max", "start", "stop");
break;
default:
PANIC("Undefined kstat type %d\n", ksp->ks_type);
}
return (-rc);
}
/*
 * Hex dump 'l' bytes starting at 'p', sixteen bytes per row, each row
 * prefixed with its row number in hex.  The dump ends with the first
 * row that is not completely filled, which is empty when 'l' is a
 * multiple of sixteen.  Always returns 0.
 */
static int
kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l)
{
	int row = 0, off = 0;

	for (;;) {
		int col;

		seq_printf(f, "%03x:", row);

		for (col = 0; col < 16 && off < l; col++, off++)
			seq_printf(f, " %02x", (unsigned char)p[off]);

		seq_printf(f, "\n");

		/* A short row means the data ran out. */
		if (col < 16)
			break;

		row++;
	}

	return (0);
}
/*
 * Print one named kstat entry: its name, its numeric data type, and the
 * value formatted according to that type.  Character and string values
 * are NUL terminated in place before being printed.  Always returns 0.
 */
static int
kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp)
{
	seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type);

	/*
	 * NOTE - We need to be more careful about which format tokens
	 * are used for each arch, for now this is correct for x86_64.
	 */
	switch (knp->data_type) {
	case KSTAT_DATA_CHAR:
		/* Force NUL termination of the fixed size buffer. */
		knp->value.c[15] = '\0';
		seq_printf(f, "%-16s", knp->value.c);
		break;
	case KSTAT_DATA_INT32:
		seq_printf(f, "%d", knp->value.i32);
		break;
	case KSTAT_DATA_UINT32:
		seq_printf(f, "%u", knp->value.ui32);
		break;
	case KSTAT_DATA_INT64:
		seq_printf(f, "%lld", (signed long long)knp->value.i64);
		break;
	case KSTAT_DATA_UINT64:
		seq_printf(f, "%llu",
		    (unsigned long long)knp->value.ui64);
		break;
	case KSTAT_DATA_LONG:
		seq_printf(f, "%ld", knp->value.l);
		break;
	case KSTAT_DATA_ULONG:
		seq_printf(f, "%lu", knp->value.ul);
		break;
	case KSTAT_DATA_STRING:
		/* Terminate at the last byte of the string buffer. */
		KSTAT_NAMED_STR_PTR(knp)
		    [KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0';
		seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp));
		break;
	default:
		PANIC("Undefined kstat data type %d\n", knp->data_type);
	}

	seq_printf(f, "\n");

	return (0);
}
/*
 * Print the five interrupt counters of an interrupt kstat in the order
 * hard, soft, watchdog, spurious, multsvc.  Always returns 0.
 */
static int
kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip)
{
	seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n",
	    kip->intrs[KSTAT_INTR_HARD], kip->intrs[KSTAT_INTR_SOFT],
	    kip->intrs[KSTAT_INTR_WATCHDOG], kip->intrs[KSTAT_INTR_SPURIOUS],
	    kip->intrs[KSTAT_INTR_MULTSVC]);

	return (0);
}
/*
 * Print one line with the byte and operation counters, the wait and run
 * queue times, and the current queue lengths of an I/O kstat.  Always
 * returns 0.
 */
static int
kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip)
{
	seq_printf(f,
	    "%-8llu %-8llu %-8u %-8u %-8lld %-8lld "
	    "%-8lld %-8lld %-8lld %-8lld %-8u %-8u\n",
	    kip->nread, kip->nwritten, kip->reads, kip->writes,
	    kip->wtime, kip->wlentime, kip->wlastupdate,
	    kip->rtime, kip->rlentime, kip->rlastupdate,
	    kip->wcnt, kip->rcnt);

	return (0);
}
/*
 * Print one timer kstat entry: name, event count, elapsed time, min and
 * max times, and the start and stop times.  Always returns 0.
 */
static int
kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp)
{
	seq_printf(f,
	    "%-31s %-8llu %-8lld %-8lld %-8lld %-8lld %-8lld\n",
	    ktp->name, ktp->num_events, ktp->elapsed_time,
	    ktp->min_time, ktp->max_time, ktp->start_time, ktp->stop_time);

	return (0);
}
static int
kstat_seq_show(struct seq_file *f, void *p)
{
kstat_t *ksp = (kstat_t *)f->private;
int rc = 0;
ASSERT(ksp->ks_magic == KS_MAGIC);
switch (ksp->ks_type) {
case KSTAT_TYPE_RAW:
restart:
if (ksp->ks_raw_ops.data) {
rc = ksp->ks_raw_ops.data(
ksp->ks_raw_buf, ksp->ks_raw_bufsize, p);
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart;
if (!rc)
seq_puts(f, ksp->ks_raw_buf);
} else {
ASSERT(ksp->ks_ndata == 1);
rc = kstat_seq_show_raw(f, ksp->ks_data,
ksp->ks_data_size);
}
break;
case KSTAT_TYPE_NAMED:
rc = kstat_seq_show_named(f, (kstat_named_t *)p);
break;
case KSTAT_TYPE_INTR:
rc = kstat_seq_show_intr(f, (kstat_intr_t *)p);
break;
case KSTAT_TYPE_IO:
rc = kstat_seq_show_io(f, (kstat_io_t *)p);
break;
case KSTAT_TYPE_TIMER:
rc = kstat_seq_show_timer(f, (kstat_timer_t *)p);
break;
default:
PANIC("Undefined kstat type %d\n", ksp->ks_type);
}
return (-rc);
}
/*
 * Default ks_update handler installed by __kstat_create(): reads need
 * no work and succeed, writes are refused with EACCES.
 */
static int
kstat_default_update(kstat_t *ksp, int rw)
{
	ASSERT(ksp != NULL);

	return ((rw == KSTAT_WRITE) ? EACCES : 0);
}
/*
 * Return the address of entry 'n' of the kstat's data.  Raw kstats use
 * their 'addr' callback when one is set and otherwise return the start
 * of the data; every other type indexes ks_data by its element size.
 */
static void *
kstat_seq_data_addr(kstat_t *ksp, loff_t n)
{
	size_t stride;

	switch (ksp->ks_type) {
	case KSTAT_TYPE_RAW:
		if (ksp->ks_raw_ops.addr)
			return (ksp->ks_raw_ops.addr(ksp, n));
		return (ksp->ks_data);
	case KSTAT_TYPE_NAMED:
		stride = sizeof (kstat_named_t);
		break;
	case KSTAT_TYPE_INTR:
		stride = sizeof (kstat_intr_t);
		break;
	case KSTAT_TYPE_IO:
		stride = sizeof (kstat_io_t);
		break;
	case KSTAT_TYPE_TIMER:
		stride = sizeof (kstat_timer_t);
		break;
	default:
		PANIC("Undefined kstat type %d\n", ksp->ks_type);
		return (NULL);
	}

	return (ksp->ks_data + n * stride);
}
/*
 * seq_file start callback.  Takes the kstat lock, allocates a one page
 * raw buffer for raw kstats, refreshes the kstat through ks_update and
 * stamps the snapshot time.  The headers are printed when iteration
 * begins at position 0.
 *
 * Returns the entry at '*pos', or NULL when printing the headers fails
 * or the position is past the last entry.  The lock is still held and
 * the raw buffer still allocated on return; kstat_seq_stop() releases
 * both.
 */
static void *
kstat_seq_start(struct seq_file *f, loff_t *pos)
{
	kstat_t *ksp = (kstat_t *)f->private;
	loff_t index = *pos;

	ASSERT(ksp->ks_magic == KS_MAGIC);

	mutex_enter(ksp->ks_lock);

	if (ksp->ks_type == KSTAT_TYPE_RAW) {
		ksp->ks_raw_bufsize = PAGE_SIZE;
		ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
	}

	/* Dynamically update kstat, on error existing kstats are used */
	(void) ksp->ks_update(ksp, KSTAT_READ);

	ksp->ks_snaptime = gethrtime();

	if (index == 0 && kstat_seq_show_headers(f) != 0)
		return (NULL);

	if (index >= ksp->ks_ndata)
		return (NULL);

	return (kstat_seq_data_addr(ksp, index));
}
/*
 * seq_file next callback: advance '*pos' and return the entry at the
 * new position, or NULL once the position reaches ks_ndata.
 */
static void *
kstat_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
	kstat_t *ksp = (kstat_t *)f->private;

	ASSERT(ksp->ks_magic == KS_MAGIC);

	*pos += 1;

	if (*pos < ksp->ks_ndata)
		return (kstat_seq_data_addr(ksp, *pos));

	return (NULL);
}
/*
 * seq_file stop callback: free the raw buffer allocated by
 * kstat_seq_start() for raw kstats and drop the kstat lock.
 */
static void
kstat_seq_stop(struct seq_file *f, void *v)
{
	kstat_t *ksp = (kstat_t *)f->private;

	ASSERT(ksp->ks_magic == KS_MAGIC);

	if (ksp->ks_type == KSTAT_TYPE_RAW) {
		vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
	}

	mutex_exit(ksp->ks_lock);
}
/* seq_file iterator callbacks used for every kstat proc entry. */
static struct seq_operations kstat_seq_ops = {
	.start = kstat_seq_start,
	.next  = kstat_seq_next,
	.stop  = kstat_seq_stop,
	.show  = kstat_seq_show,
};
/*
 * Search kstat_module_list for a module whose name matches 'name' in
 * its first KSTAT_STRLEN characters.  Returns the module or NULL.
 */
static kstat_module_t *
kstat_find_module(char *name)
{
	kstat_module_t *ksm;

	list_for_each_entry(ksm, &kstat_module_list, ksm_module_list) {
		if (strncmp(name, ksm->ksm_name, KSTAT_STRLEN) != 0)
			continue;

		return (ksm);
	}

	return (NULL);
}
/*
 * Create the proc directory 'name' under proc_spl_kstat and a matching
 * kstat_module_t, which is appended to kstat_module_list.  Returns the
 * new module, or NULL if the directory cannot be created.
 */
static kstat_module_t *
kstat_create_module(char *name)
{
	struct proc_dir_entry *dir;
	kstat_module_t *ksm;

	dir = proc_mkdir(name, proc_spl_kstat);
	if (dir == NULL)
		return (NULL);

	ksm = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
	strlcpy(ksm->ksm_name, name, KSTAT_STRLEN+1);
	ksm->ksm_proc = dir;
	INIT_LIST_HEAD(&ksm->ksm_kstat_list);
	list_add_tail(&ksm->ksm_module_list, &kstat_module_list);

	return (ksm);
}
/*
 * Destroy a module that no longer holds any kstats: remove its proc
 * directory, unlink it from kstat_module_list and free it.
 */
static void
kstat_delete_module(kstat_module_t *module)
{
	ASSERT(list_empty(&module->ksm_kstat_list));

	remove_proc_entry(module->ksm_name, proc_spl_kstat);

	list_del(&module->ksm_module_list);

	kmem_free(module, sizeof (kstat_module_t));
}
static int
proc_kstat_open(struct inode *inode, struct file *filp)
{
struct seq_file *f;
int rc;
rc = seq_open(filp, &kstat_seq_ops);
if (rc)
return (rc);
f = filp->private_data;
f->private = PDE_DATA(inode);
return (rc);
}
/*
 * Write handler for a kstat proc entry.  The written bytes themselves
 * are not examined; the write only invokes the kstat's ks_update
 * handler with KSTAT_WRITE while holding the kstat lock.
 *
 * Returns 'len' on success or the negated ks_update error.
 */
static ssize_t
proc_kstat_write(struct file *filp, const char __user *buf, size_t len,
    loff_t *ppos)
{
	struct seq_file *f = filp->private_data;
	kstat_t *ksp = f->private;
	int error;

	ASSERT(ksp->ks_magic == KS_MAGIC);

	mutex_enter(ksp->ks_lock);
	error = ksp->ks_update(ksp, KSTAT_WRITE);
	mutex_exit(ksp->ks_lock);

	if (error != 0)
		return (-error);

	*ppos += len;

	return (len);
}
/* File operations installed on every kstat proc entry. */
static struct file_operations proc_kstat_operations = {
	.open    = proc_kstat_open,
	.read    = seq_read,
	.write   = proc_kstat_write,
	.llseek  = seq_lseek,
	.release = seq_release,
};
/*
 * Install the raw kstat callbacks on 'ksp'.
 *
 * headers - fills the raw buffer with the header text
 * data    - fills the raw buffer with the text for one entry
 * addr    - returns the address of the entry at a given index
 */
void
__kstat_set_raw_ops(kstat_t *ksp,
    int (*headers)(char *buf, size_t size),
    int (*data)(char *buf, size_t size, void *data),
    void *(*addr)(kstat_t *ksp, loff_t index))
{
	ksp->ks_raw_ops.addr = addr;
	ksp->ks_raw_ops.data = data;
	ksp->ks_raw_ops.headers = headers;
}
EXPORT_SYMBOL(__kstat_set_raw_ops);
/*
 * Allocate and initialize a kstat.  The kstat is not visible under
 * proc until __kstat_install() is called.
 *
 * ks_module, ks_name, ks_class - identifying strings, copied in
 * ks_instance                  - must be 0
 * ks_type                      - one of the KSTAT_TYPE_* values
 * ks_ndata                     - number of entries; for raw kstats the
 *                                size of the data in bytes
 * ks_flags                     - KSTAT_FLAG_* values; with
 *                                KSTAT_FLAG_VIRTUAL no data is allocated
 *
 * Returns the new kstat, or NULL if an allocation fails.
 */
kstat_t *
__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
    const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
    uchar_t ks_flags)
{
	kstat_t *ksp;

	ASSERT(ks_module);
	ASSERT(ks_instance == 0);
	ASSERT(ks_name);
	ASSERT(!(ks_flags & KSTAT_FLAG_UNSUPPORTED));

	if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) {
		ASSERT(ks_ndata == 1);
	}

	ksp = kmem_zalloc(sizeof (*ksp), KM_SLEEP);
	if (ksp == NULL)
		return (ksp);

	/* Hand out the next kstat id. */
	mutex_enter(&kstat_module_lock);
	ksp->ks_kid = kstat_id;
	kstat_id++;
	mutex_exit(&kstat_module_lock);

	ksp->ks_magic = KS_MAGIC;
	mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
	ksp->ks_lock = &ksp->ks_private_lock;
	INIT_LIST_HEAD(&ksp->ks_list);

	ksp->ks_crtime = gethrtime();
	ksp->ks_snaptime = ksp->ks_crtime;

	strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN);
	ksp->ks_instance = ks_instance;
	strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN);
	strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);

	ksp->ks_type = ks_type;
	ksp->ks_flags = ks_flags;
	ksp->ks_update = kstat_default_update;
	ksp->ks_private = NULL;

	ksp->ks_raw_ops.headers = NULL;
	ksp->ks_raw_ops.data = NULL;
	ksp->ks_raw_ops.addr = NULL;
	ksp->ks_raw_buf = NULL;
	ksp->ks_raw_bufsize = 0;

	/* Derive the entry count and total data size from the type. */
	switch (ksp->ks_type) {
	case KSTAT_TYPE_RAW:
		ksp->ks_ndata = 1;
		ksp->ks_data_size = ks_ndata;
		break;
	case KSTAT_TYPE_NAMED:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
		break;
	case KSTAT_TYPE_INTR:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
		break;
	case KSTAT_TYPE_IO:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
		break;
	case KSTAT_TYPE_TIMER:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
		break;
	default:
		PANIC("Undefined kstat type %d\n", ksp->ks_type);
	}

	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
		ksp->ks_data = NULL;
		return (ksp);
	}

	ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
	if (ksp->ks_data == NULL) {
		kmem_free(ksp, sizeof (*ksp));
		ksp = NULL;
	}

	return (ksp);
}
EXPORT_SYMBOL(__kstat_create);
/*
 * Check whether installing 'ksp' would collide with an existing kstat.
 * The module name is split at its last '/'.  A collision exists when
 * the leading part names an existing module that already holds a kstat
 * named like the trailing part.
 *
 * Returns EEXIST on collision and 0 otherwise, including when the
 * module name contains no '/'.
 */
static int
kstat_detect_collision(kstat_t *ksp)
{
	kstat_module_t *owner;
	kstat_t *other;
	char *path, *sep;
	int rc = 0;

	path = kmem_asprintf("%s", ksp->ks_module);

	sep = strrchr(path, '/');
	if (sep != NULL) {
		sep[0] = '\0';

		owner = kstat_find_module(path);
		if (owner != NULL) {
			list_for_each_entry(other, &owner->ksm_kstat_list,
			    ks_list) {
				if (strncmp(other->ks_name, sep+1,
				    KSTAT_STRLEN) == 0) {
					rc = EEXIST;
					break;
				}
			}
		}
	}

	strfree(path);

	return (rc);
}
/*
 * Make 'ksp' visible under proc.  The owning module directory is
 * created on first use unless that would collide with an existing
 * kstat.  The kstat is silently left uninstalled on a namespace
 * collision, a duplicate name within the module, or a failure to
 * create the module directory or the proc entry.
 */
void
__kstat_install(kstat_t *ksp)
{
	kstat_module_t *owner;
	kstat_t *other;

	ASSERT(ksp);

	mutex_enter(&kstat_module_lock);

	owner = kstat_find_module(ksp->ks_module);
	if (owner == NULL) {
		if (kstat_detect_collision(ksp) != 0) {
			cmn_err(CE_WARN,
			    "kstat_create('%s', '%s'): namespace"
			    " collision", ksp->ks_module, ksp->ks_name);
			goto out;
		}

		owner = kstat_create_module(ksp->ks_module);
		if (owner == NULL)
			goto out;
	}

	/*
	 * Names must be unique within a module.  A clash does not
	 * require removing the module since the clashing entry shows
	 * that it is not empty.
	 */
	list_for_each_entry(other, &owner->ksm_kstat_list, ks_list) {
		if (strncmp(other->ks_name, ksp->ks_name, KSTAT_STRLEN) == 0)
			goto out;
	}

	list_add_tail(&ksp->ks_list, &owner->ksm_kstat_list);

	mutex_enter(ksp->ks_lock);
	ksp->ks_owner = owner;
	ksp->ks_proc = proc_create_data(ksp->ks_name, 0644,
	    owner->ksm_proc, &proc_kstat_operations, (void *)ksp);
	if (ksp->ks_proc == NULL) {
		/* Back out, dropping the module if it became empty. */
		list_del_init(&ksp->ks_list);
		if (list_empty(&owner->ksm_kstat_list))
			kstat_delete_module(owner);
	}
	mutex_exit(ksp->ks_lock);

out:
	mutex_exit(&kstat_module_lock);
}
EXPORT_SYMBOL(__kstat_install);
/*
 * Remove 'ksp' from proc (if it was installed) and free it together
 * with its data, unless the data is caller owned (KSTAT_FLAG_VIRTUAL).
 *
 * The proc file is removed before kstat_module_lock is taken.  The
 * unlink from the module's kstat list, the emptiness check and the
 * removal of the module are then done under kstat_module_lock, because
 * kstat_delete_module() modifies kstat_module_list and frees the
 * module, and __kstat_install() walks both lists under that lock.
 */
void
__kstat_delete(kstat_t *ksp)
{
	kstat_module_t *module = ksp->ks_owner;

	if (ksp->ks_proc)
		remove_proc_entry(ksp->ks_name, module->ksm_proc);

	mutex_enter(&kstat_module_lock);
	list_del_init(&ksp->ks_list);

	/* Remove top level module directory if it's empty */
	if (ksp->ks_proc && list_empty(&module->ksm_kstat_list))
		kstat_delete_module(module);
	mutex_exit(&kstat_module_lock);

	if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
		kmem_free(ksp->ks_data, ksp->ks_data_size);

	ksp->ks_lock = NULL;
	mutex_destroy(&ksp->ks_private_lock);
	kmem_free(ksp, sizeof (*ksp));
}
EXPORT_SYMBOL(__kstat_delete);
/*
 * Initialize the kstat layer: the module list, its lock, and the kstat
 * id counter.  Always returns 0.
 */
int
spl_kstat_init(void)
{
	kstat_id = 0;
	INIT_LIST_HEAD(&kstat_module_list);
	mutex_init(&kstat_module_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}
/*
 * Tear down the kstat layer.  Every module is expected to have been
 * removed by now.
 */
void
spl_kstat_fini(void)
{
	ASSERT(list_empty(&kstat_module_list));

	mutex_destroy(&kstat_module_lock);
}

30
module/spl/spl-mutex.c Normal file
View File

@ -0,0 +1,30 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Mutex Implementation.
*/
#include <sys/mutex.h>
/* Mutex subsystem setup; nothing needs initializing.  Always returns 0. */
int
spl_mutex_init(void)
{
	return (0);
}
/* Mutex subsystem teardown; there is nothing to release. */
void
spl_mutex_fini(void)
{
}

782
module/spl/spl-proc.c Normal file
View File

@ -0,0 +1,782 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Proc Implementation.
*/
#include <sys/systeminfo.h>
#include <sys/kstat.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <sys/taskq.h>
#include <sys/proc.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/version.h>
/*
 * Type used by the proc handlers in this file for their writable local
 * copy of a ctl_table.  With CONSTIFY_PLUGIN defined on kernels >= 3.8
 * the type is declared __no_const; otherwise it is plain struct
 * ctl_table.
 */
#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif
/*
 * NOTE(review): presumably the lower/upper bounds referenced by sysctl
 * table entries defined later in this file - confirm.
 */
static unsigned long table_min = 0;
static unsigned long table_max = ~0;
/* NOTE(review): presumably set when the tables are registered - confirm. */
static struct ctl_table_header *spl_header = NULL;
/* proc directory handles; all start out NULL. */
static struct proc_dir_entry *proc_spl = NULL;
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
static struct proc_dir_entry *proc_spl_taskq_all = NULL;
static struct proc_dir_entry *proc_spl_taskq = NULL;
/* Not static: the kstat code uses it as the parent of module directories. */
struct proc_dir_entry *proc_spl_kstat = NULL;
/*
 * Copy a string of 'ubuffer_size' bytes from user space into 'kbuffer',
 * strip trailing whitespace and NUL terminate the result.
 *
 * Returns 0 on success, or a negative errno:
 *   -EOVERFLOW  the input does not fit, or no room is left for the NUL
 *   -EFAULT     the user buffer could not be read
 *   -EINVAL     the input is empty or consists only of whitespace
 */
static int
proc_copyin_string(char *kbuffer, int kbuffer_size, const char *ubuffer,
    int ubuffer_size)
{
	int size;

	if (ubuffer_size > kbuffer_size)
		return (-EOVERFLOW);

	if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size))
		return (-EFAULT);

	/*
	 * Strip trailing whitespace.  'size' is the length of the
	 * remaining string, so kbuffer[size - 1] is only read while
	 * size > 0 and the byte before the buffer is never touched.
	 */
	size = strnlen(kbuffer, ubuffer_size);
	while (size > 0 && isspace(kbuffer[size - 1]))
		size--;

	/* empty string */
	if (size == 0)
		return (-EINVAL);

	/* no space to terminate */
	if (size >= kbuffer_size)
		return (-EOVERFLOW);

	kbuffer[size] = 0;

	return (0);
}
/*
 * Copy the string 'kbuffer' to user space, truncated to 'ubuffer_size'
 * bytes.  When 'append' is not NULL and room remains, the single
 * character it points to is copied after the string (callers in this
 * file pass "\n").
 *
 * Returns the number of bytes copied out, or -EFAULT when the user
 * buffer cannot be written.
 */
static int
proc_copyout_string(char *ubuffer, int ubuffer_size, const char *kbuffer,
    char *append)
{
	int copied = MIN(strlen(kbuffer), ubuffer_size);

	if (copy_to_user(ubuffer, kbuffer, copied))
		return (-EFAULT);

	/* Nothing to append, or no room left for it. */
	if (append == NULL || copied >= ubuffer_size)
		return (copied);

	if (copy_to_user(ubuffer + copied, append, 1))
		return (-EFAULT);

	return (copied + 1);
}
#ifdef DEBUG_KMEM
/*
 * proc handler reporting the atomic counter referenced by table->data.
 * Reads copy the counter into a local and format it through
 * proc_doulongvec_minmax() using a private copy of the table.  Writes
 * are consumed without changing anything.
 */
static int
proc_domemused(struct ctl_table *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned long min = 0, max = ~0, val;
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &min;
	dummy.extra2 = &max;

	if (write) {
		*ppos += *lenp;
		return (0);
	}

#ifdef HAVE_ATOMIC64_T
	val = atomic64_read((atomic64_t *)table->data);
#else
	val = atomic_read((atomic_t *)table->data);
#endif /* HAVE_ATOMIC64_T */

	return (proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos));
}
#endif /* DEBUG_KMEM */
/*
 * proc handler reporting a sum over the slab caches.  table->data is a
 * bit mask that selects which caches take part (by skc_flags) and which
 * quantity is summed (KMC_TOTAL, KMC_ALLOC or KMC_MAX).  Writes are
 * consumed without changing anything.
 */
static int
proc_doslab(struct ctl_table *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned long min = 0, max = ~0, val = 0, mask;
	spl_ctl_table dummy = *table;
	spl_kmem_cache_t *skc;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &min;
	dummy.extra2 = &max;

	if (write) {
		*ppos += *lenp;
		return (0);
	}

	down_read(&spl_kmem_cache_sem);
	mask = (unsigned long)table->data;

	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {

		/* Only use slabs of the correct kmem/vmem type */
		if ((skc->skc_flags & mask) == 0)
			continue;

		/* Sum the specified field for selected slabs */
		switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
		case KMC_TOTAL:
			val += skc->skc_slab_size * skc->skc_slab_total;
			break;
		case KMC_ALLOC:
			val += skc->skc_obj_size * skc->skc_obj_alloc;
			break;
		case KMC_MAX:
			val += skc->skc_obj_size * skc->skc_obj_max;
			break;
		}
	}

	up_read(&spl_kmem_cache_sem);

	return (proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos));
}
/*
 * proc handler for the hostid.
 *
 * Writes parse the input as a hexadecimal number without a leading
 * "0x" and store it in spl_hostid.  The value is stored only after it
 * has been validated, so a rejected write leaves spl_hostid unchanged.
 * Reads return the hostid in hexadecimal followed by a newline.
 *
 * Returns 0 for a successful write, the number of bytes produced for a
 * read, or a negative errno.
 */
static int
proc_dohostid(struct ctl_table *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int len, rc = 0;
	char *end, str[32];
	unsigned long hostid;

	if (write) {
		/*
		 * We can't use proc_doulongvec_minmax() in the write
		 * case here because hostid while a hex value has no
		 * leading 0x which confuses the helper function.
		 */
		rc = proc_copyin_string(str, sizeof (str), buffer, *lenp);
		if (rc < 0)
			return (rc);

		hostid = simple_strtoul(str, &end, 16);
		if (str == end)
			return (-EINVAL);

		spl_hostid = hostid;
	} else {
		len = snprintf(str, sizeof (str), "%lx",
		    (unsigned long) zone_get_hostid(NULL));
		if (*ppos >= len)
			rc = 0;
		else
			rc = proc_copyout_string(buffer,
			    *lenp, str + *ppos, "\n");

		if (rc >= 0) {
			*lenp = rc;
			*ppos += rc;
		}
	}

	return (rc);
}
/* Print the column header line of the taskq proc files. */
static void
taskq_seq_show_headers(struct seq_file *f)
{
	seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
	    "taskq", "act", "nthr", "spwn", "maxt",
	    "pri", "mina", "maxa", "cura", "flags");
}
/*
 * indices into the lheads array below
 *
 * LHEAD_SIZE is the number of lists and sizes both lheads[] and
 * list_names[] in taskq_seq_show_impl().
 */
#define LHEAD_PEND 0
#define LHEAD_PRIO 1
#define LHEAD_DELAY 2
#define LHEAD_WAIT 3
#define LHEAD_ACTIVE 4
#define LHEAD_SIZE 5
/*
 * Limits how many entries taskq_seq_show_impl() prints for each of the
 * pend, prio, delay and wait lists; a value of 0 disables the limit.
 */
/* BEGIN CSTYLED */
static unsigned int spl_max_show_tasks = 512;
module_param(spl_max_show_tasks, uint, 0644);
MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
/* END CSTYLED */
/*
 * Print one taskq: a summary line with its counters, followed by the
 * contents of each non-empty list (active, pend, prio, delay, wait).
 *
 * f       - seq_file receiving the output
 * p       - the taskq_t to show
 * allflag - when false, a taskq whose lists are all empty prints nothing
 *
 * tq_lock is taken first and the wait queue lock second.  The wait
 * queue lock is dropped early when the wait list is empty and is
 * otherwise held until the lists have been printed.  Always returns 0.
 */
static int
taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
{
taskq_t *tq = p;
taskq_thread_t *tqt;
spl_wait_queue_entry_t *wq;
struct task_struct *tsk;
taskq_ent_t *tqe;
char name[100];
struct list_head *lheads[LHEAD_SIZE], *lh;
static char *list_names[LHEAD_SIZE] =
{"pend", "prio", "delay", "wait", "active" };
int i, j, have_lheads = 0;
unsigned long wflags, flags;
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
/* get the various lists and check whether they're empty */
lheads[LHEAD_PEND] = &tq->tq_pend_list;
lheads[LHEAD_PRIO] = &tq->tq_prio_list;
lheads[LHEAD_DELAY] = &tq->tq_delay_list;
/* The wait queue's list member is named differently across kernels. */
#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
#else
lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
#endif
lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
/* Empty lists are marked with NULL so they are skipped below. */
for (i = 0; i < LHEAD_SIZE; ++i) {
if (list_empty(lheads[i]))
lheads[i] = NULL;
else
++have_lheads;
}
/* early return in non-"all" mode if lists are all empty */
if (!allflag && !have_lheads) {
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
spin_unlock_irqrestore(&tq->tq_lock, flags);
return (0);
}
/* unlock the waitq quickly */
if (!lheads[LHEAD_WAIT])
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
/* show the base taskq contents */
snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
seq_printf(f, "%-25s ", name);
seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
tq->tq_nalloc, tq->tq_flags);
/* show the active list */
if (lheads[LHEAD_ACTIVE]) {
j = 0;
list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
/* Two entries per output line; j counts entries on the line. */
if (j == 0)
seq_printf(f, "\t%s:",
list_names[LHEAD_ACTIVE]);
else if (j == 2) {
seq_printf(f, "\n\t ");
j = 0;
}
seq_printf(f, " [%d]%pf(%ps)",
tqt->tqt_thread->pid,
tqt->tqt_task->tqent_func,
tqt->tqt_task->tqent_arg);
++j;
}
seq_printf(f, "\n");
}
/* show the pend, prio, delay and wait lists, in that order */
for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
if (lheads[i]) {
j = 0;
list_for_each(lh, lheads[i]) {
/* Stop once spl_max_show_tasks entries were shown (0 = no limit). */
if (spl_max_show_tasks != 0 &&
j >= spl_max_show_tasks) {
seq_printf(f, "\n\t(truncated)");
break;
}
/* show the wait waitq list */
if (i == LHEAD_WAIT) {
#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
wq = list_entry(lh,
spl_wait_queue_entry_t, entry);
#else
wq = list_entry(lh,
spl_wait_queue_entry_t, task_list);
#endif
/* Eight pids per output line. */
if (j == 0)
seq_printf(f, "\t%s:",
list_names[i]);
else if (j % 8 == 0)
seq_printf(f, "\n\t ");
tsk = wq->private;
seq_printf(f, " %d", tsk->pid);
/* pend, prio and delay lists */
} else {
tqe = list_entry(lh, taskq_ent_t,
tqent_list);
/* Two entries per output line. */
if (j == 0)
seq_printf(f, "\t%s:",
list_names[i]);
else if (j % 2 == 0)
seq_printf(f, "\n\t ");
seq_printf(f, " %pf(%ps)",
tqe->tqent_func,
tqe->tqent_arg);
}
++j;
}
seq_printf(f, "\n");
}
/* The waitq lock is still held only if the wait list was not empty. */
if (lheads[LHEAD_WAIT])
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
spin_unlock_irqrestore(&tq->tq_lock, flags);
return (0);
}
/* seq_file show callback that prints every taskq, including idle ones. */
static int
taskq_all_seq_show(struct seq_file *f, void *p)
{
	int rc = taskq_seq_show_impl(f, p, B_TRUE);

	return (rc);
}
/*
 * seq_file .show callback behind the "taskq" proc file.  Forwards the
 * current taskq (@p) to taskq_seq_show_impl() with B_FALSE as the third
 * argument and returns whatever that returns.
 */
static int
taskq_seq_show(struct seq_file *f, void *p)
{
	int rc;

	rc = taskq_seq_show_impl(f, p, B_FALSE);

	return (rc);
}
/*
 * seq_file .start callback for the taskq proc files.
 *
 * Takes tq_list_sem for reading (dropped again by taskq_seq_stop()), emits
 * the column headers when iteration begins at offset zero, and then walks
 * tq_list to the element at index *pos.
 *
 * Returns the taskq at index *pos, or NULL when *pos is at or beyond the
 * end of the list.  An empty list also yields NULL; the list head itself is
 * never converted into a taskq_t pointer.
 */
static void *
taskq_seq_start(struct seq_file *f, loff_t *pos)
{
	struct list_head *p;
	loff_t n = *pos;

	down_read(&tq_list_sem);

	if (!n)
		taskq_seq_show_headers(f);

	/* Stop at the list head so an empty or short list returns NULL. */
	for (p = tq_list.next; p != &tq_list; p = p->next) {
		if (n-- == 0)
			return (list_entry(p, taskq_t, tq_taskqs));
	}

	return (NULL);
}
/*
 * seq_file .next callback for the taskq proc files.  Advances *pos and
 * returns the taskq following @p on tq_list, or NULL once the list head is
 * reached.
 */
static void *
taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
	taskq_t *cur = p;
	struct list_head *succ = cur->tq_taskqs.next;

	++*pos;

	if (succ == &tq_list)
		return (NULL);

	return (list_entry(succ, taskq_t, tq_taskqs));
}
/*
 * Print the two heading lines of the slab proc file: a banner naming the
 * column groups, followed by the per-column titles.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	static const char group_banner[] =
	    "--------------------- cache ----------"
	    "--------------------------------------------- "
	    "----- slab ------ "
	    "---- object ----- "
	    "--- emergency ---\n";
	static const char column_titles[] =
	    "name "
	    " flags size alloc slabsize objsize "
	    "total alloc max "
	    "total alloc max "
	    "dlock alloc max\n";

	seq_printf(f, "%s", group_banner);
	seq_printf(f, "%s", column_titles);
}
/*
 * seq_file .show callback for the slab proc file.  Prints one line of
 * statistics for the cache @p, sampled while holding the cache's skc_lock.
 * Caches flagged KMC_SLAB are skipped.  Always returns 0.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *cache = p;

	ASSERT(cache->skc_magic == SKC_MAGIC);

	/* Caches backed by the Linux slab are reported in /proc/slabinfo. */
	if ((cache->skc_flags & KMC_SLAB) != 0)
		return (0);

	spin_lock(&cache->skc_lock);

	seq_printf(f, "%-36s ", cache->skc_name);
	seq_printf(f,
	    "0x%05lx %9lu %9lu %8u %8u "
	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
	    (unsigned long)cache->skc_flags,
	    (unsigned long)(cache->skc_slab_size * cache->skc_slab_total),
	    (unsigned long)(cache->skc_obj_size * cache->skc_obj_alloc),
	    (unsigned)cache->skc_slab_size,
	    (unsigned)cache->skc_obj_size,
	    (unsigned long)cache->skc_slab_total,
	    (unsigned long)cache->skc_slab_alloc,
	    (unsigned long)cache->skc_slab_max,
	    (unsigned long)cache->skc_obj_total,
	    (unsigned long)cache->skc_obj_alloc,
	    (unsigned long)cache->skc_obj_max,
	    (unsigned long)cache->skc_obj_deadlock,
	    (unsigned long)cache->skc_obj_emergency,
	    (unsigned long)cache->skc_obj_emergency_max);

	spin_unlock(&cache->skc_lock);

	return (0);
}
/*
 * seq_file .start callback for the slab proc file.
 *
 * Takes spl_kmem_cache_sem for reading (dropped again by slab_seq_stop()),
 * emits the column headers when iteration begins at offset zero, and then
 * walks spl_kmem_cache_list to the element at index *pos.
 *
 * Returns the cache at index *pos, or NULL when *pos is at or beyond the
 * end of the list.  An empty list also yields NULL; the list head itself is
 * never converted into a spl_kmem_cache_t pointer.
 */
static void *
slab_seq_start(struct seq_file *f, loff_t *pos)
{
	struct list_head *p;
	loff_t n = *pos;

	down_read(&spl_kmem_cache_sem);

	if (!n)
		slab_seq_show_headers(f);

	/* Stop at the list head so an empty or short list returns NULL. */
	for (p = spl_kmem_cache_list.next; p != &spl_kmem_cache_list;
	    p = p->next) {
		if (n-- == 0)
			return (list_entry(p, spl_kmem_cache_t, skc_list));
	}

	return (NULL);
}
/*
 * seq_file .next callback for the slab proc file.  Advances *pos and
 * returns the cache following @p on spl_kmem_cache_list, or NULL once the
 * list head is reached.
 */
static void *
slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
	spl_kmem_cache_t *cur = p;
	struct list_head *succ = cur->skc_list.next;

	++*pos;

	if (succ == &spl_kmem_cache_list)
		return (NULL);

	return (list_entry(succ, spl_kmem_cache_t, skc_list));
}
/*
 * seq_file .stop callback for the slab proc file: releases the read hold
 * on spl_kmem_cache_sem acquired in slab_seq_start().  Both arguments are
 * unused.
 */
static void
slab_seq_stop(struct seq_file *f, void *v)
{
	up_read(&spl_kmem_cache_sem);
}
/* Iterator callbacks for the slab proc file, listed in call order. */
static struct seq_operations slab_seq_ops = {
	.start	= slab_seq_start,
	.next	= slab_seq_next,
	.show	= slab_seq_show,
	.stop	= slab_seq_stop,
};
/*
 * open() handler for the slab proc file: attaches the slab_seq_ops
 * iterator to @filp and returns seq_open()'s result.
 */
static int
proc_slab_open(struct inode *inode, struct file *filp)
{
	int rc = seq_open(filp, &slab_seq_ops);

	return (rc);
}
/* File operations for the slab proc file; all but open are seq_file stock. */
static struct file_operations proc_slab_operations = {
	.open		= proc_slab_open,
	.release	= seq_release,
	.llseek		= seq_lseek,
	.read		= seq_read,
};
/*
 * seq_file .stop callback for the taskq proc files: releases the read hold
 * on tq_list_sem acquired in taskq_seq_start().  Both arguments are unused.
 */
static void
taskq_seq_stop(struct seq_file *f, void *v)
{
	up_read(&tq_list_sem);
}
/* Iterator callbacks for the "taskq-all" proc file, listed in call order. */
static struct seq_operations taskq_all_seq_ops = {
	.start	= taskq_seq_start,
	.next	= taskq_seq_next,
	.show	= taskq_all_seq_show,
	.stop	= taskq_seq_stop,
};
/* Iterator callbacks for the "taskq" proc file, listed in call order. */
static struct seq_operations taskq_seq_ops = {
	.start	= taskq_seq_start,
	.next	= taskq_seq_next,
	.show	= taskq_seq_show,
	.stop	= taskq_seq_stop,
};
/*
 * open() handler for the "taskq-all" proc file: attaches the
 * taskq_all_seq_ops iterator to @filp and returns seq_open()'s result.
 */
static int
proc_taskq_all_open(struct inode *inode, struct file *filp)
{
	int rc = seq_open(filp, &taskq_all_seq_ops);

	return (rc);
}
/*
 * open() handler for the "taskq" proc file: attaches the taskq_seq_ops
 * iterator to @filp and returns seq_open()'s result.
 */
static int
proc_taskq_open(struct inode *inode, struct file *filp)
{
	int rc = seq_open(filp, &taskq_seq_ops);

	return (rc);
}
/* File operations for "taskq-all"; all but open are seq_file stock. */
static struct file_operations proc_taskq_all_operations = {
	.open		= proc_taskq_all_open,
	.release	= seq_release,
	.llseek		= seq_lseek,
	.read		= seq_read,
};
/* File operations for "taskq"; all but open are seq_file stock. */
static struct file_operations proc_taskq_operations = {
	.open		= proc_taskq_open,
	.release	= seq_release,
	.llseek		= seq_lseek,
	.read		= seq_read,
};
/*
 * Read-only sysctl entries placed under the "kmem" directory of spl_table.
 * The slab_* entries carry a KMC_* flag mask in .data (cast to a pointer)
 * and are all served by proc_doslab; the kmem_* entries exist only when
 * DEBUG_KMEM is defined.
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	{
		.procname	= "kmem_used",
		.mode		= 0444,
		.data		= &kmem_alloc_used,
#ifdef HAVE_ATOMIC64_T
		.maxlen		= sizeof (atomic64_t),
#else
		.maxlen		= sizeof (atomic_t),
#endif /* HAVE_ATOMIC64_T */
		.proc_handler	= &proc_domemused,
	},
	{
		.procname	= "kmem_max",
		.mode		= 0444,
		.data		= &kmem_alloc_max,
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.proc_handler	= &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		.procname	= "slab_kmem_total",
		.mode		= 0444,
		.data		= (void *)(KMC_KMEM | KMC_TOTAL),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_kmem_alloc",
		.mode		= 0444,
		.data		= (void *)(KMC_KMEM | KMC_ALLOC),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_kmem_max",
		.mode		= 0444,
		.data		= (void *)(KMC_KMEM | KMC_MAX),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_vmem_total",
		.mode		= 0444,
		.data		= (void *)(KMC_VMEM | KMC_TOTAL),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_vmem_alloc",
		.mode		= 0444,
		.data		= (void *)(KMC_VMEM | KMC_ALLOC),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.proc_handler	= &proc_doslab,
	},
	{
		.procname	= "slab_vmem_max",
		.mode		= 0444,
		.data		= (void *)(KMC_VMEM | KMC_MAX),
		.maxlen		= sizeof (unsigned long),
		.extra1		= &table_min,
		.extra2		= &table_max,
		.proc_handler	= &proc_doslab,
	},
	{},
};
/* Child table of the "kstat" sysctl directory; only the terminator. */
static struct ctl_table spl_kstat_table[] = {
	{},
};
/*
 * Contents of the "spl" sysctl directory: the version string, the
 * writable hostid, and the "kmem" and "kstat" sub-directories.
 */
static struct ctl_table spl_table[] = {
	/*
	 * NB: .strategy handlers are intentionally omitted because
	 * sysctl(8) prefers to go through /proc for portability.
	 */
	{
		.procname	= "version",
		.mode		= 0444,
		.data		= spl_version,
		.maxlen		= sizeof (spl_version),
		.proc_handler	= &proc_dostring,
	},
	{
		.procname	= "hostid",
		.mode		= 0644,
		.data		= &spl_hostid,
		.maxlen		= sizeof (unsigned long),
		.proc_handler	= &proc_dohostid,
	},
	{
		.procname	= "kmem",
		.child		= spl_kmem_table,
		.mode		= 0555,
	},
	{
		.procname	= "kstat",
		.child		= spl_kstat_table,
		.mode		= 0555,
	},
	{},
};
/* The "spl" sysctl directory itself; its contents come from spl_table. */
static struct ctl_table spl_dir[] = {
	{
		.procname	= "spl",
		.child		= spl_table,
		.mode		= 0555,
	},
	{}
};
/*
 * Root table handed to register_sysctl_table(): a "kernel" directory whose
 * child is spl_dir.  .ctl_name is set only where struct ctl_table still
 * has that member (HAVE_CTL_NAME).
 */
static struct ctl_table spl_root[] = {
	{
#ifdef HAVE_CTL_NAME
		.ctl_name	= CTL_KERN,
#endif
		.procname	= "kernel",
		.child		= spl_dir,
		.mode		= 0555,
	},
	{}
};
/*
 * spl_proc_init - register the SPL sysctl table and proc entries
 *
 * Registers spl_root with sysctl and then creates, in order: the "spl"
 * proc directory, its "taskq-all" and "taskq" files, the "kmem" directory
 * with its "slab" file, and the "kstat" directory.
 *
 * Returns 0 on success or -EUNATCH if any step fails.  On failure only the
 * objects that were actually created are removed, in reverse order of
 * creation, so remove_proc_entry() is never called with a NULL parent for
 * a child entry or for a name that was never registered.
 */
int
spl_proc_init(void)
{
	int rc = -EUNATCH;

	spl_header = register_sysctl_table(spl_root);
	if (spl_header == NULL)
		return (rc);

	proc_spl = proc_mkdir("spl", NULL);
	if (proc_spl == NULL)
		goto out_sysctl;

	proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
	    &proc_taskq_all_operations, NULL);
	if (proc_spl_taskq_all == NULL)
		goto out_spl;

	proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
	    &proc_taskq_operations, NULL);
	if (proc_spl_taskq == NULL)
		goto out_taskq_all;

	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
	if (proc_spl_kmem == NULL)
		goto out_taskq;

	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
	    &proc_slab_operations, NULL);
	if (proc_spl_kmem_slab == NULL)
		goto out_kmem;

	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
	if (proc_spl_kstat == NULL)
		goto out_slab;

	return (0);

	/* Unwind: each label undoes the step created just before the failure */
out_slab:
	remove_proc_entry("slab", proc_spl_kmem);
out_kmem:
	remove_proc_entry("kmem", proc_spl);
out_taskq:
	remove_proc_entry("taskq", proc_spl);
out_taskq_all:
	remove_proc_entry("taskq-all", proc_spl);
out_spl:
	remove_proc_entry("spl", NULL);
out_sysctl:
	unregister_sysctl_table(spl_header);

	return (rc);
}
/*
 * spl_proc_fini - remove the proc entries and sysctl table created by
 * spl_proc_init().
 *
 * Entries are removed child-first: "slab" goes before its parent "kmem",
 * and the top-level "spl" directory goes last.  The sysctl table is
 * unregistered at the end; spl_header is asserted to be non-NULL first.
 */
void
spl_proc_fini(void)
{
/* Children of /proc/spl (and /proc/spl/kmem) first ... */
remove_proc_entry("kstat", proc_spl);
remove_proc_entry("slab", proc_spl_kmem);
remove_proc_entry("kmem", proc_spl);
remove_proc_entry("taskq-all", proc_spl);
remove_proc_entry("taskq", proc_spl);
/* ... then the now-empty top-level directory. */
remove_proc_entry("spl", NULL);
ASSERT(spl_header != NULL);
unregister_sysctl_table(spl_header);
}

114
module/spl/spl-rwlock.c Normal file
View File

@ -0,0 +1,114 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Reader/Writer Lock Implementation.
*/
#include <sys/rwlock.h>
/*
 * __rwsem_tryupgrade() has four build-time variants, selected by the
 * kernel's rw_semaphore implementation.  Each returns 1 when the semaphore
 * was switched from "single reader" to "single writer" state, 0 otherwise.
 */
#if defined(CONFIG_PREEMPT_RT_FULL)
#include <linux/rtmutex.h>
/* Bit masked off lock.owner before it is compared against current. */
#define RT_MUTEX_OWNER_MASKALL 1UL
/*
 * PREEMPT_RT variant: the upgrade succeeds when read_depth is at most 1,
 * in which case read_depth is reset to 0.
 */
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
/* The calling thread must be the recorded owner of the underlying lock. */
ASSERT((struct task_struct *)
((unsigned long)rwsem->lock.owner & ~RT_MUTEX_OWNER_MASKALL) ==
current);
/*
* Under the realtime patch series, rwsem is implemented as a
* single mutex held by readers and writers alike. However,
* this implementation would prevent a thread from taking a
* read lock twice, as the mutex would already be locked on
* the second attempt. Therefore the implementation allows a
* single thread to take a rwsem as read lock multiple times
* tracking that nesting as read_depth counter.
*/
if (rwsem->read_depth <= 1) {
/*
* If the current thread has not taken the lock more than
* once as a read lock, we can allow an upgrade to a write
* lock. rwsem_rt.h implements write locks as
* read_depth == 0.
*/
rwsem->read_depth = 0;
return (1);
}
return (0);
}
#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
/*
 * Generic-spinlock variant: under wait_lock, the upgrade succeeds only
 * when the count equals SPL_RWSEM_SINGLE_READER_VALUE and the wait list
 * is empty; the count is then set to SPL_RWSEM_SINGLE_WRITER_VALUE.
 */
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
int ret = 0;
unsigned long flags;
spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags);
if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE &&
list_empty(&rwsem->wait_list)) {
ret = 1;
RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE;
}
spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags);
return (ret);
}
#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT)
/*
 * atomic_long_t count variant: one compare-and-exchange from the
 * single-reader value to the single-writer value; succeeds when the old
 * value read back is the single-reader value.
 */
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
long val;
val = atomic_long_cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE,
SPL_RWSEM_SINGLE_WRITER_VALUE);
return (val == SPL_RWSEM_SINGLE_READER_VALUE);
}
#else
/*
 * Fallback variant: same compare-and-exchange as above, applied with
 * cmpxchg() to whatever type rwsem->count has.
 */
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
typeof(rwsem->count) val;
val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE,
SPL_RWSEM_SINGLE_WRITER_VALUE);
return (val == SPL_RWSEM_SINGLE_READER_VALUE);
}
#endif
/*
 * rwsem_tryupgrade - try to turn a read hold on @rwsem into a write hold
 *
 * Returns 1 if __rwsem_tryupgrade() succeeded, 0 otherwise.  On success
 * the lockdep map is updated with a release followed by an acquire, and
 * on CONFIG_RWSEM_SPIN_ON_OWNER kernels the current task is recorded as
 * owner.
 */
int
rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
	if (!__rwsem_tryupgrade(rwsem))
		return (0);

	rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
	rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	rwsem->owner = current;
#endif

	return (1);
}
EXPORT_SYMBOL(rwsem_tryupgrade);
/* Module init hook for the rwlock code: nothing to set up, returns 0. */
int
spl_rw_init(void)
{
	return (0);
}
/* Module exit hook for the rwlock code: nothing to tear down. */
void
spl_rw_fini(void)
{
}

1305
module/spl/spl-taskq.c Normal file

File diff suppressed because it is too large Load Diff

160
module/spl/spl-thread.c Normal file
View File

@ -0,0 +1,160 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Thread Implementation.
*/
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/tsd.h>
/*
 * Thread interfaces
 *
 * thread_priv_t carries the start-up parameters from __thread_create() to
 * thread_generic_wrapper(), which frees it (and tp_name) before calling
 * tp_func.
 */
typedef struct thread_priv_s {
unsigned long tp_magic; /* Set to TP_MAGIC; asserted by the wrapper */
int tp_name_size; /* Allocated size of tp_name, including NUL */
char *tp_name; /* Name, truncated at the first "_thread" */
void (*tp_func)(void *); /* Registered function */
void *tp_args; /* Args to be passed to function */
size_t tp_len; /* Caller's len; not read by the wrapper */
int tp_state; /* State to start thread at */
pri_t tp_pri; /* Priority to start thread at */
} thread_priv_t;
static int
thread_generic_wrapper(void *arg)
{
thread_priv_t *tp = (thread_priv_t *)arg;
void (*func)(void *);
void *args;
ASSERT(tp->tp_magic == TP_MAGIC);
func = tp->tp_func;
args = tp->tp_args;
set_current_state(tp->tp_state);
set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri));
kmem_free(tp->tp_name, tp->tp_name_size);
kmem_free(tp, sizeof (thread_priv_t));
if (func)
func(args);
return (0);
}
/*
 * __thread_exit - terminate the calling thread
 *
 * Calls tsd_exit() for the thread and then complete_and_exit(NULL, 0).
 */
void
__thread_exit(void)
{
	tsd_exit();

	complete_and_exit(NULL, 0);
	/* Unreachable */
}
EXPORT_SYMBOL(__thread_exit);
/*
 * __thread_create - create and start a kernel thread running @func(@args)
 * @stk:     must be NULL; a caller-supplied stack is not supported
 * @stksize: ignored
 * @func:    function the new thread runs
 * @name:    thread name; it is truncated at the first "_thread"
 * @args:    argument passed to @func
 * @len:     recorded in the hand-off structure only
 * @pp:      ignored
 * @state:   task state the thread sets for itself before running @func
 * @pri:     priority, converted with PRIO_TO_NICE() by the new thread
 *
 * spl_kthread_create() retries internally on -ENOMEM and on pending
 * signals, so this call may block for a long time.  Solaris-style callers
 * rarely check the result, but NULL is still returned when an allocation
 * fails or the thread cannot be created; in both cases everything
 * allocated here is released first.  On success the hand-off structure is
 * owned, and freed, by thread_generic_wrapper().
 */
kthread_t *
__thread_create(caddr_t stk, size_t stksize, thread_func_t func,
    const char *name, void *args, size_t len, proc_t *pp, int state, pri_t pri)
{
	thread_priv_t *tp;
	struct task_struct *tsk;
	char *p;

	/* Option pp is simply ignored */
	/* Variable stack size unsupported */
	ASSERT(stk == NULL);

	tp = kmem_alloc(sizeof (thread_priv_t), KM_PUSHPAGE);
	if (tp == NULL)
		return (NULL);

	tp->tp_magic = TP_MAGIC;
	tp->tp_name_size = strlen(name) + 1;

	tp->tp_name = kmem_alloc(tp->tp_name_size, KM_PUSHPAGE);
	if (tp->tp_name == NULL) {
		kmem_free(tp, sizeof (thread_priv_t));
		return (NULL);
	}

	/* tp_name_size covers the terminator, so the copy is NUL-terminated */
	strncpy(tp->tp_name, name, tp->tp_name_size);

	/*
	 * Strip "_thread" (and anything after it) from the passed name,
	 * which will be the func name since the exposed API has no
	 * parameter for passing a name.
	 */
	p = strstr(tp->tp_name, "_thread");
	if (p)
		p[0] = '\0';

	tp->tp_func = func;
	tp->tp_args = args;
	tp->tp_len = len;
	tp->tp_state = state;
	tp->tp_pri = pri;

	tsk = spl_kthread_create(thread_generic_wrapper, (void *)tp,
	    "%s", tp->tp_name);

	/*
	 * spl_kthread_create() reports failure as NULL, not as an ERR_PTR;
	 * check both.  The wrapper never ran, so tp is still ours to free.
	 */
	if (tsk == NULL || IS_ERR(tsk)) {
		kmem_free(tp->tp_name, tp->tp_name_size);
		kmem_free(tp, sizeof (thread_priv_t));
		return (NULL);
	}

	wake_up_process(tsk);

	return ((kthread_t *)tsk);
}
EXPORT_SYMBOL(__thread_create);
/*
 * spl_kthread_create - kthread_create() with pre-3.13 semantics
 *
 * The name is formatted into a TASK_COMM_LEN buffer and kthread_create()
 * is retried for as long as it fails with a signal pending on the caller
 * (TIF_SIGPENDING is cleared before retrying) or with -ENOMEM.  This makes
 * creation non-killable and less likely to fail for lack of memory.
 *
 * Returns the new task, or NULL (never an ERR_PTR) on any other error.
 */
struct task_struct *
spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...)
{
	char comm[TASK_COMM_LEN];
	struct task_struct *task;
	va_list ap;

	va_start(ap, namefmt);
	vsnprintf(comm, sizeof (comm), namefmt, ap);
	va_end(ap);

	for (;;) {
		task = kthread_create(func, data, "%s", comm);
		if (!IS_ERR(task))
			return (task);

		if (signal_pending(current)) {
			clear_thread_flag(TIF_SIGPENDING);
			continue;
		}

		if (PTR_ERR(task) != -ENOMEM)
			return (NULL);
	}
}
EXPORT_SYMBOL(spl_kthread_create);

720
module/spl/spl-tsd.c Normal file
View File

@ -0,0 +1,720 @@
/*
* Copyright (C) 2010 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
*
* Solaris Porting Layer (SPL) Thread Specific Data Implementation.
*
* Thread specific data is implemented using a hash table; this avoids
* the need to add a member to the task structure and allows maximum
* portability between kernels. This implementation has been optimized
* to keep the tsd_set() and tsd_get() times as small as possible.
*
* The majority of the entries in the hash table are for specific tsd
* entries. These entries are hashed by the product of their key and
* pid because by design the key and pid are guaranteed to be unique.
* Their product also has the desirable property that it will be uniformly
* distributed over the hash bins provided neither the pid nor key is zero.
* Under linux the zero pid is always the init process and thus won't be
* used, and this implementation is careful never to assign a zero key.
* By default the hash table is sized to 512 bins which is expected to
* be sufficient for light to moderate usage of thread specific data.
*
* The hash table contains two additional types of entries. The first
* type of entry is called a 'key' entry and it is added to the hash during
* tsd_create(). It is used to store the address of the destructor function
* and it is used as an anchor point. All tsd entries which use the same
* key will be linked to this entry. This is used during tsd_destroy() to
* quickly call the destructor function for all tsd associated with the key.
* The 'key' entry may be looked up with tsd_hash_search() by passing the
* key you wish to look up and the DTOR_PID constant as the pid.
*
* The second type of entry is called a 'pid' entry and it is added to the
* hash the first time a process sets a key. The 'pid' entry is also used
* as an anchor and all tsd for the process will be linked to it. This
* list is used during tsd_exit() to ensure all registered destructors
* are run for the process. The 'pid' entry may be looked up with
* tsd_hash_search() by passing the PID_KEY constant as the key, and
* the process pid. Note that tsd_exit() is called by thread_exit()
* so if you are using the Solaris thread API you should not need to call
* tsd_exit() directly.
*
*/
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/tsd.h>
#include <linux/hash.h>
/* One bucket of the tsd hash table. */
typedef struct tsd_hash_bin {
spinlock_t hb_lock; /* held while hb_head is walked or modified */
struct hlist_head hb_head; /* entries hashing to this bin (he_list) */
} tsd_hash_bin_t;
/* The tsd hash table: an array of 2^ht_bits bins plus the key counter. */
typedef struct tsd_hash_table {
spinlock_t ht_lock; /* table-wide lock, taken before any bin lock */
uint_t ht_bits; /* log2 of the number of bins */
uint_t ht_key; /* counter advanced by tsd_hash_add_key() */
tsd_hash_bin_t *ht_bins; /* array of (1 << ht_bits) bins */
} tsd_hash_table_t;
/*
 * A hash table entry.  The same structure is used for per-thread values,
 * for 'key' anchors (he_pid == DTOR_PID) and for 'pid' anchors
 * (he_key == PID_KEY).
 */
typedef struct tsd_hash_entry {
uint_t he_key; /* tsd key, or PID_KEY for a pid anchor */
pid_t he_pid; /* owning pid, or DTOR_PID for a key anchor */
dtor_func_t he_dtor; /* destructor for he_value; may be NULL */
void *he_value; /* the thread specific value */
struct hlist_node he_list; /* linkage in the hash bin (or a work list) */
struct list_head he_key_list; /* linkage on the key anchor's list */
struct list_head he_pid_list; /* linkage on the pid anchor's list */
} tsd_hash_entry_t;
/* The single hash table used by tsd_set(), tsd_get() and tsd_create(). */
static tsd_hash_table_t *tsd_hash_table = NULL;
/*
 * tsd_hash_search - look up the entry for a key/pid pair
 * @table: hash table
 * @key: search key
 * @pid: search pid
 *
 * Only the lock of the bin selected by hashing key * pid is taken, and it
 * is dropped again before returning.  Returns the matching entry, or NULL
 * if the bin holds none.
 */
static tsd_hash_entry_t *
tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
{
	tsd_hash_entry_t *found = NULL;
	tsd_hash_entry_t *cand;
	struct hlist_node *pos;
	tsd_hash_bin_t *bin;
	ulong_t idx;

	idx = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
	bin = &table->ht_bins[idx];

	spin_lock(&bin->hb_lock);
	hlist_for_each(pos, &bin->hb_head) {
		cand = list_entry(pos, tsd_hash_entry_t, he_list);
		if (cand->he_key == key && cand->he_pid == pid) {
			found = cand;
			break;
		}
	}
	spin_unlock(&bin->hb_lock);

	return (found);
}
/*
* tsd_hash_dtor - call the destructor and free all entries on the list
* @work: list of hash entries
*
* For a list of entries which have all already been removed from the
* hash call their registered destructor then free the associated memory.
*/
static void
tsd_hash_dtor(struct hlist_head *work)
{
tsd_hash_entry_t *entry;
while (!hlist_empty(work)) {
entry = hlist_entry(work->first, tsd_hash_entry_t, he_list);
hlist_del(&entry->he_list);
if (entry->he_dtor && entry->he_pid != DTOR_PID)
entry->he_dtor(entry->he_value);
kmem_free(entry, sizeof (tsd_hash_entry_t));
}
}
/*
 * tsd_hash_add - insert a new thread specific entry
 * @table: hash table
 * @key: search key
 * @pid: search pid
 * @value: value to store
 *
 * The caller guarantees that no entry for key/pid exists yet; this holds
 * because entries are thread specific, so no other thread ever adds the
 * same pair.  The key anchor (key, DTOR_PID) and the pid anchor
 * (PID_KEY, pid) must already be present.  Since the new entry is linked
 * to anchors living in other bins, the whole table is locked around the
 * insertion.
 *
 * Returns 0 on success or ENOMEM if the entry cannot be allocated.
 */
static int
tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
{
	tsd_hash_entry_t *fresh, *key_anchor, *pid_anchor;
	tsd_hash_bin_t *bin;
	ulong_t idx;

	ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);

	/* Build the entry completely before taking any lock. */
	fresh = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
	if (fresh == NULL)
		return (ENOMEM);

	fresh->he_key = key;
	fresh->he_pid = pid;
	fresh->he_value = value;
	INIT_HLIST_NODE(&fresh->he_list);
	INIT_LIST_HEAD(&fresh->he_key_list);
	INIT_LIST_HEAD(&fresh->he_pid_list);

	spin_lock(&table->ht_lock);

	/* Every valid key has a destructor (key anchor) entry. */
	key_anchor = tsd_hash_search(table, fresh->he_key, DTOR_PID);
	ASSERT3P(key_anchor, !=, NULL);
	fresh->he_dtor = key_anchor->he_dtor;

	/* Every process using tsd has a pid anchor entry. */
	pid_anchor = tsd_hash_search(table, PID_KEY, fresh->he_pid);
	ASSERT3P(pid_anchor, !=, NULL);

	idx = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
	bin = &table->ht_bins[idx];

	/* Link into the hash bin and onto both anchor lists. */
	spin_lock(&bin->hb_lock);
	hlist_add_head(&fresh->he_list, &bin->hb_head);
	list_add(&fresh->he_key_list, &key_anchor->he_key_list);
	list_add(&fresh->he_pid_list, &pid_anchor->he_pid_list);
	spin_unlock(&bin->hb_lock);

	spin_unlock(&table->ht_lock);

	return (0);
}
/*
 * tsd_hash_add_key - adds a destructor entry to the hash table
 * @table: hash table
 * @keyp: on success, receives the newly assigned key
 * @dtor: key destructor
 *
 * For every unique key there is a single entry in the hash which is used
 * as an anchor.  All other thread specific entries for this key are linked
 * to this anchor via the 'he_key_list' list head.
 *
 * Keys are handed out from a counter that wraps within
 * [1, TSD_KEYS_MAX], the range accepted by tsd_set() and tsd_get(); keys
 * whose anchor is still in the hash are skipped.
 *
 * Returns 0 on success, ENOMEM if the anchor cannot be allocated, or
 * ENOENT when all TSD_KEYS_MAX keys are in use.  *keyp is left untouched
 * on failure.
 */
static int
tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
{
	tsd_hash_entry_t *tmp_entry, *entry;
	tsd_hash_bin_t *bin;
	ulong_t hash;
	int keys_checked = 0;

	ASSERT3P(table, !=, NULL);

	/*
	 * Allocate the anchor before taking the table spinlock; the
	 * allocation cannot be done while the lock is held.
	 */
	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
	if (entry == NULL)
		return (ENOMEM);

	/* Determine next available key value */
	spin_lock(&table->ht_lock);
	do {
		/* Wrap so the counter never exceeds TSD_KEYS_MAX */
		if (table->ht_key++ >= TSD_KEYS_MAX)
			table->ht_key = 1;

		/* Ensure failure when all TSD_KEYS_MAX keys are in use */
		if (keys_checked++ >= TSD_KEYS_MAX) {
			spin_unlock(&table->ht_lock);
			/* The unused anchor must not be leaked */
			kmem_free(entry, sizeof (tsd_hash_entry_t));
			return (ENOENT);
		}

		tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
	} while (tmp_entry);

	/* Add destructor entry in to hash table */
	entry->he_key = *keyp = table->ht_key;
	entry->he_pid = DTOR_PID;
	entry->he_dtor = dtor;
	entry->he_value = NULL;
	INIT_HLIST_NODE(&entry->he_list);
	INIT_LIST_HEAD(&entry->he_key_list);
	INIT_LIST_HEAD(&entry->he_pid_list);

	hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
	bin = &table->ht_bins[hash];

	spin_lock(&bin->hb_lock);
	hlist_add_head(&entry->he_list, &bin->hb_head);
	spin_unlock(&bin->hb_lock);

	spin_unlock(&table->ht_lock);

	return (0);
}
/*
 * tsd_hash_add_pid - adds a process entry to the hash table
 * @table: hash table
 * @pid: search pid
 *
 * Each process has exactly one such entry, keyed (PID_KEY, pid), which
 * acts as an anchor: all thread specific entries of the process hang off
 * its 'he_pid_list' list head.
 *
 * Returns 0 on success or ENOMEM if the anchor cannot be allocated.
 */
static int
tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
{
	tsd_hash_entry_t *anchor;
	tsd_hash_bin_t *bin;
	ulong_t idx;

	/* Allocate entry to be used as the process reference */
	anchor = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
	if (anchor == NULL)
		return (ENOMEM);

	/* The anchor is private until it is linked in, so fill it unlocked. */
	anchor->he_key = PID_KEY;
	anchor->he_pid = pid;
	anchor->he_dtor = NULL;
	anchor->he_value = NULL;
	INIT_HLIST_NODE(&anchor->he_list);
	INIT_LIST_HEAD(&anchor->he_key_list);
	INIT_LIST_HEAD(&anchor->he_pid_list);

	spin_lock(&table->ht_lock);

	idx = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
	bin = &table->ht_bins[idx];

	spin_lock(&bin->hb_lock);
	hlist_add_head(&anchor->he_list, &bin->hb_head);
	spin_unlock(&bin->hb_lock);

	spin_unlock(&table->ht_lock);

	return (0);
}
/*
 * tsd_hash_del - unlink an entry from its hash bin, key list and pid list
 * @table: hash table (unused)
 * @entry: entry to unlink
 *
 * No locks are taken here and the entry is not freed.  The key and pid
 * list heads are re-initialised, so they read as empty afterwards.
 */
static void
tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
{
	struct hlist_node *bin_link = &entry->he_list;

	hlist_del(bin_link);
	list_del_init(&entry->he_key_list);
	list_del_init(&entry->he_pid_list);
}
/*
 * tsd_hash_table_init - allocate a hash table
 * @bits: log2 of the number of hash bins
 *
 * Creates a table with 2^bits bins, each with its own lock and an empty
 * chain.  The table cannot be resized afterwards and must be released
 * with tsd_hash_table_fini().  The first key counter value is 1.
 *
 * Returns the new table, or NULL if either allocation fails.
 */
static tsd_hash_table_t *
tsd_hash_table_init(uint_t bits)
{
	tsd_hash_table_t *table;
	int size = (1 << bits);
	int hash;

	table = kmem_zalloc(sizeof (tsd_hash_table_t), KM_SLEEP);
	if (table == NULL)
		return (NULL);

	table->ht_bins = kmem_zalloc(sizeof (tsd_hash_bin_t) * size, KM_SLEEP);
	if (table->ht_bins == NULL) {
		kmem_free(table, sizeof (tsd_hash_table_t));
		return (NULL);
	}

	hash = 0;
	while (hash < size) {
		spin_lock_init(&table->ht_bins[hash].hb_lock);
		INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
		hash++;
	}

	spin_lock_init(&table->ht_lock);
	table->ht_key = 1;
	table->ht_bits = bits;

	return (table);
}
/*
 * tsd_hash_table_fini - free a hash table
 * @table: hash table
 *
 * Releases a table created by tsd_hash_table_init().  Entries still in
 * the table are moved, under the table and bin locks, onto a private
 * list; their destructors are then run and their memory freed with no
 * locks held, before the bins and the table itself are freed.
 */
static void
tsd_hash_table_fini(tsd_hash_table_t *table)
{
	HLIST_HEAD(work);
	tsd_hash_entry_t *entry;
	tsd_hash_bin_t *bin;
	int nbins, idx;

	ASSERT3P(table, !=, NULL);

	spin_lock(&table->ht_lock);
	nbins = (1 << table->ht_bits);
	for (idx = 0; idx < nbins; idx++) {
		bin = &table->ht_bins[idx];

		spin_lock(&bin->hb_lock);
		for (;;) {
			if (hlist_empty(&bin->hb_head))
				break;

			entry = hlist_entry(bin->hb_head.first,
			    tsd_hash_entry_t, he_list);
			tsd_hash_del(table, entry);
			hlist_add_head(&entry->he_list, &work);
		}
		spin_unlock(&bin->hb_lock);
	}
	spin_unlock(&table->ht_lock);

	/* Destructors run only after every lock has been dropped. */
	tsd_hash_dtor(&work);

	kmem_free(table->ht_bins, sizeof (tsd_hash_bin_t)*(1<<table->ht_bits));
	kmem_free(table, sizeof (tsd_hash_table_t));
}
/*
 * tsd_remove_entry - remove a tsd entry for this thread
 * @entry: entry to remove
 *
 * Remove the thread specific data @entry for this thread.
 * If this is the last entry for this thread, also remove the PID entry.
 *
 * The table lock is held for the whole operation and each bin lock is
 * taken only around the unlink from that bin.  Removed entries are
 * collected on a private list and handed to tsd_hash_dtor() once all
 * locks are dropped, so the destructor and the frees run unlocked.
 */
static void
tsd_remove_entry(tsd_hash_entry_t *entry)
{
HLIST_HEAD(work);
tsd_hash_table_t *table;
tsd_hash_entry_t *pid_entry;
tsd_hash_bin_t *pid_entry_bin, *entry_bin;
ulong_t hash;
table = tsd_hash_table;
ASSERT3P(table, !=, NULL);
ASSERT3P(entry, !=, NULL);
spin_lock(&table->ht_lock);
hash = hash_long((ulong_t)entry->he_key *
(ulong_t)entry->he_pid, table->ht_bits);
entry_bin = &table->ht_bins[hash];
/*
* Save the possible pid_entry: the element following @entry on the
* pid list. It may instead be another tsd entry of the same thread,
* which is why he_key is compared with PID_KEY below. This must be
* read before tsd_hash_del() resets entry->he_pid_list.
*/
pid_entry = list_entry(entry->he_pid_list.next, tsd_hash_entry_t,
he_pid_list);
/* remove entry */
spin_lock(&entry_bin->hb_lock);
tsd_hash_del(table, entry);
hlist_add_head(&entry->he_list, &work);
spin_unlock(&entry_bin->hb_lock);
/* if pid_entry is indeed pid_entry, then remove it if it's empty */
if (pid_entry->he_key == PID_KEY &&
list_empty(&pid_entry->he_pid_list)) {
hash = hash_long((ulong_t)pid_entry->he_key *
(ulong_t)pid_entry->he_pid, table->ht_bits);
pid_entry_bin = &table->ht_bins[hash];
spin_lock(&pid_entry_bin->hb_lock);
tsd_hash_del(table, pid_entry);
hlist_add_head(&pid_entry->he_list, &work);
spin_unlock(&pid_entry_bin->hb_lock);
}
spin_unlock(&table->ht_lock);
/* Run the destructor and free the removed entries with no locks held */
tsd_hash_dtor(&work);
}
/*
 * tsd_set - set thread specific data
 * @key: lookup key
 * @value: value to set; NULL removes the entry
 *
 * The caller must prevent races with tsd_create() and tsd_destroy().
 * Races with tsd_get() or tsd_set() cannot occur because the data is
 * thread specific.  Updating an existing value is the fast path; the
 * first set for a key is slower because it takes more locks and may
 * allocate memory.
 *
 * Returns 0 on success, EINVAL for a key outside [1, TSD_KEYS_MAX], or
 * the error from tsd_hash_add_pid() / tsd_hash_add().
 */
int
tsd_set(uint_t key, void *value)
{
	tsd_hash_table_t *table = tsd_hash_table;
	pid_t pid = curthread->pid;
	tsd_hash_entry_t *entry;
	int rc;

	ASSERT3P(table, !=, NULL);

	if (key == 0 || key > TSD_KEYS_MAX)
		return (EINVAL);

	/* Fast path: the entry exists, so update it in place. */
	entry = tsd_hash_search(table, key, pid);
	if (entry != NULL) {
		entry->he_value = value;

		/* A NULL value means the entry is to be removed. */
		if (value == NULL)
			tsd_remove_entry(entry);

		return (0);
	}

	/* Nothing stored and nothing to store: do not create an entry. */
	if (value == NULL)
		return (0);

	/* First tsd for this process: create its pid anchor. */
	if (tsd_hash_search(table, PID_KEY, pid) == NULL) {
		rc = tsd_hash_add_pid(table, pid);
		if (rc)
			return (rc);
	}

	return (tsd_hash_add(table, key, pid, value));
}
EXPORT_SYMBOL(tsd_set);
/*
 * tsd_get - get thread specific data
 * @key: lookup key
 *
 * The caller must prevent races with tsd_create() and tsd_destroy().
 * The lookup locks a single hash bin, not the whole table.
 *
 * Returns the value stored by the current thread for @key, or NULL if the
 * key is outside [1, TSD_KEYS_MAX] or no entry exists.
 */
void *
tsd_get(uint_t key)
{
	tsd_hash_entry_t *hit;

	ASSERT3P(tsd_hash_table, !=, NULL);

	if (key == 0 || key > TSD_KEYS_MAX)
		return (NULL);

	hit = tsd_hash_search(tsd_hash_table, key, curthread->pid);

	return (hit != NULL ? hit->he_value : NULL);
}
EXPORT_SYMBOL(tsd_get);
/*
 * tsd_get_by_thread - get thread specific data for specified thread
 * @key: lookup key
 * @thread: thread to look up
 *
 * The caller must prevent races with tsd_create() and tsd_destroy().
 * The lookup locks a single hash bin, not the whole table.
 *
 * Returns the value stored by @thread for @key, or NULL if the key is
 * outside [1, TSD_KEYS_MAX] or no entry exists.
 */
void *
tsd_get_by_thread(uint_t key, kthread_t *thread)
{
	tsd_hash_entry_t *hit;

	ASSERT3P(tsd_hash_table, !=, NULL);

	if (key == 0 || key > TSD_KEYS_MAX)
		return (NULL);

	hit = tsd_hash_search(tsd_hash_table, key, thread->pid);

	return (hit != NULL ? hit->he_value : NULL);
}
EXPORT_SYMBOL(tsd_get_by_thread);
/*
 * tsd_create - create thread specific data key
 * @keyp: lookup key address
 * @dtor: destructor called during tsd_destroy() or tsd_exit()
 *
 * Provided key must be set to 0 or it is assumed to be already in use,
 * in which case nothing is done.  The dtor is allowed to be NULL in which
 * case no additional cleanup for the data is performed during
 * tsd_destroy() or tsd_exit().
 *
 * Caller must prevent racing tsd_set() or tsd_get(), this function is
 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
 */
void
tsd_create(uint_t *keyp, dtor_func_t dtor)
{
	ASSERT3P(keyp, !=, NULL);

	/* Only a zero key is eligible; the add result is deliberately ignored */
	if (*keyp == 0)
		(void) tsd_hash_add_key(tsd_hash_table, keyp, dtor);
}
EXPORT_SYMBOL(tsd_create);
/*
 * tsd_destroy - destroy thread specific data
 * @keyp: lookup key address
 *
 * Destroys the thread specific data on all threads which use this key.
 * On return *keyp is reset to 0, unless no DTOR_PID entry exists for the
 * key, in which case the function returns early and *keyp is left as is.
 *
 * Caller must prevent racing tsd_set() or tsd_get(), this function is
 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
 */
void
tsd_destroy(uint_t *keyp)
{
/* Private list collecting every entry to be destroyed */
HLIST_HEAD(work);
tsd_hash_table_t *table;
tsd_hash_entry_t *dtor_entry, *entry;
tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
ulong_t hash;
table = tsd_hash_table;
ASSERT3P(table, !=, NULL);
/* ht_lock is held for the whole unlink phase below */
spin_lock(&table->ht_lock);
dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
if (dtor_entry == NULL) {
spin_unlock(&table->ht_lock);
return;
}
/*
 * All threads which use this key must be linked off of the
 * DTOR_PID entry. They are removed from the hash table and
 * linked in to a private working list to be destroyed.
 *
 * NOTE(review): the loop only terminates if tsd_hash_del() also unlinks
 * the entry from he_key_list -- confirm against tsd_hash_del().
 */
while (!list_empty(&dtor_entry->he_key_list)) {
entry = list_entry(dtor_entry->he_key_list.next,
tsd_hash_entry_t, he_key_list);
ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
/* Recompute the entry's bin so its per-bin lock can be taken */
hash = hash_long((ulong_t)entry->he_key *
(ulong_t)entry->he_pid, table->ht_bits);
entry_bin = &table->ht_bins[hash];
spin_lock(&entry_bin->hb_lock);
tsd_hash_del(table, entry);
hlist_add_head(&entry->he_list, &work);
spin_unlock(&entry_bin->hb_lock);
}
/* Finally unlink the DTOR_PID entry itself, under its own bin lock */
hash = hash_long((ulong_t)dtor_entry->he_key *
(ulong_t)dtor_entry->he_pid, table->ht_bits);
dtor_entry_bin = &table->ht_bins[hash];
spin_lock(&dtor_entry_bin->hb_lock);
tsd_hash_del(table, dtor_entry);
hlist_add_head(&dtor_entry->he_list, &work);
spin_unlock(&dtor_entry_bin->hb_lock);
spin_unlock(&table->ht_lock);
/* The work list is processed only after every lock has been dropped */
tsd_hash_dtor(&work);
*keyp = 0;
}
EXPORT_SYMBOL(tsd_destroy);
/*
 * tsd_exit - destroys all thread specific data for this thread
 *
 * Destroys all the thread specific data for this thread.  Returns without
 * doing anything when the calling thread has no PID_KEY entry.
 *
 * Caller must prevent racing tsd_set() or tsd_get(), this function is
 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
 */
void
tsd_exit(void)
{
/* Private list collecting every entry to be destroyed */
HLIST_HEAD(work);
tsd_hash_table_t *table;
tsd_hash_entry_t *pid_entry, *entry;
tsd_hash_bin_t *pid_entry_bin, *entry_bin;
ulong_t hash;
table = tsd_hash_table;
ASSERT3P(table, !=, NULL);
/* ht_lock is held for the whole unlink phase below */
spin_lock(&table->ht_lock);
pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
if (pid_entry == NULL) {
spin_unlock(&table->ht_lock);
return;
}
/*
 * All keys associated with this pid must be linked off of the
 * PID_KEY entry. They are removed from the hash table and
 * linked in to a private working list to be destroyed.
 *
 * NOTE(review): the loop only terminates if tsd_hash_del() also unlinks
 * the entry from he_pid_list -- confirm against tsd_hash_del().
 */
while (!list_empty(&pid_entry->he_pid_list)) {
entry = list_entry(pid_entry->he_pid_list.next,
tsd_hash_entry_t, he_pid_list);
ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
/* Recompute the entry's bin so its per-bin lock can be taken */
hash = hash_long((ulong_t)entry->he_key *
(ulong_t)entry->he_pid, table->ht_bits);
entry_bin = &table->ht_bins[hash];
spin_lock(&entry_bin->hb_lock);
tsd_hash_del(table, entry);
hlist_add_head(&entry->he_list, &work);
spin_unlock(&entry_bin->hb_lock);
}
/* Finally unlink the PID_KEY entry itself, under its own bin lock */
hash = hash_long((ulong_t)pid_entry->he_key *
(ulong_t)pid_entry->he_pid, table->ht_bits);
pid_entry_bin = &table->ht_bins[hash];
spin_lock(&pid_entry_bin->hb_lock);
tsd_hash_del(table, pid_entry);
hlist_add_head(&pid_entry->he_list, &work);
spin_unlock(&pid_entry_bin->hb_lock);
spin_unlock(&table->ht_lock);
/* The work list is processed only after every lock has been dropped */
tsd_hash_dtor(&work);
}
EXPORT_SYMBOL(tsd_exit);
/*
 * spl_tsd_init - allocate the global TSD hash table
 *
 * Returns 0 on success and 1 when tsd_hash_table_init() returns NULL.
 */
int
spl_tsd_init(void)
{
	tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);

	return ((tsd_hash_table == NULL) ? 1 : 0);
}
/*
 * spl_tsd_fini - tear down the global TSD hash table
 *
 * Hands the table to tsd_hash_table_fini() and then clears the global
 * pointer.
 */
void
spl_tsd_fini(void)
{
tsd_hash_table_fini(tsd_hash_table);
tsd_hash_table = NULL;
}

135
module/spl/spl-vmem.c Normal file
View File

@ -0,0 +1,135 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
#include <sys/debug.h>
#include <sys/vmem.h>
#include <sys/kmem_cache.h>
#include <sys/shrinker.h>
#include <linux/module.h>
/*
 * Exported vmem arena handles.  All three are initialised to NULL and
 * nothing in the code visible here assigns them; vmem_size() accepts an
 * arena argument but does not read it.
 */
vmem_t *heap_arena = NULL;
EXPORT_SYMBOL(heap_arena);
vmem_t *zio_alloc_arena = NULL;
EXPORT_SYMBOL(zio_alloc_arena);
vmem_t *zio_arena = NULL;
EXPORT_SYMBOL(zio_arena);
/* Starting value of the usage estimate computed by vmem_size() */
#define VMEM_FLOOR_SIZE (4 * 1024 * 1024) /* 4MB floor */
/*
 * vmem_size - approximate virtual memory usage
 * @vmp: arena handle (not read)
 * @typemask: VMEM_ALLOC, VMEM_FREE, or both
 *
 * Return approximate virtual memory usage based on these assumptions:
 *
 * 1) The major SPL consumer of virtual memory is the kmem cache.
 * 2) Memory allocated with vmem_alloc() is short lived and can be ignored.
 * 3) Allow a 4MB floor as a generous pad given normal consumption.
 * 4) The spl_kmem_cache_sem only contends with cache create/destroy.
 *
 * Returns VMALLOC_TOTAL when both bits are set, the estimated allocated
 * size (capped at VMALLOC_TOTAL) for VMEM_ALLOC, the estimated free size
 * (never below zero) for VMEM_FREE, and 0 when neither bit is set.
 */
size_t
vmem_size(vmem_t *vmp, int typemask)
{
	spl_kmem_cache_t *skc;
	size_t alloc = VMEM_FLOOR_SIZE;

	if ((typemask & VMEM_ALLOC) && (typemask & VMEM_FREE))
		return (VMALLOC_TOTAL);

	/* Sum the slab footprint of every KMC_VMEM cache */
	down_read(&spl_kmem_cache_sem);
	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
		if (skc->skc_flags & KMC_VMEM)
			alloc += skc->skc_slab_size * skc->skc_slab_total;
	}
	up_read(&spl_kmem_cache_sem);

	if (typemask & VMEM_ALLOC)
		return (MIN(alloc, VMALLOC_TOTAL));

	if (typemask & VMEM_FREE) {
		/*
		 * alloc is a size_t, so "VMALLOC_TOTAL - alloc" is computed
		 * in unsigned arithmetic and would wrap to a huge value
		 * rather than go negative.  Compare first so the result is
		 * really clamped at zero.
		 */
		if (alloc >= VMALLOC_TOTAL)
			return (0);

		return (VMALLOC_TOTAL - alloc);
	}

	return (0);
}
EXPORT_SYMBOL(vmem_size);
/*
 * Public vmem_alloc(), vmem_zalloc() and vmem_free() interfaces.
 */

/*
 * spl_vmem_alloc - allocate @size bytes with KM_VMEM added to @flags
 *
 * @flags may only contain bits from KM_PUBLIC_MASK (asserted).  @func and
 * @line are forwarded only when both DEBUG_KMEM and DEBUG_KMEM_TRACKING
 * are defined.
 */
void *
spl_vmem_alloc(size_t size, int flags, const char *func, int line)
{
	void *ptr;

	ASSERT0(flags & ~KM_PUBLIC_MASK);

	flags |= KM_VMEM;

#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
	ptr = spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE);
#elif defined(DEBUG_KMEM)
	ptr = spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE);
#else
	ptr = spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE);
#endif

	return (ptr);
}
EXPORT_SYMBOL(spl_vmem_alloc);
/*
 * spl_vmem_zalloc - as spl_vmem_alloc() but with KM_ZERO also set
 *
 * @flags may only contain bits from KM_PUBLIC_MASK (asserted).  @func and
 * @line are forwarded only when both DEBUG_KMEM and DEBUG_KMEM_TRACKING
 * are defined.
 */
void *
spl_vmem_zalloc(size_t size, int flags, const char *func, int line)
{
	void *ptr;

	ASSERT0(flags & ~KM_PUBLIC_MASK);

	flags |= (KM_VMEM | KM_ZERO);

#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
	ptr = spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE);
#elif defined(DEBUG_KMEM)
	ptr = spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE);
#else
	ptr = spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE);
#endif

	return (ptr);
}
EXPORT_SYMBOL(spl_vmem_zalloc);
/*
 * spl_vmem_free - release a buffer obtained from spl_vmem_alloc() or
 * spl_vmem_zalloc()
 * @buf: buffer to free
 * @size: size passed through to the underlying free routine
 *
 * The free routine is chosen at build time by DEBUG_KMEM and
 * DEBUG_KMEM_TRACKING.  The calls are plain statements: ISO C does not
 * allow "return <expression>;" in a function returning void.
 */
void
spl_vmem_free(const void *buf, size_t size)
{
#if !defined(DEBUG_KMEM)
	spl_kmem_free_impl(buf, size);
#elif !defined(DEBUG_KMEM_TRACKING)
	spl_kmem_free_debug(buf, size);
#else
	spl_kmem_free_track(buf, size);
#endif
}
EXPORT_SYMBOL(spl_vmem_free);
/*
 * spl_vmem_init - initialisation entry point for the vmem layer
 *
 * Performs no work and always returns 0.
 */
int
spl_vmem_init(void)
{
	return (0);
}
/*
 * spl_vmem_fini - teardown entry point for the vmem layer
 *
 * Intentionally empty: spl_vmem_init() sets nothing up.
 */
void
spl_vmem_fini(void)
{
}

779
module/spl/spl-vnode.c Normal file
View File

@ -0,0 +1,779 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Vnode Implementation.
*/
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/kmem_cache.h>
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#ifdef HAVE_FDTABLE_HEADER
#include <linux/fdtable.h>
#endif
/*
 * Placeholder root vnode.  The value is an arbitrary non-NULL constant;
 * in the code visible here it is only compared against (see the ASSERT
 * in vn_openat()) and never dereferenced.
 */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
EXPORT_SYMBOL(rootdir);
/* Object caches for struct vnode and file_t, created in spl_vn_init() */
static spl_kmem_cache_t *vn_cache;
static spl_kmem_cache_t *vn_file_cache;
/*
 * vn_file_list tracks the file_t objects handed out by vn_getf();
 * vn_file_lock is held around every access to the list in this file.
 */
static DEFINE_SPINLOCK(vn_file_lock);
static LIST_HEAD(vn_file_list);
/*
 * spl_filp_fallocate - call the fallocate callback of an open file
 *
 * Uses f_op->fallocate when HAVE_FILE_FALLOCATE is defined, otherwise the
 * inode operation when HAVE_INODE_FALLOCATE is defined.  Returns the
 * callback's result, or -EOPNOTSUPP when no callback is available.
 */
static int
spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len)
{
	int error = -EOPNOTSUPP;

#if defined(HAVE_FILE_FALLOCATE)
	if (fp->f_op->fallocate)
		error = fp->f_op->fallocate(fp, mode, offset, len);
#elif defined(HAVE_INODE_FALLOCATE)
	if (fp->f_dentry && fp->f_dentry->d_inode &&
	    fp->f_dentry->d_inode->i_op->fallocate)
		error = fp->f_dentry->d_inode->i_op->fallocate(
		    fp->f_dentry->d_inode, mode, offset, len);
#endif /* HAVE_FILE_FALLOCATE / HAVE_INODE_FALLOCATE */

	return (error);
}
/*
 * spl_filp_fsync - compatibility wrapper around vfs_fsync()
 *
 * Selects the two- or three-argument form of vfs_fsync() at build time
 * and returns its result unchanged.
 */
static int
spl_filp_fsync(struct file *fp, int sync)
{
	int rc;

#ifdef HAVE_2ARGS_VFS_FSYNC
	rc = vfs_fsync(fp, sync);
#else
	rc = vfs_fsync(fp, fp->f_dentry, sync);
#endif /* HAVE_2ARGS_VFS_FSYNC */

	return (rc);
}
/*
 * spl_kernel_write - write a kernel buffer to a file
 *
 * Calls kernel_write() directly when HAVE_KERNEL_WRITE_PPOS is defined.
 * Otherwise it calls vfs_write() with the address limit temporarily set
 * to get_ds(), restoring the saved limit afterwards.
 */
static ssize_t
spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{
#if defined(HAVE_KERNEL_WRITE_PPOS)
	return (kernel_write(file, buf, count, pos));
#else
	mm_segment_t old_fs = get_fs();
	ssize_t nwritten;

	set_fs(get_ds());
	nwritten = vfs_write(file, (__force const char __user *)buf, count,
	    pos);
	set_fs(old_fs);

	return (nwritten);
#endif
}
/*
 * spl_kernel_read - read from a file into a kernel buffer
 *
 * Calls kernel_read() directly when HAVE_KERNEL_READ_PPOS is defined.
 * Otherwise it calls vfs_read() with the address limit temporarily set
 * to get_ds(), restoring the saved limit afterwards.
 */
static ssize_t
spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
#if defined(HAVE_KERNEL_READ_PPOS)
	return (kernel_read(file, buf, count, pos));
#else
	mm_segment_t old_fs = get_fs();
	ssize_t nread;

	set_fs(get_ds());
	nread = vfs_read(file, (void __user *)buf, count, pos);
	set_fs(old_fs);

	return (nread);
#endif
}
/*
 * vn_mode_to_vtype - map the file type bits of a mode to a vtype_t
 *
 * Returns VNON when @mode matches none of the tested file types.
 */
vtype_t
vn_mode_to_vtype(mode_t mode)
{
	vtype_t vtype = VNON;

	if (S_ISREG(mode))
		vtype = VREG;
	else if (S_ISDIR(mode))
		vtype = VDIR;
	else if (S_ISCHR(mode))
		vtype = VCHR;
	else if (S_ISBLK(mode))
		vtype = VBLK;
	else if (S_ISFIFO(mode))
		vtype = VFIFO;
	else if (S_ISLNK(mode))
		vtype = VLNK;
	else if (S_ISSOCK(mode))
		vtype = VSOCK;

	return (vtype);
} /* vn_mode_to_vtype() */
EXPORT_SYMBOL(vn_mode_to_vtype);
/*
 * vn_vtype_to_mode - map a vtype_t to the matching S_IF* file type bits
 *
 * Returns 0 (no file type bits) for any vtype without a mapping.  The
 * fallback used to be written as the vtype enumerator VNON, which mixes
 * the two value spaces; the literal 0 states the intent in mode_t terms.
 * NOTE(review): this is behaviour-neutral only if VNON == 0 -- confirm
 * against the vtype_t definition.
 */
mode_t
vn_vtype_to_mode(vtype_t vtype)
{
	switch (vtype) {
	case VREG:
		return (S_IFREG);
	case VDIR:
		return (S_IFDIR);
	case VCHR:
		return (S_IFCHR);
	case VBLK:
		return (S_IFBLK);
	case VFIFO:
		return (S_IFIFO);
	case VLNK:
		return (S_IFLNK);
	case VSOCK:
		return (S_IFSOCK);
	default:
		return (0);
	}
} /* vn_vtype_to_mode() */
EXPORT_SYMBOL(vn_vtype_to_mode);
/*
 * vn_alloc - allocate a vnode from vn_cache
 * @flag: allocation flags handed to kmem_cache_alloc()
 *
 * On success v_file is set to NULL and v_type to 0.  Returns NULL when
 * the cache allocation fails.
 */
vnode_t *
vn_alloc(int flag)
{
	vnode_t *vp = kmem_cache_alloc(vn_cache, flag);

	if (vp == NULL)
		return (NULL);

	vp->v_file = NULL;
	vp->v_type = 0;

	return (vp);
} /* vn_alloc() */
EXPORT_SYMBOL(vn_alloc);
/*
 * vn_free - return a vnode to vn_cache
 * @vp: vnode to release
 */
void
vn_free(vnode_t *vp)
{
kmem_cache_free(vn_cache, vp);
} /* vn_free() */
EXPORT_SYMBOL(vn_free);
/*
 * vn_open - open a file by path and wrap it in a newly allocated vnode
 * @path: path handed to filp_open()
 * @seg: must be UIO_SYSSPACE (asserted)
 * @flags: open flags; at least one of FREAD/FWRITE must be set (asserted)
 * @mode: creation mode handed to filp_open()
 * @vpp: receives the new vnode on success; set to NULL on entry
 * @x1, @x2: unused
 *
 * Returns 0 on success or a positive errno.  The vnode is released, and
 * the file closed, by vn_close().
 */
int
vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp,
int x1, void *x2)
{
struct file *fp;
struct kstat stat;
int rc, saved_umask = 0;
gfp_t saved_gfp;
vnode_t *vp;
ASSERT(flags & (FWRITE | FREAD));
ASSERT(seg == UIO_SYSSPACE);
ASSERT(vpp);
*vpp = NULL;
/* A write open without FCREAT also gets FEXCL set */
if (!(flags & FCREAT) && (flags & FWRITE))
flags |= FEXCL;
/*
 * Note for filp_open() the two low bits must be remapped to mean:
 * 01 - read-only -> 00 read-only
 * 10 - write-only -> 01 write-only
 * 11 - read-write -> 10 read-write
 */
flags--;
/* Zero the umask across a creating open, then put the old value back */
if (flags & FCREAT)
saved_umask = xchg(&current->fs->umask, 0);
fp = filp_open(path, flags, mode);
if (flags & FCREAT)
(void) xchg(&current->fs->umask, saved_umask);
/* filp_open() reports failure as an ERR_PTR; flip it to a positive errno */
if (IS_ERR(fp))
return (-PTR_ERR(fp));
/* Only the file type is needed from the attributes (see v_type below) */
#if defined(HAVE_4ARGS_VFS_GETATTR)
rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
rc = vfs_getattr(&fp->f_path, &stat);
#else
rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
#endif
if (rc) {
filp_close(fp, 0);
return (-rc);
}
vp = vn_alloc(KM_SLEEP);
if (!vp) {
filp_close(fp, 0);
return (ENOMEM);
}
/*
 * Clear __GFP_IO and __GFP_FS from the mapping's allocation mask while
 * the vnode is open.  The original mask is kept in v_gfp_mask and put
 * back by vn_close().
 */
saved_gfp = mapping_gfp_mask(fp->f_mapping);
mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));
mutex_enter(&vp->v_lock);
vp->v_type = vn_mode_to_vtype(stat.mode);
vp->v_file = fp;
vp->v_gfp_mask = saved_gfp;
*vpp = vp;
mutex_exit(&vp->v_lock);
return (0);
} /* vn_open() */
EXPORT_SYMBOL(vn_open);
/*
 * vn_openat - open @path relative to the root directory
 *
 * Only @vp == rootdir is supported (asserted).  The path is prefixed with
 * '/' and passed to vn_open(); @fd is unused.  Returns vn_open()'s
 * result, or ENOMEM when the temporary path buffer cannot be allocated.
 */
int
vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
    vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
{
	char *abspath;
	int size, error;

	ASSERT(vp == rootdir);

	/* Room for the leading '/' and the terminating NUL */
	size = strlen(path) + 2;

	abspath = kmalloc(size, kmem_flags_convert(KM_SLEEP));
	if (abspath == NULL)
		return (ENOMEM);

	(void) snprintf(abspath, size, "/%s", path);
	error = vn_open(abspath, seg, flags, mode, vpp, x1, x2);
	kfree(abspath);

	return (error);
} /* vn_openat() */
EXPORT_SYMBOL(vn_openat);
/*
 * vn_rdwr - read from or write to the file behind a vnode
 * @uio: UIO_READ or UIO_WRITE (asserted)
 * @vp: vnode whose v_file is used
 * @addr: kernel buffer
 * @len: number of bytes to transfer
 * @off: file offset, ignored when FAPPEND is set in @ioflag
 * @seg: must be UIO_SYSSPACE (asserted)
 * @ioflag: 0 or FAPPEND (asserted)
 * @x2, @x3: unused
 * @residp: if non-NULL, receives the number of bytes NOT transferred
 *
 * Returns 0 on success or a positive errno.  When @residp is NULL a short
 * transfer is reported as EIO.  The file position is updated from the
 * offset left behind by the I/O call, including when that call fails.
 */
int
vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
    uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
{
	struct file *fp = vp->v_file;
	loff_t offset = off;
	/* ssize_t matches the I/O helpers and the len/residp arithmetic */
	ssize_t rc;

	ASSERT(uio == UIO_WRITE || uio == UIO_READ);
	ASSERT(seg == UIO_SYSSPACE);
	ASSERT((ioflag & ~FAPPEND) == 0);

	if (ioflag & FAPPEND)
		offset = fp->f_pos;

	/* uio is an enumeration, not a bit mask, so compare for equality */
	if (uio == UIO_WRITE)
		rc = spl_kernel_write(fp, addr, len, &offset);
	else
		rc = spl_kernel_read(fp, addr, len, &offset);

	fp->f_pos = offset;

	if (rc < 0)
		return ((int)-rc);

	if (residp) {
		*residp = len - rc;
	} else {
		if (rc != len)
			return (EIO);
	}

	return (0);
} /* vn_rdwr() */
EXPORT_SYMBOL(vn_rdwr);
/*
 * vn_close - close the file behind a vnode and free the vnode
 *
 * Restores the mapping's allocation mask from v_gfp_mask, closes the
 * file, and frees @vp.  Returns the negated filp_close() result.  The
 * remaining arguments are unused.
 */
int
vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
{
	struct file *fp;
	int rc;

	ASSERT(vp);
	ASSERT(vp->v_file);

	fp = vp->v_file;
	mapping_set_gfp_mask(fp->f_mapping, vp->v_gfp_mask);
	rc = filp_close(fp, 0);
	vn_free(vp);

	return (-rc);
} /* vn_close() */
EXPORT_SYMBOL(vn_close);
/*
 * vn_seek() does not actually seek it only performs bounds checking on the
 * proposed seek.  We perform minimal checking and allow vn_rdwr() to catch
 * anything more serious.
 *
 * Returns EINVAL when *noffp is negative or above MAXOFFSET_T, otherwise
 * 0.  @vp, @ooff and @ct are unused.
 */
int
vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
{
	if (*noffp < 0 || *noffp > MAXOFFSET_T)
		return (EINVAL);

	return (0);
}
EXPORT_SYMBOL(vn_seek);
/*
 * vn_getattr - fill a vattr_t from the attributes of an open vnode
 * @vp: vnode with a valid v_file (asserted)
 * @vap: attribute structure to fill (asserted non-NULL)
 * @flags, @x3, @x4: unused
 *
 * Returns 0 on success or the negated vfs_getattr() error.  va_fsid is
 * always set to 0.
 */
int
vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
{
	struct kstat ks;
	struct file *fp;
	int rc;

	ASSERT(vp);
	ASSERT(vp->v_file);
	ASSERT(vap);

	fp = vp->v_file;

#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &ks, STATX_BASIC_STATS,
	    AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &ks);
#else
	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &ks);
#endif
	if (rc != 0)
		return (-rc);

	/* Type, permissions and ownership */
	vap->va_type = vn_mode_to_vtype(ks.mode);
	vap->va_mode = ks.mode;
	vap->va_uid = KUID_TO_SUID(ks.uid);
	vap->va_gid = KGID_TO_SGID(ks.gid);

	/* Identity */
	vap->va_fsid = 0;
	vap->va_nodeid = ks.ino;
	vap->va_nlink = ks.nlink;
	vap->va_rdev = ks.rdev;

	/* Size information */
	vap->va_size = ks.size;
	vap->va_blksize = ks.blksize;
	vap->va_nblocks = ks.blocks;

	/* Timestamps */
	vap->va_atime = ks.atime;
	vap->va_mtime = ks.mtime;
	vap->va_ctime = ks.ctime;

	return (0);
}
EXPORT_SYMBOL(vn_getattr);
/*
 * vn_fsync - flush the file behind a vnode
 * @vp: vnode with a valid v_file (asserted)
 * @flags: FDSYNC requests a data-only sync
 * @x3, @x4: unused
 *
 * Returns the negated spl_filp_fsync() result.
 */
int
vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
{
	int datasync, error, fstrans;

	ASSERT(vp);
	ASSERT(vp->v_file);

	datasync = (flags & FDSYNC) ? 1 : 0;

	/*
	 * May enter XFS which generates a warning when PF_FSTRANS is set.
	 * To avoid this the flag is cleared over the fsync call and then
	 * set again afterwards.
	 */
	fstrans = __spl_pf_fstrans_check();
	if (fstrans)
		current->flags &= ~(__SPL_PF_FSTRANS);

	error = -spl_filp_fsync(vp->v_file, datasync);

	if (fstrans)
		current->flags |= __SPL_PF_FSTRANS;

	return (error);
} /* vn_fsync() */
EXPORT_SYMBOL(vn_fsync);
/*
 * vn_space - free (punch a hole in) a byte range of a file
 * @vp: vnode with a valid v_file (asserted)
 * @cmd: only F_FREESP is supported
 * @bfp: range to free; l_whence must be 0, l_start >= 0 and l_len > 0
 * @flag, @offset, @x6, @x7: unused
 *
 * Returns 0 on success and EOPNOTSUPP for any other @cmd or l_whence, or
 * when neither build-time mechanism below is available.  Note that @bfp
 * is dereferenced before any of the ASSERTs run.
 */
int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
offset_t offset, void *x6, void *x7)
{
int error = EOPNOTSUPP;
#ifdef FALLOC_FL_PUNCH_HOLE
int fstrans;
#endif
if (cmd != F_FREESP || bfp->l_whence != 0)
return (EOPNOTSUPP);
ASSERT(vp);
ASSERT(vp->v_file);
ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);
#ifdef FALLOC_FL_PUNCH_HOLE
/*
 * May enter XFS which generates a warning when PF_FSTRANS is set.
 * To avoid this the flag is cleared over the fallocate call and then
 * set again afterwards.
 */
fstrans = __spl_pf_fstrans_check();
if (fstrans)
current->flags &= ~(__SPL_PF_FSTRANS);
/*
 * When supported by the underlying file system preferentially
 * use the fallocate() callback to punch the hole; KEEP_SIZE leaves
 * the file size unchanged.
 */
error = -spl_filp_fallocate(vp->v_file,
FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
bfp->l_start, bfp->l_len);
if (fstrans)
current->flags |= __SPL_PF_FSTRANS;
if (error == 0)
return (0);
#endif
#ifdef HAVE_INODE_TRUNCATE_RANGE
/* Fallback: the inode's truncate_range operation, when it has one */
if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
vp->v_file->f_dentry->d_inode->i_op &&
vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
off_t end = bfp->l_start + bfp->l_len;
/*
 * Judging from the code in shmem_truncate_range(),
 * it seems the kernel expects the end offset to be
 * inclusive and aligned to the end of a page.
 */
if (end % PAGE_SIZE != 0) {
/* Round down to a page boundary; nothing to do if that empties the range */
end &= ~(off_t)(PAGE_SIZE - 1);
if (end <= bfp->l_start)
return (0);
}
/* Convert the exclusive end into an inclusive one */
--end;
vp->v_file->f_dentry->d_inode->i_op->truncate_range(
vp->v_file->f_dentry->d_inode, bfp->l_start, end);
return (0);
}
#endif
/* Either the fallocate error or the initial EOPNOTSUPP */
return (error);
}
EXPORT_SYMBOL(vn_space);
/*
 * file_find - look up the tracked file_t for an (fd, task) pair
 *
 * Function must be called while holding the vn_file_lock.  Returns the
 * matching entry from vn_file_list, or NULL when there is none.
 */
static file_t *
file_find(int fd, struct task_struct *task)
{
	file_t *fp;

	list_for_each_entry(fp, &vn_file_list, f_list) {
		if (fp->f_fd != fd || fp->f_task != task)
			continue;

		/* A tracked entry is expected to hold at least one reference */
		ASSERT(atomic_read(&fp->f_ref) != 0);
		return (fp);
	}

	return (NULL);
} /* file_find() */
/*
 * vn_getf - get a referenced file_t for a file descriptor of the caller
 * @fd: file descriptor in the current task
 *
 * If (fd, current) is already tracked, its reference count is bumped and
 * the existing file_t is returned.  Otherwise a new file_t and vnode are
 * set up and added to vn_file_list.  Returns NULL when @fd is negative,
 * when fget() finds no file, or when an allocation or vfs_getattr() fails.
 */
file_t *
vn_getf(int fd)
{
struct kstat stat;
struct file *lfp;
file_t *fp;
vnode_t *vp;
int rc = 0;
if (fd < 0)
return (NULL);
/* Already open just take an extra reference */
spin_lock(&vn_file_lock);
fp = file_find(fd, current);
if (fp) {
/* Take a fresh reference on whatever fd refers to now, drop the old one */
lfp = fget(fd);
fput(fp->f_file);
/*
 * areleasef() can cause us to see a stale reference when
 * userspace has reused a file descriptor before areleasef()
 * has run. fput() the stale reference and replace it. We
 * retain the original reference count such that the concurrent
 * areleasef() will decrement its reference and terminate.
 */
if (lfp != fp->f_file) {
fp->f_file = lfp;
fp->f_vnode->v_file = lfp;
}
atomic_inc(&fp->f_ref);
spin_unlock(&vn_file_lock);
return (fp);
}
spin_unlock(&vn_file_lock);
/* File was not yet opened create the object and setup */
fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
if (fp == NULL)
goto out;
mutex_enter(&fp->f_lock);
fp->f_fd = fd;
fp->f_task = current;
fp->f_offset = 0;
atomic_inc(&fp->f_ref);
lfp = fget(fd);
if (lfp == NULL)
goto out_mutex;
vp = vn_alloc(KM_SLEEP);
if (vp == NULL)
goto out_fget;
/* Only the file type is needed from the attributes (see v_type below) */
#if defined(HAVE_4ARGS_VFS_GETATTR)
rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE,
AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
rc = vfs_getattr(&lfp->f_path, &stat);
#else
rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
#endif
if (rc)
goto out_vnode;
mutex_enter(&vp->v_lock);
vp->v_type = vn_mode_to_vtype(stat.mode);
vp->v_file = lfp;
mutex_exit(&vp->v_lock);
fp->f_vnode = vp;
fp->f_file = lfp;
/* Put it on the tracking list */
spin_lock(&vn_file_lock);
list_add(&fp->f_list, &vn_file_list);
spin_unlock(&vn_file_lock);
mutex_exit(&fp->f_lock);
return (fp);
/* Error unwinding: each label undoes one more setup step than the next */
out_vnode:
vn_free(vp);
out_fget:
fput(lfp);
out_mutex:
mutex_exit(&fp->f_lock);
kmem_cache_free(vn_file_cache, fp);
out:
return (NULL);
} /* vn_getf() */
EXPORT_SYMBOL(getf);
/*
 * releasef_locked - release the resources held by a file_t
 *
 * Drops the struct file reference, frees the vnode and returns @fp to
 * vn_file_cache.  Both callers in this file invoke it with vn_file_lock
 * held, after removing @fp from vn_file_list.
 */
static void releasef_locked(file_t *fp)
{
ASSERT(fp->f_file);
ASSERT(fp->f_vnode);
/* Unlinked from list, no refs, safe to free outside mutex */
fput(fp->f_file);
vn_free(fp->f_vnode);
kmem_cache_free(vn_file_cache, fp);
}
/*
 * vn_releasef - drop the calling task's reference on @fd
 *
 * Forwards to areleasef() with P_FINFO(current).
 * NOTE(review): areleasef is presumably a header alias for
 * vn_areleasef() -- confirm.
 */
void
vn_releasef(int fd)
{
areleasef(fd, P_FINFO(current));
}
EXPORT_SYMBOL(releasef);
/*
 * vn_areleasef - drop one reference on the file_t tracked for (fd, task)
 * @fd: file descriptor; negative values are ignored
 * @fip: cast to the owning struct task_struct
 *
 * When the reference count reaches zero the file_t is unlinked from
 * vn_file_list and released.  Nothing happens if no entry is tracked.
 */
void
vn_areleasef(int fd, uf_info_t *fip)
{
	struct task_struct *task = (struct task_struct *)fip;
	file_t *fp;

	if (fd < 0)
		return;

	spin_lock(&vn_file_lock);

	fp = file_find(fd, task);
	if (fp != NULL) {
		atomic_dec(&fp->f_ref);

		/* Last reference gone: stop tracking and release */
		if (atomic_read(&fp->f_ref) <= 0) {
			list_del(&fp->f_list);
			releasef_locked(fp);
		}
	}

	spin_unlock(&vn_file_lock);
} /* vn_areleasef() */
EXPORT_SYMBOL(areleasef);
static void
#ifdef HAVE_SET_FS_PWD_WITH_CONST
vn_set_fs_pwd(struct fs_struct *fs, const struct path *path)
#else
vn_set_fs_pwd(struct fs_struct *fs, struct path *path)
#endif /* HAVE_SET_FS_PWD_WITH_CONST */
{
struct path old_pwd;
#ifdef HAVE_FS_STRUCT_SPINLOCK
spin_lock(&fs->lock);
old_pwd = fs->pwd;
fs->pwd = *path;
path_get(path);
spin_unlock(&fs->lock);
#else
write_lock(&fs->lock);
old_pwd = fs->pwd;
fs->pwd = *path;
path_get(path);
write_unlock(&fs->lock);
#endif /* HAVE_FS_STRUCT_SPINLOCK */
if (old_pwd.dentry)
path_put(&old_pwd);
}
/*
 * vn_set_pwd - change the current task's working directory
 * @filename: kernel-space path of the new directory
 *
 * Returns 0 on success or a positive errno taken from user_path_dir() or
 * inode_permission().
 */
int
vn_set_pwd(const char *filename)
{
struct path path;
mm_segment_t saved_fs;
int rc;
/*
 * user_path_dir() and __user_walk() both expect 'filename' to be
 * a user space address so we must briefly increase the data segment
 * size to ensure strncpy_from_user() does not fail with -EFAULT.
 */
saved_fs = get_fs();
set_fs(get_ds());
rc = user_path_dir(filename, &path);
if (rc)
goto out;
/* The caller must be allowed to search (MAY_EXEC) the directory */
rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
if (rc)
goto dput_and_out;
vn_set_fs_pwd(current->fs, &path);
/* Reached on success too: drops the reference held in the local path */
dput_and_out:
path_put(&path);
out:
set_fs(saved_fs);
return (-rc);
} /* vn_set_pwd() */
EXPORT_SYMBOL(vn_set_pwd);
/*
 * vn_cache_constructor - constructor registered for vn_cache
 *
 * Initialises the vnode's v_lock mutex and always returns 0.  @cdrarg
 * and @kmflags are unused.
 */
static int
vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
struct vnode *vp = buf;
mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
return (0);
} /* vn_cache_constructor() */
/*
 * vn_cache_destructor - destructor registered for vn_cache
 *
 * Destroys the v_lock mutex set up by vn_cache_constructor().  @cdrarg
 * is unused.
 */
static void
vn_cache_destructor(void *buf, void *cdrarg)
{
struct vnode *vp = buf;
mutex_destroy(&vp->v_lock);
} /* vn_cache_destructor() */
/*
 * vn_file_cache_constructor - constructor registered for vn_file_cache
 *
 * Zeroes the reference count, initialises f_lock and makes f_list an
 * empty list head.  Always returns 0.  @cdrarg and @kmflags are unused.
 */
static int
vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
file_t *fp = buf;
atomic_set(&fp->f_ref, 0);
mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
INIT_LIST_HEAD(&fp->f_list);
return (0);
} /* vn_file_cache_constructor() */
/*
 * vn_file_cache_destructor - destructor registered for vn_file_cache
 *
 * Destroys the f_lock mutex set up by vn_file_cache_constructor().
 * @cdrarg is unused.
 */
static void
vn_file_cache_destructor(void *buf, void *cdrarg)
{
file_t *fp = buf;
mutex_destroy(&fp->f_lock);
} /* vn_file_cache_destructor() */
/*
 * spl_vn_init - create the vnode and file_t object caches
 *
 * Returns 0 on success.  If either cache cannot be created, anything
 * already created is destroyed again and -ENOMEM is returned, so that
 * vn_alloc() and vn_getf() never run against a NULL cache.
 *
 * NOTE(review): a negative errno is used on the assumption that the
 * result is propagated to the module init path -- confirm against the
 * caller.
 */
int
spl_vn_init(void)
{
	vn_cache = kmem_cache_create("spl_vn_cache",
	    sizeof (struct vnode), 64, vn_cache_constructor,
	    vn_cache_destructor, NULL, NULL, NULL, 0);
	if (vn_cache == NULL)
		return (-ENOMEM);

	vn_file_cache = kmem_cache_create("spl_vn_file_cache",
	    sizeof (file_t), 64, vn_file_cache_constructor,
	    vn_file_cache_destructor, NULL, NULL, NULL, 0);
	if (vn_file_cache == NULL) {
		/* Undo the first cache so no partial state is left behind */
		kmem_cache_destroy(vn_cache);
		vn_cache = NULL;
		return (-ENOMEM);
	}

	return (0);
} /* spl_vn_init() */
/*
 * spl_vn_fini - release leaked file_t objects and destroy both caches
 *
 * Any file_t still on vn_file_list is unlinked and released regardless of
 * its reference count; a warning with the number of such entries is
 * logged when there were any.
 */
void
spl_vn_fini(void)
{
	file_t *fp, *tmp;
	int leaked = 0;

	spin_lock(&vn_file_lock);
	list_for_each_entry_safe(fp, tmp, &vn_file_list, f_list) {
		leaked++;
		list_del(&fp->f_list);
		releasef_locked(fp);
	}
	spin_unlock(&vn_file_lock);

	if (leaked > 0)
		printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);

	kmem_cache_destroy(vn_file_cache);
	kmem_cache_destroy(vn_cache);
} /* spl_vn_fini() */

515
module/spl/spl-xdr.c Normal file
View File

@ -0,0 +1,515 @@
/*
* Copyright (c) 2008-2010 Sun Microsystems, Inc.
* Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) XDR Implementation.
*/
#include <linux/string.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <rpc/xdr.h>
/*
* SPL's XDR mem implementation.
*
* This is used by libnvpair to serialize/deserialize the name-value pair data
* structures into byte arrays in a well-defined and portable manner.
*
* These data structures are used by the DMU/ZFS to flexibly manipulate various
* information in memory and later serialize it/deserialize it to disk.
* Examples of usages include the pool configuration, lists of pool and dataset
* properties, etc.
*
* Reference documentation for the XDR representation and XDR operations can be
* found in RFC 1832 and xdr(3), respectively.
*
* === Implementation shortcomings ===
*
* It is assumed that the following C types have the following sizes:
*
* char/unsigned char: 1 byte
* short/unsigned short: 2 bytes
* int/unsigned int: 4 bytes
* longlong_t/u_longlong_t: 8 bytes
*
* The C standard allows these types to be larger (and in the case of ints,
* shorter), so if that is the case on some compiler/architecture, the build
* will fail (on purpose).
*
* If someone wants to fix the code to work properly on such environments, then:
*
* 1) Preconditions should be added to xdrmem_enc functions to make sure the
* caller doesn't pass arguments which exceed the expected range.
* 2) Functions which take signed integers should be changed to properly do
* sign extension.
* 3) For ints with less than 32 bits, well.. I suspect you'll have bigger
* problems than this implementation.
*
* It is also assumed that:
*
* 1) Chars have 8 bits.
* 2) We can always do 32-bit-aligned int memory accesses and byte-aligned
* memcpy, memset and memcmp.
* 3) Arrays passed to xdr_array() are packed and the compiler/architecture
* supports element-sized-aligned memory accesses.
* 4) Negative integers are natively stored in two's complement binary
* representation.
*
* No checks are done for the 4 assumptions above, though.
*
* === Caller expectations ===
*
* Existing documentation does not describe the semantics of XDR operations very
* well. Therefore, some assumptions about failure semantics will be made and
* will be described below:
*
* 1) If any encoding operation fails (e.g., due to lack of buffer space), the
* stream should be considered valid only up to the encoding operation
* previous to the one that first failed. However, the stream size as returned
* by xdr_control() cannot be considered to be strictly correct (it may be
* bigger).
*
* Putting it another way, if there is an encoding failure it's undefined
* whether anything is added to the stream in that operation and therefore
* neither xdr_control() nor future encoding operations on the same stream can
* be relied upon to produce correct results.
*
* 2) If a decoding operation fails, it's undefined whether anything will be
* decoded into passed buffers/pointers during that operation, or what the
* values on those buffers will look like.
*
* Future decoding operations on the same stream will also have similar
* undefined behavior.
*
* 3) When the first decoding operation fails it is OK to trust the results of
* previous decoding operations on the same stream, as long as the caller
* expects a failure to be possible (e.g. due to end-of-stream).
*
* However, this is highly discouraged because the caller should know the
* stream size and should be coded to expect any decoding failure to be data
* corruption due to hardware, accidental or even malicious causes, which should
* be handled gracefully in all cases.
*
* In very rare situations where there are strong reasons to believe the data
* can be trusted to be valid and non-tampered with, then the caller may assume
* a decoding failure to be a bug (e.g. due to mismatched data types) and may
* fail non-gracefully.
*
* 4) Non-zero padding bytes will cause the decoding operation to fail.
*
* 5) Zero bytes on string types will also cause the decoding operation to fail.
*
* 6) It is assumed that either the pointer to the stream buffer given by the
* caller is 32-bit aligned or the architecture supports non-32-bit-aligned int
* memory accesses.
*
* 7) The stream buffer and encoding/decoding buffers/ptrs should not overlap.
*
* 8) If a caller passes pointers to non-kernel memory (e.g., pointers to user
* space or MMIO space), the computer may explode.
*/
/*
 * Operation tables installed by xdrmem_create() for XDR_ENCODE and
 * XDR_DECODE streams.  Only declared here.
 * NOTE(review): presumably initialised further down the file -- confirm.
 */
static struct xdr_ops xdrmem_encode_ops;
static struct xdr_ops xdrmem_decode_ops;
/* Result type of the XDR primitives below: TRUE on success, FALSE on failure */
typedef int bool_t;
/*
 * xdrmem_create - set up an XDR stream over a memory buffer
 * @xdrs: stream to initialise
 * @addr: start of the buffer
 * @size: buffer length in bytes
 * @op: XDR_ENCODE or XDR_DECODE
 *
 * Failure is reported by leaving xdrs->x_ops set to NULL: either @op is
 * unsupported (the other fields are then untouched) or @addr + @size
 * wraps around.
 */
void
xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
    const enum xdr_op op)
{
	if (op == XDR_ENCODE) {
		xdrs->x_ops = &xdrmem_encode_ops;
	} else if (op == XDR_DECODE) {
		xdrs->x_ops = &xdrmem_decode_ops;
	} else {
		xdrs->x_ops = NULL; /* Let the caller know we failed */
		return;
	}

	xdrs->x_op = op;
	xdrs->x_addr = addr;
	xdrs->x_addr_end = addr + size;

	/* An end address below the start means the range wrapped */
	if (xdrs->x_addr_end < xdrs->x_addr)
		xdrs->x_ops = NULL;
}
EXPORT_SYMBOL(xdrmem_create);
/*
 * xdrmem_control - answer a control request on a memory stream
 *
 * Only XDR_GET_BYTES_AVAIL is supported; @info is then filled in as a
 * struct xdr_bytesrec with the number of bytes left between the current
 * position and the end of the buffer.  Any other request returns FALSE.
 */
static bool_t
xdrmem_control(XDR *xdrs, int req, void *info)
{
	struct xdr_bytesrec *rec;

	if (req != XDR_GET_BYTES_AVAIL)
		return (FALSE);

	rec = (struct xdr_bytesrec *)info;
	rec->xc_is_last_record = TRUE; /* always TRUE in xdrmem streams */
	rec->xc_num_avail = xdrs->x_addr_end - xdrs->x_addr;

	return (TRUE);
}
/*
 * xdrmem_enc_bytes - append @cnt raw bytes, zero padded to a multiple of 4
 *
 * Returns FALSE, writing nothing, when rounding @cnt up overflows, when
 * the stream position is already past the end, or when the padded
 * length does not fit in the remaining buffer.
 */
static bool_t
xdrmem_enc_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
{
	const uint_t padded = roundup(cnt, 4);
	uint_t npad;

	if (padded < cnt ||	/* Integer overflow */
	    xdrs->x_addr > xdrs->x_addr_end ||
	    xdrs->x_addr_end - xdrs->x_addr < padded)
		return (FALSE);

	memcpy(xdrs->x_addr, cp, cnt);
	xdrs->x_addr += cnt;

	npad = padded - cnt;
	if (npad > 0) {
		memset(xdrs->x_addr, 0, npad);
		xdrs->x_addr += npad;
	}

	return (TRUE);
}
/*
 * xdrmem_dec_bytes - read @cnt raw bytes and skip the padding up to the
 * next multiple of 4
 *
 * Returns FALSE when rounding @cnt up overflows, when the stream position
 * is already past the end, when the padded length exceeds the remaining
 * buffer, or when a padding byte is non-zero.  In the last case @cp has
 * already been filled and the position advanced past the data.
 */
static bool_t
xdrmem_dec_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
{
	static uint32_t zero = 0;
	const uint_t padded = roundup(cnt, 4);
	uint_t npad;

	if (padded < cnt ||	/* Integer overflow */
	    xdrs->x_addr > xdrs->x_addr_end ||
	    xdrs->x_addr_end - xdrs->x_addr < padded)
		return (FALSE);

	memcpy(cp, xdrs->x_addr, cnt);
	xdrs->x_addr += cnt;

	npad = padded - cnt;
	if (npad > 0) {
		/* An inverted memchr() would be useful here... */
		if (memcmp(&zero, xdrs->x_addr, npad) != 0)
			return (FALSE);

		xdrs->x_addr += npad;
	}

	return (TRUE);
}
/*
 * xdrmem_enc_uint32 - append one 32-bit value in big-endian byte order
 *
 * Returns FALSE, writing nothing, when fewer than four bytes remain.
 */
static bool_t
xdrmem_enc_uint32(XDR *xdrs, uint32_t val)
{
	uint32_t *slot;

	if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
		return (FALSE);

	slot = (uint32_t *)xdrs->x_addr;
	*slot = cpu_to_be32(val);
	xdrs->x_addr += sizeof (uint32_t);

	return (TRUE);
}
/*
 * xdrmem_dec_uint32 - read one big-endian 32-bit value into *val
 *
 * Returns FALSE, leaving *val untouched, when fewer than four bytes
 * remain.
 */
static bool_t
xdrmem_dec_uint32(XDR *xdrs, uint32_t *val)
{
	uint32_t *slot;

	if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
		return (FALSE);

	slot = (uint32_t *)xdrs->x_addr;
	*val = be32_to_cpu(*slot);
	xdrs->x_addr += sizeof (uint32_t);

	return (TRUE);
}
/*
 * xdrmem_enc_char - encode one char as a 32-bit unit
 *
 * The char is read as an unsigned char, so the encoded value is always in
 * the range 0..0xff.
 */
static bool_t
xdrmem_enc_char(XDR *xdrs, char *cp)
{
	BUILD_BUG_ON(sizeof (char) != 1);

	return (xdrmem_enc_uint32(xdrs, *((unsigned char *) cp)));
}
/*
 * xdrmem_dec_char - decode one char from a 32-bit unit
 *
 * Returns FALSE when the unit cannot be read or its value exceeds 0xff.
 */
static bool_t
xdrmem_dec_char(XDR *xdrs, char *cp)
{
	uint32_t word;

	BUILD_BUG_ON(sizeof (char) != 1);

	/*
	 * If any of the 3 other bytes are non-zero then the word will be
	 * greater than 0xff and we fail because according to the RFC, this
	 * block does not have a char encoded in it.
	 */
	if (!xdrmem_dec_uint32(xdrs, &word) || word > 0xff)
		return (FALSE);

	*((unsigned char *) cp) = word;

	return (TRUE);
}
/*
 * Encode an unsigned short by widening it to a 32-bit XDR word.
 */
static bool_t
xdrmem_enc_ushort(XDR *xdrs, unsigned short *usp)
{
	uint32_t widened;

	BUILD_BUG_ON(sizeof (unsigned short) != 2);

	widened = *usp;

	return (xdrmem_enc_uint32(xdrs, widened));
}
/*
 * Decode an unsigned short that was encoded as a 32-bit XDR word.
 *
 * Returns FALSE when the word cannot be read or when its value does not
 * fit in 16 bits; *usp is only written on success.
 */
static bool_t
xdrmem_dec_ushort(XDR *xdrs, unsigned short *usp)
{
	uint32_t word;

	BUILD_BUG_ON(sizeof (unsigned short) != 2);

	/*
	 * Short ints are not covered by the RFC; apply the same range rule
	 * that xdrmem_dec_char() applies to chars.
	 */
	if (!xdrmem_dec_uint32(xdrs, &word) || word > 0xffff)
		return (FALSE);

	*usp = word;

	return (TRUE);
}
/*
 * Encode an unsigned int as one 32-bit XDR word.
 */
static bool_t
xdrmem_enc_uint(XDR *xdrs, unsigned *up)
{
	uint32_t word;

	BUILD_BUG_ON(sizeof (unsigned) != 4);

	word = *up;

	return (xdrmem_enc_uint32(xdrs, word));
}
/*
 * Decode one 32-bit XDR word into an unsigned int.  *up is only written
 * when the word was read successfully.
 */
static bool_t
xdrmem_dec_uint(XDR *xdrs, unsigned *up)
{
	uint32_t word;

	BUILD_BUG_ON(sizeof (unsigned) != 4);

	if (!xdrmem_dec_uint32(xdrs, &word))
		return (FALSE);

	*up = word;

	return (TRUE);
}
/*
 * Encode a 64-bit value as two 32-bit XDR words, most significant word
 * first.  Returns FALSE as soon as either word fails to encode.
 */
static bool_t
xdrmem_enc_ulonglong(XDR *xdrs, u_longlong_t *ullp)
{
	uint32_t word;

	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);

	/* Upper half. */
	word = (uint32_t)(*ullp >> 32);
	if (!xdrmem_enc_uint32(xdrs, word))
		return (FALSE);

	/* Lower half. */
	word = (uint32_t)(*ullp & 0xffffffff);

	return (xdrmem_enc_uint32(xdrs, word));
}
/*
 * Decode a 64-bit value stored as two 32-bit XDR words, most significant
 * word first.  *ullp is only written when both words were read.
 */
static bool_t
xdrmem_dec_ulonglong(XDR *xdrs, u_longlong_t *ullp)
{
	uint32_t upper, lower;

	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);

	if (!xdrmem_dec_uint32(xdrs, &upper) ||
	    !xdrmem_dec_uint32(xdrs, &lower))
		return (FALSE);

	*ullp = ((u_longlong_t)upper << 32) | lower;

	return (TRUE);
}
/*
 * Encode a variable-length array: first the element count (*sizep), then
 * each element in order via elproc.
 *
 * Returns FALSE when the count exceeds maxsize, when count * elsize would
 * not fit in a uint_t, or when encoding the count or any element fails.
 */
static bool_t
xdr_enc_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
    const uint_t elsize, const xdrproc_t elproc)
{
	caddr_t elem = *arrp;
	uint_t idx = 0;

	if (*sizep > maxsize)
		return (FALSE);

	if (*sizep > UINT_MAX / elsize)
		return (FALSE);

	if (!xdrmem_enc_uint(xdrs, sizep))
		return (FALSE);

	while (idx < *sizep) {
		if (!elproc(xdrs, elem))
			return (FALSE);

		elem += elsize;
		idx++;
	}

	return (TRUE);
}
/*
 * Decode a variable-length array: read the element count into *sizep,
 * then decode that many elements of elsize bytes each via elproc.
 *
 * If *arrp is NULL on entry the array is allocated here with KM_NOSLEEP
 * and returned through *arrp; otherwise the caller's buffer is filled.
 *
 * Returns FALSE when the count cannot be read, exceeds maxsize, or would
 * overflow count * elsize, when the allocation fails, or when any element
 * fails to decode.  On element failure an array allocated here is freed
 * and *arrp is reset to NULL so the caller is never left holding a pointer
 * to freed memory; a caller-supplied buffer is left in place.
 */
static bool_t
xdr_dec_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
    const uint_t elsize, const xdrproc_t elproc)
{
	uint_t i, size;
	bool_t alloc = FALSE;
	caddr_t addr;

	if (!xdrmem_dec_uint(xdrs, sizep))
		return (FALSE);

	size = *sizep;

	if (size > maxsize || size > UINT_MAX / elsize)
		return (FALSE);

	/*
	 * The Solaris man page says: "If *arrp is NULL when decoding,
	 * xdr_array() allocates memory and *arrp points to it".
	 */
	if (*arrp == NULL) {
		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));

		*arrp = kmem_alloc(size * elsize, KM_NOSLEEP);
		if (*arrp == NULL)
			return (FALSE);

		alloc = TRUE;
	}

	addr = *arrp;

	for (i = 0; i < size; i++) {
		if (!elproc(xdrs, addr)) {
			if (alloc) {
				kmem_free(*arrp, size * elsize);
				/* Do not hand a dangling pointer back. */
				*arrp = NULL;
			}
			return (FALSE);
		}
		addr += elsize;
	}

	return (TRUE);
}
/*
 * Encode a NUL-terminated string as its length followed by its bytes (the
 * terminator itself is not written).
 *
 * Returns FALSE when the string is longer than maxsize or when writing
 * the length or the bytes fails.
 */
static bool_t
xdr_enc_string(XDR *xdrs, char **sp, const uint_t maxsize)
{
	const size_t nbytes = strlen(*sp);
	uint_t wire_len;

	if (nbytes > maxsize)
		return (FALSE);

	/* nbytes <= maxsize, so it fits in a uint_t. */
	wire_len = nbytes;

	if (!xdrmem_enc_uint(xdrs, &wire_len))
		return (FALSE);

	return (xdrmem_enc_bytes(xdrs, *sp, wire_len));
}
/*
 * Decode a string stored as a length followed by that many bytes, and
 * NUL-terminate the result.
 *
 * If *sp is NULL on entry a buffer of length + 1 bytes is allocated here
 * with KM_NOSLEEP and returned through *sp; otherwise the caller's buffer
 * is filled and must have room for length + 1 bytes.
 *
 * Returns FALSE when the length cannot be read, exceeds maxsize, or equals
 * UINT_MAX, when the allocation fails, when the bytes cannot be read, or
 * when the decoded bytes contain an embedded NUL.  On failure a buffer
 * allocated here is freed and *sp is reset to NULL so the caller is never
 * left holding a pointer to freed memory; a caller-supplied buffer is left
 * in place.
 */
static bool_t
xdr_dec_string(XDR *xdrs, char **sp, const uint_t maxsize)
{
	uint_t size;
	bool_t alloc = FALSE;

	if (!xdrmem_dec_uint(xdrs, &size))
		return (FALSE);

	/* size + 1 below must not wrap. */
	if (size > maxsize || size > UINT_MAX - 1)
		return (FALSE);

	/*
	 * Solaris man page: "If *sp is NULL when decoding, xdr_string()
	 * allocates memory and *sp points to it".
	 */
	if (*sp == NULL) {
		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));

		*sp = kmem_alloc(size + 1, KM_NOSLEEP);
		if (*sp == NULL)
			return (FALSE);

		alloc = TRUE;
	}

	if (!xdrmem_dec_bytes(xdrs, *sp, size))
		goto fail;

	/* An embedded NUL would make the C string shorter than encoded. */
	if (memchr(*sp, 0, size) != NULL)
		goto fail;

	(*sp)[size] = '\0';

	return (TRUE);

fail:
	if (alloc) {
		kmem_free(*sp, size + 1);
		/* Do not hand a dangling pointer back. */
		*sp = NULL;
	}

	return (FALSE);
}
/*
 * Operation table that routes each XDR primitive to its memory-buffer
 * encoder.
 */
static struct xdr_ops xdrmem_encode_ops = {
	.xdr_control		= xdrmem_control,
	.xdr_char		= xdrmem_enc_char,
	.xdr_u_short		= xdrmem_enc_ushort,
	.xdr_u_int		= xdrmem_enc_uint,
	.xdr_u_longlong_t	= xdrmem_enc_ulonglong,
	.xdr_opaque		= xdrmem_enc_bytes,
	.xdr_string		= xdr_enc_string,
	.xdr_array		= xdr_enc_array,
};
/*
 * Operation table that routes each XDR primitive to its memory-buffer
 * decoder.
 */
static struct xdr_ops xdrmem_decode_ops = {
	.xdr_control		= xdrmem_control,
	.xdr_char		= xdrmem_dec_char,
	.xdr_u_short		= xdrmem_dec_ushort,
	.xdr_u_int		= xdrmem_dec_uint,
	.xdr_u_longlong_t	= xdrmem_dec_ulonglong,
	.xdr_opaque		= xdrmem_dec_bytes,
	.xdr_string		= xdr_dec_string,
	.xdr_array		= xdr_dec_array,
};

217
module/spl/spl-zlib.c Normal file
View File

@ -0,0 +1,217 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
*
* z_compress_level/z_uncompress are nearly identical copies of the
* compress2/uncompress functions provided by the official zlib package
* available at http://zlib.net/. The only changes made were to slightly
* adapt the functions called to match the linux kernel implementation
* of zlib. The full zlib license follows:
*
* zlib.h -- interface of the 'zlib' general purpose compression library
* version 1.2.5, April 19th, 2010
*
* Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*
* Jean-loup Gailly
* Mark Adler
*/
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/zmod.h>
static spl_kmem_cache_t *zlib_workspace_cache;
/*
* zlib_workspace_cache is created by spl_zlib_init(), destroyed by
* spl_zlib_fini(), and supplies the workspace for every call to
* z_compress_level() and z_uncompress().
*
* A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
* and vfree for every call. Using a kmem_cache also has the advantage
* that it improves the odds that the memory used will be local to this cpu.
* To further improve things it might be wise to create a dedicated per-cpu
* workspace for use. This would take some additional care because we then
* must disable preemption around the critical section, and verify that
* zlib_deflate* and zlib_inflate* never internally call schedule().
*/
/*
 * Take one workspace from zlib_workspace_cache.  __GFP_FS is cleared from
 * the caller's flags before they are handed to the cache allocator
 * (presumably so the allocation cannot re-enter filesystem code - TODO
 * confirm).  Returns whatever kmem_cache_alloc() returns.
 */
static void *
zlib_workspace_alloc(int flags)
{
	void *workspace;

	workspace = kmem_cache_alloc(zlib_workspace_cache,
	    flags & ~(__GFP_FS));

	return (workspace);
}
/*
* Return a workspace obtained from zlib_workspace_alloc() to
* zlib_workspace_cache.
*/
static void
zlib_workspace_free(void *workspace)
{
kmem_cache_free(zlib_workspace_cache, workspace);
}
/*
 * Compresses the source buffer into the destination buffer in a single
 * deflate pass.  The level parameter has the same meaning as in
 * deflateInit.  sourceLen is the byte length of the source buffer.  Upon
 * entry, *destLen is the total size of the destination buffer, which must
 * be at least 0.1% larger than sourceLen plus 12 bytes.  Upon exit with
 * Z_OK, *destLen is the actual size of the compressed data; on any error
 * *destLen is left unchanged.
 *
 * Returns:
 *   Z_OK         on success;
 *   Z_BUF_ERROR  if sourceLen or *destLen does not survive narrowing to
 *                uInt, or if deflate could not finish in the space given;
 *   Z_MEM_ERROR  if no workspace could be allocated;
 *   otherwise the code reported by zlib_deflateInit(), zlib_deflate() or
 *   zlib_deflateEnd() (zlib documents Z_STREAM_ERROR for an invalid level).
 */
int
z_compress_level(void *dest, size_t *destLen, const void *source,
    size_t sourceLen, int level)
{
	z_stream stream;
	int err;

	stream.next_in = (Byte *)source;
	stream.avail_in = (uInt)sourceLen;
	/*
	 * The cast above may drop high bits.  Refuse such input rather than
	 * silently compressing only a prefix of the source buffer.
	 */
	if ((size_t)stream.avail_in != sourceLen)
		return (Z_BUF_ERROR);

	stream.next_out = dest;
	stream.avail_out = (uInt)*destLen;
	if ((size_t)stream.avail_out != *destLen)
		return (Z_BUF_ERROR);

	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
	if (!stream.workspace)
		return (Z_MEM_ERROR);

	err = zlib_deflateInit(&stream, level);
	if (err != Z_OK)
		goto out;

	err = zlib_deflate(&stream, Z_FINISH);
	if (err != Z_STREAM_END) {
		zlib_deflateEnd(&stream);
		/* Z_OK here means deflate stopped before finishing. */
		if (err == Z_OK)
			err = Z_BUF_ERROR;
		goto out;
	}

	*destLen = stream.total_out;
	err = zlib_deflateEnd(&stream);

out:
	zlib_workspace_free(stream.workspace);

	return (err);
}
EXPORT_SYMBOL(z_compress_level);
/*
 * Decompresses the source buffer into the destination buffer in a single
 * inflate pass.  sourceLen is the byte length of the source buffer.  Upon
 * entry, *destLen is the total size of the destination buffer, which must
 * be large enough to hold the entire uncompressed data.  (The size of the
 * uncompressed data must have been saved previously by the compressor and
 * transmitted to the decompressor by some mechanism outside the scope of
 * this compression library.)  Upon exit with Z_OK, *destLen is the actual
 * size of the uncompressed data; on any error *destLen is left unchanged.
 *
 * Returns:
 *   Z_OK          on success;
 *   Z_BUF_ERROR   if sourceLen or *destLen does not survive narrowing to
 *                 uInt, or if inflate reported Z_BUF_ERROR with input
 *                 still unconsumed;
 *   Z_MEM_ERROR   if no workspace could be allocated;
 *   Z_DATA_ERROR  if inflate asked for a dictionary or reported
 *                 Z_BUF_ERROR after consuming all of the input;
 *   otherwise the code reported by zlib_inflateInit(), zlib_inflate() or
 *   zlib_inflateEnd().
 */
int
z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
{
	z_stream stream;
	int err;

	stream.next_in = (Byte *)source;
	stream.avail_in = (uInt)sourceLen;
	/*
	 * The cast above may drop high bits.  Refuse such input rather than
	 * silently inflating only a prefix of the source buffer.
	 */
	if ((size_t)stream.avail_in != sourceLen)
		return (Z_BUF_ERROR);

	stream.next_out = dest;
	stream.avail_out = (uInt)*destLen;
	if ((size_t)stream.avail_out != *destLen)
		return (Z_BUF_ERROR);

	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
	if (!stream.workspace)
		return (Z_MEM_ERROR);

	err = zlib_inflateInit(&stream);
	if (err != Z_OK)
		goto out;

	err = zlib_inflate(&stream, Z_FINISH);
	if (err != Z_STREAM_END) {
		zlib_inflateEnd(&stream);
		if (err == Z_NEED_DICT ||
		    (err == Z_BUF_ERROR && stream.avail_in == 0))
			err = Z_DATA_ERROR;
		goto out;
	}

	*destLen = stream.total_out;
	err = zlib_inflateEnd(&stream);

out:
	zlib_workspace_free(stream.workspace);

	return (err);
}
EXPORT_SYMBOL(z_uncompress);
/*
 * Create the shared zlib workspace cache.  Each object is sized to the
 * larger of the deflate workspace (for MAX_WBITS / MAX_MEM_LEVEL) and the
 * inflate workspace, so one cache serves both z_compress_level() and
 * z_uncompress().
 *
 * Returns 0 on success and 1 if the cache could not be created.
 */
int
spl_zlib_init(void)
{
	int workspace_size;

	workspace_size = MAX(
	    spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
	    zlib_inflate_workspacesize());

	zlib_workspace_cache = kmem_cache_create(
	    "spl_zlib_workspace_cache",
	    workspace_size, 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_VMEM | KMC_NOEMERGENCY);

	return (zlib_workspace_cache ? 0 : 1);
}
/*
* Tear down the zlib workspace cache created by spl_zlib_init().
*/
void
spl_zlib_fini(void)
{
kmem_cache_destroy(zlib_workspace_cache);
/* Clear the global so no stale cache pointer is left behind. */
zlib_workspace_cache = NULL;
}