Rename modules to module and update references

This commit is contained in:
Brian Behlendorf
2009-01-15 10:44:54 -08:00
parent f6a19c0d37
commit 617d5a673c
39 changed files with 18 additions and 18 deletions
+51
View File
@@ -0,0 +1,51 @@
# Makefile.in for spl kernel module
# The @KERNELCPPFLAGS@ and @kmoduledir@ tokens are placeholders; presumably
# they are substituted when this template is turned into a Makefile.
MODULES := spl
# Files copied into $(distdir) by the 'distdir' target at the bottom.
# NOTE(review): only five of the sources named in spl-objs are listed here;
# confirm whether the remaining spl-*.c files should also be distributed.
DISTFILES = Makefile.in \
spl-kmem.c spl-rwlock.c spl-taskq.c \
spl-thread.c spl-generic.c
EXTRA_CFLAGS = @KERNELCPPFLAGS@
# Solaris porting layer module
obj-m := spl.o
# Objects linked together to form spl.o
spl-objs += spl-debug.o
spl-objs += spl-proc.o
spl-objs += spl-kmem.o
spl-objs += spl-thread.o
spl-objs += spl-taskq.o
spl-objs += spl-rwlock.o
spl-objs += spl-vnode.o
spl-objs += spl-err.o
spl-objs += spl-time.o
spl-objs += spl-kobj.o
spl-objs += spl-module.o
spl-objs += spl-generic.o
spl-objs += spl-atomic.o
spl-objs += spl-mutex.o
spl-objs += spl-kstat.o
spl-objs += spl-condvar.o
# Name of the built module and the directory it is installed into
splmodule := spl.ko
splmoduledir := @kmoduledir@/kernel/lib/
# Copy the module under $(DESTDIR) and refresh module dependencies; the
# leading '-' lets make continue if depmod fails.
install:
mkdir -p $(DESTDIR)$(splmoduledir)
$(INSTALL) -m 644 $(splmodule) $(DESTDIR)$(splmoduledir)/$(splmodule)
-/sbin/depmod -a
# Remove the installed module and refresh module dependencies
uninstall:
rm -f $(DESTDIR)$(splmoduledir)/$(splmodule)
-/sbin/depmod -a
# Remove build products from this directory and one level of subdirectories
clean:
-rm -f $(splmodule) *.o .*.cmd *.mod.c *.ko *.s */*.o
# 'clean' plus the generated Makefile and the .tmp_versions directory
distclean: clean
rm -f Makefile
rm -rf .tmp_versions
maintainer-clean: distclean
# Copy the distributable files into $(distdir), preserving timestamps/modes
distdir: $(DISTFILES)
cp -p $(DISTFILES) $(distdir)
+40
View File
@@ -0,0 +1,40 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/atomic.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_ATOMIC
/*
 * Global atomic lock declarations.  Each lock starts out unlocked and is
 * exported so that code outside this file can reference it.
 */
spinlock_t atomic32_lock = SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL(atomic32_lock);

spinlock_t atomic64_lock = SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL(atomic64_lock);
+201
View File
@@ -0,0 +1,201 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/condvar.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_CONDVAR
/*
 * Initialize the condition variable 'cvp'.
 *
 * Only CV_DEFAULT with a NULL 'arg' is accepted (asserted).  A private
 * copy of 'name' is stored in cvp->cv_name; if that allocation fails the
 * name is simply left NULL and initialization still completes.
 */
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	int kmflags;
	ENTRY;

	ASSERT(cvp);
	ASSERT(name);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	spin_lock_init(&cvp->cv_lock);
	atomic_set(&cvp->cv_waiters, 0);
	cvp->cv_mutex = NULL;
	cvp->cv_name_size = strlen(name) + 1;

	/* We may be called with a non-zero preempt_count or with
	 * interrupts disabled, in which case the allocation below
	 * must not sleep. */
	kmflags = (current_thread_info()->preempt_count || irqs_disabled()) ?
	    KM_NOSLEEP : KM_SLEEP;

	cvp->cv_name = kmem_alloc(cvp->cv_name_size, kmflags);
	if (cvp->cv_name != NULL)
		strcpy(cvp->cv_name, name);

	EXIT;
}
EXPORT_SYMBOL(__cv_init);
/*
 * Tear down a condition variable previously set up by __cv_init().
 *
 * Asserts that nobody is still waiting on it, releases the stored name
 * and finally overwrites the whole structure with CV_POISON.
 */
void
__cv_destroy(kcondvar_t *cvp)
{
	ENTRY;

	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	spin_lock(&cvp->cv_lock);
	ASSERT(atomic_read(&cvp->cv_waiters) == 0);
	ASSERT(!waitqueue_active(&cvp->cv_event));

	/* The name is optional, __cv_init() may have failed to allocate it */
	if (cvp->cv_name != NULL)
		kmem_free(cvp->cv_name, cvp->cv_name_size);
	spin_unlock(&cvp->cv_lock);

	memset(cvp, CV_POISON, sizeof(*cvp));

	EXIT;
}
EXPORT_SYMBOL(__cv_destroy);
/*
 * Block the caller on 'cvp' until it is woken.
 *
 * The caller must hold 'mp' (asserted).  The mutex is released while the
 * caller sleeps in schedule() and is re-acquired before returning.  The
 * first mutex used with a condition variable is remembered in
 * cvp->cv_mutex and every later wait asserts that the same one is passed.
 */
void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
DEFINE_WAIT(wait);
ENTRY;
ASSERT(cvp);
ASSERT(mp);
ASSERT(cvp->cv_magic == CV_MAGIC);
/* cv_lock protects the check-and-set of cv_mutex below */
spin_lock(&cvp->cv_lock);
ASSERT(mutex_owned(mp));
if (cvp->cv_mutex == NULL)
cvp->cv_mutex = mp;
/* Ensure the same mutex is used by all callers */
ASSERT(cvp->cv_mutex == mp);
spin_unlock(&cvp->cv_lock);
/* Queue ourselves as an exclusive, uninterruptible waiter */
prepare_to_wait_exclusive(&cvp->cv_event, &wait,
TASK_UNINTERRUPTIBLE);
atomic_inc(&cvp->cv_waiters);
/* The mutex must be dropped only after prepare_to_wait(); this
* ensures we're linked in to the waiters list and avoids the
* race where 'cvp->cv_waiters > 0' but the list is empty. */
mutex_exit(mp);
schedule();
/* Woken up: re-take the mutex before touching any shared state */
mutex_enter(mp);
atomic_dec(&cvp->cv_waiters);
finish_wait(&cvp->cv_event, &wait);
EXIT;
}
EXPORT_SYMBOL(__cv_wait);
/*
 * Block the caller on 'cvp' until it is woken or 'expire_time' passes.
 *
 * 'expire_time' is an absolute time expressed in jiffies; the relative
 * timeout is computed as (expire_time - jiffies).  The caller must hold
 * 'mp' (asserted); it is released while sleeping and re-acquired before
 * returning.
 *
 * Returns the number of jiffies left as reported by schedule_timeout()
 * when that value is positive, or -1 if the timeout expired.  -1 is also
 * returned immediately, without sleeping, when 'expire_time' is not in
 * the future.
 */
clock_t
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time)
{
DEFINE_WAIT(wait);
clock_t time_left;
ENTRY;
ASSERT(cvp);
ASSERT(mp);
ASSERT(cvp->cv_magic == CV_MAGIC);
/* cv_lock protects the check-and-set of cv_mutex below */
spin_lock(&cvp->cv_lock);
ASSERT(mutex_owned(mp));
if (cvp->cv_mutex == NULL)
cvp->cv_mutex = mp;
/* Ensure the same mutex is used by all callers */
ASSERT(cvp->cv_mutex == mp);
spin_unlock(&cvp->cv_lock);
/* XXX - Does not handle jiffies wrap properly */
time_left = expire_time - jiffies;
/* Already expired: report a timeout; 'mp' is still held here */
if (time_left <= 0)
RETURN(-1);
/* Queue ourselves as an exclusive, uninterruptible waiter */
prepare_to_wait_exclusive(&cvp->cv_event, &wait,
TASK_UNINTERRUPTIBLE);
atomic_inc(&cvp->cv_waiters);
/* The mutex must be dropped only after prepare_to_wait(); this
* ensures we're linked in to the waiters list and avoids the
* race where 'cvp->cv_waiters > 0' but the list is empty. */
mutex_exit(mp);
time_left = schedule_timeout(time_left);
/* Woken or timed out: re-take the mutex before returning */
mutex_enter(mp);
atomic_dec(&cvp->cv_waiters);
finish_wait(&cvp->cv_event, &wait);
RETURN(time_left > 0 ? time_left : -1);
}
EXPORT_SYMBOL(__cv_timedwait);
/*
 * Wake a single waiter blocked on 'cvp', if there is one.
 */
void
__cv_signal(kcondvar_t *cvp)
{
	int waiters;
	ENTRY;

	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	/* Every waiter is queued with WQ_FLAG_EXCLUSIVE, so each call to
	 * wake_up() makes only one of them runnable.  wake_up() also holds
	 * the spin lock associated with the wait queue, so we do not race
	 * with other wakers. */
	waiters = atomic_read(&cvp->cv_waiters);
	if (waiters > 0)
		wake_up(&cvp->cv_event);

	EXIT;
}
EXPORT_SYMBOL(__cv_signal);
/*
 * Wake every waiter blocked on 'cvp', if there are any.
 */
void
__cv_broadcast(kcondvar_t *cvp)
{
	int waiters;

	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ENTRY;

	/* wake_up_all() wakes all waiters, including those queued with
	 * the WQ_FLAG_EXCLUSIVE flag set. */
	waiters = atomic_read(&cvp->cv_waiters);
	if (waiters > 0)
		wake_up_all(&cvp->cv_event);

	EXIT;
}
EXPORT_SYMBOL(__cv_broadcast);
File diff suppressed because it is too large Load Diff
+78
View File
@@ -0,0 +1,78 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_GENERIC
/*
 * Per-level message decoration used by vcmn_err(): the level passed as
 * 'ce' indexes both tables.  Each table has CE_IGNORE rows, so valid
 * indices are 0 .. CE_IGNORE-1.
 * NOTE(review): vcmn_err() references these tables unconditionally while
 * they only exist when NDEBUG is undefined; presumably CERROR discards its
 * arguments in NDEBUG builds -- confirm.
 */
#ifndef NDEBUG
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
#endif
/*
 * Format a message from 'fmt'/'ap' into a local buffer and panic with it.
 *
 * The formatted text is handed to panic() through a "%s" format so that
 * any '%' characters produced by the first expansion are printed
 * literally instead of being interpreted as conversion specifiers a
 * second time.
 */
void
vpanic(const char *fmt, va_list ap)
{
	char msg[MAXMSGLEN];

	vsnprintf(msg, MAXMSGLEN - 1, fmt, ap);
	panic("%s", msg);
} /* vpanic() */
EXPORT_SYMBOL(vpanic);
/*
 * Format a message from 'fmt' and its variable arguments and log it
 * through CERROR.
 *
 * NOTE(review): the level 'ce' is not consulted here, unlike vcmn_err()
 * which panics on CE_PANIC and decorates the message -- confirm this
 * difference is intended.
 */
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;
	char text[MAXMSGLEN];

	va_start(args, fmt);
	vsnprintf(text, MAXMSGLEN - 1, fmt, args);
	va_end(args);

	CERROR("%s", text);
} /* cmn_err() */
EXPORT_SYMBOL(cmn_err);
/*
 * va_list flavour of cmn_err().
 *
 * CE_PANIC is forwarded to vpanic().  Levels outside the range covered by
 * the ce_prefix/ce_suffix tables (negative, or CE_IGNORE and above) are
 * dropped without logging so the tables are never indexed out of bounds.
 * CE_NOTE messages are suppressed; every other level is logged through
 * CERROR with the prefix and suffix that belong to the level.
 */
void
vcmn_err(int ce, const char *fmt, va_list ap)
{
	char msg[MAXMSGLEN];

	if (ce == CE_PANIC)
		vpanic(fmt, ap);

	/* The decoration tables only have CE_IGNORE rows */
	if (ce < 0 || ce >= CE_IGNORE)
		return;

	if (ce != CE_NOTE) { /* suppress noise in stress testing */
		vsnprintf(msg, MAXMSGLEN - 1, fmt, ap);
		CERROR("%s%s%s", ce_prefix[ce], msg, ce_suffix[ce]);
	}
} /* vcmn_err() */
EXPORT_SYMBOL(vcmn_err);
+328
View File
@@ -0,0 +1,328 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/sysmacros.h>
#include <sys/vmsystm.h>
#include <sys/vnode.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/taskq.h>
#include <sys/debug.h>
#include <sys/proc.h>
#include <sys/kstat.h>
#include <sys/utsname.h>
#include <linux/kmod.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_GENERIC
/* Version banner, built at compile time from the VERSION macro */
char spl_version[16] = "SPL v" VERSION;
/* Host identifier, exported for other modules; starts out as 0 */
long spl_hostid = 0;
EXPORT_SYMBOL(spl_hostid);
/* Hardware serial string, exported; "<none>" until something sets it */
char hw_serial[11] = "<none>";
EXPORT_SYMBOL(hw_serial);
/* Exported placeholder symbol 'p0', always defined as 0 here */
int p0 = 0;
EXPORT_SYMBOL(p0);
/* Exported arena pointer; NULL in this file */
vmem_t *zio_alloc_arena = NULL;
EXPORT_SYMBOL(zio_alloc_arena);
int
highbit(unsigned long i)
{
register int h = 1;
ENTRY;
if (i == 0)
RETURN(0);
#if BITS_PER_LONG == 64
if (i & 0xffffffff00000000ul) {
h += 32; i >>= 32;
}
#endif
if (i & 0xffff0000) {
h += 16; i >>= 16;
}
if (i & 0xff00) {
h += 8; i >>= 8;
}
if (i & 0xf0) {
h += 4; i >>= 4;
}
if (i & 0xc) {
h += 2; i >>= 2;
}
if (i & 0x2) {
h += 1;
}
RETURN(h);
}
EXPORT_SYMBOL(highbit);
/*
* Implementation of 64 bit division for 32-bit machines.
*/
#if BITS_PER_LONG == 32
/*
 * Unsigned 64-bit division helper for 32-bit builds.
 *
 * With HAVE_DIV64_64 the work is delegated to div64_64().  Otherwise both
 * operands are shifted right until the divisor fits in 32 bits and the
 * division is finished by do_div(), which leaves the quotient in its
 * first argument.
 */
uint64_t __udivdi3(uint64_t dividend, uint64_t divisor)
{
#ifdef HAVE_DIV64_64
	return div64_64(dividend, divisor);
#else
	/* Taken from a 2.6.24 kernel. */
	uint32_t upper = divisor >> 32;
	uint32_t small;
	unsigned int shift = 0;

	/* Scale down only when the divisor does not fit in 32 bits */
	if (upper)
		shift = fls(upper);

	small = divisor >> shift;
	dividend >>= shift;

	do_div(dividend, small);

	return dividend;
#endif
}
EXPORT_SYMBOL(__udivdi3);
/*
 * Unsigned 64-bit modulo helper for 32-bit builds: the remainder is
 * derived from the quotient as dividend - quotient * divisor.
 */
uint64_t __umoddi3(uint64_t dividend, uint64_t divisor)
{
	uint64_t quotient = dividend / divisor;

	return dividend - quotient * divisor;
}
EXPORT_SYMBOL(__umoddi3);
#endif
/* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
* ddi_strtol(9F) man page. I have not verified the behavior of these
* functions against their Solaris counterparts. It is possible that I
* may have misinterpreted the man page or the man page is incorrect.
*/
int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);
/*
 * define_ddi_strtoux(type, valtype) generates ddi_strtou<type>(), which
 * parses an unsigned integer of type 'valtype' from 'str'.
 *
 *   str     - NUL terminated input; an empty string yields EINVAL
 *   endptr  - optional; receives the address of the first character that
 *             was not consumed (equal to 'str' when no digit was found)
 *   base    - 2..36, or 0 to auto-detect: "0x"/"0X" followed by a hex
 *             digit selects 16, any other leading '0' selects 8,
 *             everything else selects 10; other values yield EINVAL
 *   result  - receives the parsed value (0 when no digit was found)
 *
 * Returns 0 on success, EINVAL for an empty string or unsupported base,
 * and ERANGE when the value does not fit in 'valtype'.  On error neither
 * *result nor *endptr is written.
 */
#define define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
		     int base, valtype *result)				\
{									\
	const valtype max_value = (valtype)-1;				\
	valtype value = 0;						\
	const char *ptr = str;						\
	int digit;							\
									\
	if (*str == '\0')						\
		return EINVAL;						\
									\
	if (base != 0 && (base < 2 || base > 36))			\
		return EINVAL;						\
									\
	/* Auto-detect base based on prefix */				\
	if (base == 0) {						\
		if (str[0] == '0' &&					\
		    tolower((unsigned char)str[1]) == 'x' &&		\
		    isxdigit((unsigned char)str[2])) {			\
			base = 16; /* hex, skip the "0x" */		\
			ptr += 2;					\
		} else if (str[0] == '0') {				\
			base = 8; /* octal, the '0' parses as a digit */ \
		} else {						\
			base = 10; /* decimal */			\
		}							\
	}								\
									\
	for (;; ptr++) {						\
		unsigned char c = (unsigned char)*ptr;			\
									\
		if (isdigit(c))						\
			digit = c - '0';				\
		else if (isalpha(c))					\
			digit = tolower(c) - 'a' + 10;			\
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		/* Would value * base + digit exceed max_value? */	\
		if (value > (max_value - (valtype)digit) / (valtype)base) \
			return ERANGE;					\
									\
		value = value * (valtype)base + (valtype)digit;		\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = (char *)ptr;					\
									\
	return 0;							\
}

/*
 * define_ddi_strtox(type, valtype) generates the signed ddi_strto<type>()
 * on top of ddi_strtou<type>().  An optional leading '-' negates the
 * parsed magnitude.  When the '-' is not followed by any digit nothing is
 * consumed: *endptr is set to 'str' and *result to 0.  'endptr' may be
 * NULL.  Return values are those of ddi_strtou<type>().
 * NOTE(review): magnitudes above the signed maximum are not rejected;
 * they wrap on conversion to 'valtype' -- confirm whether ERANGE is wanted.
 */
#define define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
		    int base, valtype *result)				\
{									\
	unsigned valtype magnitude = 0;					\
	char *end = NULL;						\
	int negative = (*str == '-');					\
	int rc;								\
									\
	rc = ddi_strtou##type(str + negative, &end, base, &magnitude);	\
	if (rc)								\
		return rc;						\
									\
	/* A '-' without digits consumes nothing */			\
	if (negative && end == str + 1)					\
		end = (char *)str;					\
									\
	/* Negate in the unsigned domain to avoid signed overflow */	\
	*result = negative ? (valtype)(0 - magnitude)			\
			   : (valtype)magnitude;			\
									\
	if (endptr)							\
		*endptr = end;						\
									\
	return 0;							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
EXPORT_SYMBOL(ddi_strtoul);
EXPORT_SYMBOL(ddi_strtol);
EXPORT_SYMBOL(ddi_strtoll);
EXPORT_SYMBOL(ddi_strtoull);
/*
 * Return the utsname structure: through init_utsname() when
 * HAVE_INIT_UTSNAME is defined, otherwise the address of the global
 * system_utsname.
 */
struct new_utsname *__utsname(void)
{
	struct new_utsname *uts;

#ifdef HAVE_INIT_UTSNAME
	uts = init_utsname();
#else
	uts = &system_utsname;
#endif
	return uts;
}
EXPORT_SYMBOL(__utsname);
/*
 * Ask user space to publish the host id.
 *
 * Doing address resolution in the kernel is tricky and just not a good
 * idea in general.  So instead the usermodehelper support is used to
 * have '/bin/sh' run '/usr/bin/hostid' and redirect its output to
 * /proc/sys/kernel/spl/hostid for us to use.  It's a horrific solution
 * but it will do for now.
 *
 * Returns the value handed back by call_usermodehelper().
 */
static int
set_hostid(void)
{
	char shell[] = "/bin/sh";
	char *envp[] = { "HOME=/",
	                 "TERM=linux",
	                 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
	                 NULL };
	char *argv[] = { shell,
	                 "-c",
	                 "/usr/bin/hostid >/proc/sys/kernel/spl/hostid",
	                 NULL };
	int rc;

	rc = call_usermodehelper(shell, argv, envp, 1);

	return rc;
}
/*
 * Module entry point: bring up every SPL subsystem in order.
 *
 * Each step that fails jumps to the label that tears down, in reverse
 * order, exactly the subsystems initialized before it.  Returns 0 on
 * success or the failing step's return code; a set_hostid() failure is
 * reported as -EADDRNOTAVAIL.
 */
static int __init spl_init(void)
{
int rc = 0;
/* Nothing to unwind yet if the debug subsystem itself fails */
if ((rc = debug_init()))
return rc;
if ((rc = spl_kmem_init()))
GOTO(out , rc);
if ((rc = spl_mutex_init()))
GOTO(out2 , rc);
if ((rc = spl_taskq_init()))
GOTO(out3, rc);
if ((rc = vn_init()))
GOTO(out4, rc);
if ((rc = proc_init()))
GOTO(out5, rc);
if ((rc = kstat_init()))
GOTO(out6, rc);
if ((rc = set_hostid()))
GOTO(out7, rc = -EADDRNOTAVAIL);
printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION);
RETURN(rc);
/* Error unwinding: each label falls through to the ones below it */
out7:
kstat_fini();
out6:
proc_fini();
out5:
vn_fini();
out4:
spl_taskq_fini();
out3:
spl_mutex_fini();
out2:
spl_kmem_fini();
out:
debug_fini();
printk("SPL: Failed to Load Solaris Porting Layer v%s, "
"rc = %d\n", VERSION, rc);
return rc;
}
/*
 * Module exit point: shut the subsystems down in the reverse of the order
 * spl_init() started them, finishing with the debug subsystem.
 */
static void spl_fini(void)
{
ENTRY;
printk("SPL: Unloaded Solaris Porting Layer v%s\n", VERSION);
kstat_fini();
proc_fini();
vn_fini();
spl_taskq_fini();
spl_mutex_fini();
spl_kmem_fini();
debug_fini();
}
module_init(spl_init);
module_exit(spl_fini);
MODULE_AUTHOR("Lawrence Livermore National Labs");
MODULE_DESCRIPTION("Solaris Porting Layer");
MODULE_LICENSE("GPL");
File diff suppressed because it is too large Load Diff
+93
View File
@@ -0,0 +1,93 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/kobj.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_KOBJ
/*
 * Open the file 'name' for reading and wrap its vnode in a newly
 * allocated _buf handle.
 *
 * Returns the handle on success.  On failure (allocation or vn_open())
 * the sentinel (_buf_t *)-1UL is returned, not NULL.
 */
struct _buf *
kobj_open_file(const char *name)
{
	struct _buf *handle;
	vnode_t *vnode;
	int err;
	ENTRY;

	handle = kmalloc(sizeof(_buf_t), GFP_KERNEL);
	if (handle != NULL) {
		err = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vnode, 0, 0);
		if (err == 0) {
			handle->vp = vnode;
			RETURN(handle);
		}

		/* The open failed, the handle is of no use */
		kfree(handle);
	}

	RETURN((_buf_t *)-1UL);
} /* kobj_open_file() */
EXPORT_SYMBOL(kobj_open_file);
void
kobj_close_file(struct _buf *file)
{
ENTRY;
VOP_CLOSE(file->vp, 0, 0, 0, 0, 0);
VN_RELE(file->vp);
kfree(file);
EXIT;
} /* kobj_close_file() */
EXPORT_SYMBOL(kobj_close_file);
/*
 * Read 'size' bytes at offset 'off' from 'file' into the kernel buffer
 * 'buf'.  Returns whatever vn_rdwr() returns.
 */
int
kobj_read_file(struct _buf *file, char *buf, ssize_t size, offset_t off)
{
	int rc;
	ENTRY;

	rc = vn_rdwr(UIO_READ, file->vp, buf, size, off,
	             UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL);

	RETURN(rc);
} /* kobj_read_file() */
EXPORT_SYMBOL(kobj_read_file);
/*
 * Store the size of 'file' in '*size'.
 *
 * Returns the result of VOP_GETATTR(); '*size' is written only when that
 * result is 0.
 */
int
kobj_get_filesize(struct _buf *file, uint64_t *size)
{
	vattr_t attrs;
	int err;
	ENTRY;

	err = VOP_GETATTR(file->vp, &attrs, 0, 0, NULL);
	if (err == 0)
		*size = attrs.va_size;

	RETURN(err);
} /* kobj_get_filesize() */
EXPORT_SYMBOL(kobj_get_filesize);
+496
View File
@@ -0,0 +1,496 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/kstat.h>
#ifdef DEBUG_KSTAT
/* Protects kstat_list and kstat_id */
static spinlock_t kstat_lock;
/* Every kstat that has been installed, linked through ks_list */
static struct list_head kstat_list;
/* Next id handed out by __kstat_create() */
static kid_t kstat_id;
/*
 * Emit the header block of a kstat proc file.
 *
 * The first line holds the generic kstat fields (id, type, flags, number
 * of data items, data size, creation time, snapshot time).  The second
 * line is a type specific column header.  Every header line, including
 * the one for raw kstats, is newline terminated so the data rows that
 * follow always start on a line of their own.
 */
static void
kstat_seq_show_headers(struct seq_file *f)
{
	kstat_t *ksp = (kstat_t *)f->private;
	ASSERT(ksp->ks_magic == KS_MAGIC);

	seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n",
	           ksp->ks_kid, ksp->ks_type, ksp->ks_flags,
	           ksp->ks_ndata, (int)ksp->ks_data_size,
	           ksp->ks_crtime, ksp->ks_snaptime);

	switch (ksp->ks_type) {
	case KSTAT_TYPE_RAW:
		seq_printf(f, "raw data\n");
		break;
	case KSTAT_TYPE_NAMED:
		seq_printf(f, "%-31s %-4s %s\n",
		           "name", "type", "data");
		break;
	case KSTAT_TYPE_INTR:
		seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n",
		           "hard", "soft", "watchdog",
		           "spurious", "multsvc");
		break;
	case KSTAT_TYPE_IO:
		seq_printf(f,
		           "%-8s %-8s %-8s %-8s %-8s %-8s "
		           "%-8s %-8s %-8s %-8s %-8s %-8s\n",
		           "nread", "nwritten", "reads", "writes",
		           "wtime", "wlentime", "wupdate",
		           "rtime", "rlentime", "rupdate",
		           "wcnt", "rcnt");
		break;
	case KSTAT_TYPE_TIMER:
		seq_printf(f,
		           "%-31s %-8s "
		           "%-8s %-8s %-8s %-8s %-8s\n",
		           "name", "events", "elapsed",
		           "min", "max", "start", "stop");
		break;
	default:
		SBUG(); /* Unreachable */
	}
}
/*
 * Hex dump 'l' bytes starting at 'p', sixteen bytes per row.
 *
 * Every row starts with its index as three hex digits followed by ':'.
 * The dump stops, after terminating the current row, as soon as all 'l'
 * bytes have been written.  Always returns 0.
 */
static int
kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l)
{
	int row, col;
	int off = 0;	/* index of the next byte to print */

	for (row = 0; ; row++) {
		seq_printf(f, "%03x:", row);

		for (col = 0; col < 16; col++, off++) {
			if (off >= l) {
				seq_printf(f, "\n");
				return 0;
			}

			seq_printf(f, " %02x", (unsigned char)p[off]);
		}

		seq_printf(f, "\n");
	}
}
/*
 * Print one named statistic as "<name> <data_type> <value>\n".
 *
 * The value is formatted according to knp->data_type.  Character and
 * string values are forcibly NUL terminated in place before printing, so
 * this function may modify the statistic it is showing.  Always returns 0.
 */
static int
kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp)
{
seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type);
switch (knp->data_type) {
case KSTAT_DATA_CHAR:
knp->value.c[15] = '\0'; /* NULL terminate */
seq_printf(f, "%-16s", knp->value.c);
break;
/* XXX - We need to be more careful about which conversion
* specifiers are used for each arch; for now this is correct
* for x86_64.
*/
case KSTAT_DATA_INT32:
seq_printf(f, "%d", knp->value.i32);
break;
case KSTAT_DATA_UINT32:
seq_printf(f, "%u", knp->value.ui32);
break;
case KSTAT_DATA_INT64:
seq_printf(f, "%lld", (signed long long)knp->value.i64);
break;
case KSTAT_DATA_UINT64:
seq_printf(f, "%llu", (unsigned long long)knp->value.ui64);
break;
case KSTAT_DATA_LONG:
seq_printf(f, "%ld", knp->value.l);
break;
case KSTAT_DATA_ULONG:
seq_printf(f, "%lu", knp->value.ul);
break;
case KSTAT_DATA_STRING:
/* Terminate the string at the last byte of its buffer */
KSTAT_NAMED_STR_PTR(knp)
[KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0';
seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp));
break;
default:
SBUG(); /* Unreachable */
}
seq_printf(f, "\n");
return 0;
}
/*
 * Print the five interrupt counters of 'kip' on one line, in the same
 * order as the header: hard, soft, watchdog, spurious, multsvc.
 * Always returns 0.
 */
static int
kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip)
{
seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n",
kip->intrs[KSTAT_INTR_HARD],
kip->intrs[KSTAT_INTR_SOFT],
kip->intrs[KSTAT_INTR_WATCHDOG],
kip->intrs[KSTAT_INTR_SPURIOUS],
kip->intrs[KSTAT_INTR_MULTSVC]);
return 0;
}
/*
 * Print the I/O statistics of 'kip' on one line.
 *
 * The column order matches the header written by
 * kstat_seq_show_headers(): nread, nwritten, reads, writes, wtime,
 * wlentime, wupdate, rtime, rlentime, rupdate, wcnt, rcnt.
 * Always returns 0.
 */
static int
kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip)
{
	seq_printf(f,
	           "%-8llu %-8llu %-8u %-8u %-8lld %-8lld "
	           "%-8lld %-8lld %-8lld %-8lld %-8u %-8u\n",
	           kip->nread, kip->nwritten,
	           kip->reads, kip->writes,
	           kip->wtime, kip->wlentime, kip->wlastupdate,
	           kip->rtime, kip->rlentime, kip->rlastupdate,
	           kip->wcnt, kip->rcnt);

	return 0;
}
/*
 * Print one timer statistic on one line, in the same order as the
 * header: name, events, elapsed, min, max, start, stop.
 * Always returns 0.
 */
static int
kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp)
{
seq_printf(f,
"%-31s %-8llu %-8lld %-8lld %-8lld %-8lld %-8lld\n",
ktp->name, ktp->num_events, ktp->elapsed_time,
ktp->min_time, ktp->max_time,
ktp->start_time, ktp->stop_time);
return 0;
}
/*
 * seq_file 'show' callback: print the record 'p' using the formatter
 * that matches the type of the kstat attached to 'f'.
 *
 * Raw kstats are dumped as a whole from ks_data; for every other type
 * 'p' is the record to print.  Returns the formatter's result.
 */
static int
kstat_seq_show(struct seq_file *f, void *p)
{
	kstat_t *ksp = (kstat_t *)f->private;

	ASSERT(ksp->ks_magic == KS_MAGIC);

	switch (ksp->ks_type) {
	case KSTAT_TYPE_RAW:
		ASSERT(ksp->ks_ndata == 1);
		return kstat_seq_show_raw(f, ksp->ks_data,
		                          ksp->ks_data_size);
	case KSTAT_TYPE_NAMED:
		return kstat_seq_show_named(f, (kstat_named_t *)p);
	case KSTAT_TYPE_INTR:
		return kstat_seq_show_intr(f, (kstat_intr_t *)p);
	case KSTAT_TYPE_IO:
		return kstat_seq_show_io(f, (kstat_io_t *)p);
	case KSTAT_TYPE_TIMER:
		return kstat_seq_show_timer(f, (kstat_timer_t *)p);
	default:
		SBUG(); /* Unreachable */
	}

	return 0;
}
/*
 * Return the address of record number 'n' inside ksp->ks_data.
 *
 * Records are laid out back to back, so the address is the start of the
 * data plus 'n' times the record size of the kstat type.  Raw kstats are
 * a single blob and always yield the start of the data.  NULL is
 * returned for an unknown type.
 */
static void *
kstat_seq_data_addr(kstat_t *ksp, loff_t n)
{
	void *addr = NULL;
	size_t stride;
	ENTRY;

	switch (ksp->ks_type) {
	case KSTAT_TYPE_RAW:
		stride = 0;	/* single blob, 'n' is irrelevant */
		break;
	case KSTAT_TYPE_NAMED:
		stride = sizeof(kstat_named_t);
		break;
	case KSTAT_TYPE_INTR:
		stride = sizeof(kstat_intr_t);
		break;
	case KSTAT_TYPE_IO:
		stride = sizeof(kstat_io_t);
		break;
	case KSTAT_TYPE_TIMER:
		stride = sizeof(kstat_timer_t);
		break;
	default:
		SBUG(); /* Unreachable */
		RETURN(addr);
	}

	addr = ksp->ks_data + n * stride;

	RETURN(addr);
}
/*
 * seq_file 'start' callback.
 *
 * Takes ksp->ks_lock (released again by kstat_seq_stop()), refreshes the
 * snapshot time and, when iteration begins at position 0, prints the
 * headers.  Returns the record at '*pos', or NULL when '*pos' is past
 * the last record.
 */
static void *
kstat_seq_start(struct seq_file *f, loff_t *pos)
{
	kstat_t *ksp = (kstat_t *)f->private;
	loff_t first = *pos;
	ASSERT(ksp->ks_magic == KS_MAGIC);
	ENTRY;

	spin_lock(&ksp->ks_lock);
	ksp->ks_snaptime = gethrtime();

	if (first == 0)
		kstat_seq_show_headers(f);

	if (first >= ksp->ks_ndata)
		RETURN(NULL);

	RETURN(kstat_seq_data_addr(ksp, first));
}
/*
 * seq_file 'next' callback: advance '*pos' by one and return the record
 * at the new position, or NULL once the last record has been passed.
 */
static void *
kstat_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
	kstat_t *ksp = (kstat_t *)f->private;
	loff_t idx;
	ASSERT(ksp->ks_magic == KS_MAGIC);
	ENTRY;

	idx = ++(*pos);
	if (idx >= ksp->ks_ndata)
		RETURN(NULL);

	RETURN(kstat_seq_data_addr(ksp, idx));
}
/*
 * seq_file 'stop' callback: release the ks_lock that kstat_seq_start()
 * acquired.
 */
static void
kstat_seq_stop(struct seq_file *f, void *v)
{
	kstat_t *ksp;

	ksp = (kstat_t *)f->private;
	ASSERT(ksp->ks_magic == KS_MAGIC);

	spin_unlock(&ksp->ks_lock);
}
/* seq_file iterator callbacks used for every kstat proc entry */
static struct seq_operations kstat_seq_ops = {
.show = kstat_seq_show,
.start = kstat_seq_start,
.next = kstat_seq_next,
.stop = kstat_seq_stop,
};
/*
 * 'open' handler of a kstat proc entry: set up the seq_file iterator and
 * attach the data pointer stored in the proc entry as its private
 * pointer.  Returns 0 on success or the error from seq_open().
 */
static int
proc_kstat_open(struct inode *inode, struct file *filp)
{
	struct seq_file *seq;
	int err;

	err = seq_open(filp, &kstat_seq_ops);
	if (err == 0) {
		seq = filp->private_data;
		seq->private = PDE(inode)->data;
	}

	return err;
}
/* File operations installed on every kstat proc entry by __kstat_install() */
static struct file_operations proc_kstat_operations = {
.open = proc_kstat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
/*
 * Allocate and initialize a kstat.
 *
 *   ks_module, ks_name, ks_class - identification strings; each is copied
 *                into the kstat and truncated to KSTAT_STRLEN - 1
 *                characters so the copy is always NUL terminated
 *   ks_instance - must be 0 (asserted)
 *   ks_type     - one of the KSTAT_TYPE_* values
 *   ks_ndata    - number of records; for KSTAT_TYPE_RAW it is the size of
 *                the raw data in bytes.  Must be 1 for INTR and IO kstats
 *   ks_flags    - KSTAT_FLAG_* bits; with KSTAT_FLAG_VIRTUAL no data
 *                buffer is allocated and ks_data is left NULL
 *
 * The data buffer, when allocated here, is zero filled so that a kstat
 * installed before its owner fills it in does not expose stale memory.
 *
 * Returns the new kstat, or NULL if an allocation failed.  The kstat is
 * not visible until __kstat_install() is called.
 */
kstat_t *
__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
               const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
               uchar_t ks_flags)
{
	kstat_t *ksp;

	ASSERT(ks_module);
	ASSERT(ks_instance == 0);
	ASSERT(ks_name);
	ASSERT(ks_class);
	ASSERT(!(ks_flags & KSTAT_FLAG_UNSUPPORTED));

	if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
		ASSERT(ks_ndata == 1);

	ksp = kmem_zalloc(sizeof(*ksp), KM_SLEEP);
	if (ksp == NULL)
		return ksp;

	spin_lock(&kstat_lock);
	ksp->ks_kid = kstat_id;
	kstat_id++;
	spin_unlock(&kstat_lock);

	ksp->ks_magic = KS_MAGIC;
	spin_lock_init(&ksp->ks_lock);
	INIT_LIST_HEAD(&ksp->ks_list);

	ksp->ks_crtime = gethrtime();
	ksp->ks_snaptime = ksp->ks_crtime;

	/* strncpy() does not terminate on truncation, do it by hand */
	strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN);
	ksp->ks_module[KSTAT_STRLEN - 1] = '\0';
	ksp->ks_instance = ks_instance;
	strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN);
	ksp->ks_name[KSTAT_STRLEN - 1] = '\0';
	strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
	ksp->ks_class[KSTAT_STRLEN - 1] = '\0';
	ksp->ks_type = ks_type;
	ksp->ks_flags = ks_flags;

	switch (ksp->ks_type) {
	case KSTAT_TYPE_RAW:
		ksp->ks_ndata = 1;
		ksp->ks_data_size = ks_ndata;
		break;
	case KSTAT_TYPE_NAMED:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof(kstat_named_t);
		break;
	case KSTAT_TYPE_INTR:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof(kstat_intr_t);
		break;
	case KSTAT_TYPE_IO:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof(kstat_io_t);
		break;
	case KSTAT_TYPE_TIMER:
		ksp->ks_ndata = ks_ndata;
		ksp->ks_data_size = ks_ndata * sizeof(kstat_timer_t);
		break;
	default:
		SBUG(); /* Unreachable */
	}

	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
		ksp->ks_data = NULL;
	} else {
		ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
		if (ksp->ks_data == NULL) {
			kmem_free(ksp, sizeof(*ksp));
			ksp = NULL;
		}
	}

	return ksp;
}
EXPORT_SYMBOL(__kstat_create);
/*
 * Make 'ksp' visible: link it on the global kstat list and create its
 * proc entry <ks_module>/<ks_name> below proc_spl_kstat, creating the
 * module directory first if it does not exist yet.
 *
 * Installing a kstat that is already on the list is a no-op; in
 * particular the existing list linkage is left untouched.  If a proc
 * entry cannot be created, the linkage added by this call is undone
 * again.  Failures are not reported to the caller.
 */
void
__kstat_install(kstat_t *ksp)
{
	struct proc_dir_entry *de_module, *de_name;
	kstat_t *tmp;
	int linked = 0;		/* set once this call added ksp to the list */
	int rc = 0;
	ENTRY;

	spin_lock(&kstat_lock);

	/* Item may only be added to the list once */
	list_for_each_entry(tmp, &kstat_list, ks_list) {
		if (tmp == ksp) {
			spin_unlock(&kstat_lock);
			GOTO(out, rc = -EEXIST);
		}
	}

	list_add_tail(&ksp->ks_list, &kstat_list);
	linked = 1;
	spin_unlock(&kstat_lock);

	de_module = proc_dir_entry_find(proc_spl_kstat, ksp->ks_module);
	if (de_module == NULL) {
		de_module = proc_mkdir(ksp->ks_module, proc_spl_kstat);
		if (de_module == NULL)
			GOTO(out, rc = -EUNATCH);
	}

	de_name = create_proc_entry(ksp->ks_name, 0444, de_module);
	if (de_name == NULL)
		GOTO(out, rc = -EUNATCH);

	spin_lock(&ksp->ks_lock);
	ksp->ks_proc = de_name;
	de_name->proc_fops = &proc_kstat_operations;
	de_name->data = (void *)ksp;
	spin_unlock(&ksp->ks_lock);
out:
	/* Only undo a linkage made by this call; an earlier successful
	 * install (-EEXIST) must stay on the list. */
	if (rc && linked) {
		spin_lock(&kstat_lock);
		list_del_init(&ksp->ks_list);
		spin_unlock(&kstat_lock);
	}

	EXIT;
}
EXPORT_SYMBOL(__kstat_install);
/*
 * Remove 'ksp' from the global list, delete its proc entry (and the
 * module directory if that becomes empty) and free the kstat.  The data
 * buffer is freed as well unless the kstat is KSTAT_FLAG_VIRTUAL.
 * 'ksp' must not be used afterwards.
 */
void
__kstat_delete(kstat_t *ksp)
{
	struct proc_dir_entry *parent;

	spin_lock(&kstat_lock);
	list_del_init(&ksp->ks_list);
	spin_unlock(&kstat_lock);

	if (ksp->ks_proc) {
		parent = ksp->ks_proc->parent;
		remove_proc_entry(ksp->ks_name, parent);

		/* Remove top level module directory if it's empty */
		if (proc_dir_entries(parent) == 0)
			remove_proc_entry(parent->name, parent->parent);
	}

	/* Virtual kstats do not own their data buffer */
	if ((ksp->ks_flags & KSTAT_FLAG_VIRTUAL) == 0)
		kmem_free(ksp->ks_data, ksp->ks_data_size);

	kmem_free(ksp, sizeof(*ksp));
}
EXPORT_SYMBOL(__kstat_delete);
#endif /* DEBUG_KSTAT */
/*
 * Set up the kstat subsystem.  With DEBUG_KSTAT the global id counter,
 * list and lock are initialized; without it there is nothing to do.
 * Always returns 0.
 */
int
kstat_init(void)
{
	ENTRY;

#ifdef DEBUG_KSTAT
	kstat_id = 0;
	INIT_LIST_HEAD(&kstat_list);
	spin_lock_init(&kstat_lock);
#endif /* DEBUG_KSTAT */

	RETURN(0);
}
/*
 * Shut down the kstat subsystem.  With DEBUG_KSTAT this only asserts
 * that no kstat is left on the global list.
 */
void
kstat_fini(void)
{
	ENTRY;

#ifdef DEBUG_KSTAT
	/* Everything installed must have been deleted by now */
	ASSERT(list_empty(&kstat_list));
#endif /* DEBUG_KSTAT */

	EXIT;
}
+331
View File
@@ -0,0 +1,331 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_MODULE
/* Protects dev_info_list */
static spinlock_t dev_info_lock = SPIN_LOCK_UNLOCKED;
/* Every dev_info with a registered minor node, linked through di_list */
static LIST_HEAD(dev_info_list);
/*
 * Look up the dev_info registered for device number 'dev'.
 *
 * Returns the matching entry of dev_info_list, or NULL if there is none.
 * The list is only locked for the duration of the search.
 */
static struct dev_info *
get_dev_info(dev_t dev)
{
	struct dev_info *cur;
	struct dev_info *match = NULL;

	spin_lock(&dev_info_lock);
	list_for_each_entry(cur, &dev_info_list, di_list) {
		if (cur->di_dev == dev) {
			match = cur;
			break;
		}
	}
	spin_unlock(&dev_info_lock);

	return match;
}
/*
 * Generic ioctl entry point installed in the file_operations of every
 * minor node: find the dev_info that belongs to the inode's device
 * number and forward the request to its cb_ioctl callback.
 *
 * Returns -EINVAL when no dev_info is registered for the device (Linux
 * ioctl handlers report errors as negative errno values), otherwise the
 * value returned by cb_ioctl.
 * NOTE(review): cb_ioctl's return value is passed through unchanged; if
 * the callback reports errors as positive codes they will not be seen as
 * errors by user space -- confirm the expected sign convention.
 */
static int
mod_generic_ioctl(struct inode *ino, struct file *filp,
                  unsigned int cmd, unsigned long arg)
{
	struct dev_info *di;
	int rc, flags = 0, rvalp = 0;
	cred_t *cr = NULL;

	di = get_dev_info(MKDEV(imajor(ino), iminor(ino)));
	if (di == NULL)
		return -EINVAL;

	rc = di->di_ops->devo_cb_ops->cb_ioctl(di->di_dev,
	                                       (int)cmd,(intptr_t)arg,
	                                       flags, cr, &rvalp);
	return rc;
}
/*
 * Register a character device for minor 'minor_num' of 'di'.
 *
 * Only S_IFCHR nodes of type DDI_PSEUDO are supported and, of the
 * callbacks in the device's cb_ops, only cb_ioctl is wired up; every
 * other callback is asserted to be NULL.  'mod' becomes the owner of the
 * file_operations.  'name' is not used by this function.
 *
 * On success the dev_info is added to dev_info_list and DDI_SUCCESS is
 * returned.  On failure DDI_FAILURE is returned and di->di_cdev is left
 * NULL, so a later __ddi_remove_minor_node() will not touch the cdev
 * that was already released here.
 */
int
__ddi_create_minor_node(dev_info_t *di, char *name, int spec_type,
                        minor_t minor_num, char *node_type,
                        int flags, struct module *mod)
{
	struct cdev *cdev;
	struct dev_ops *dev_ops;
	struct cb_ops *cb_ops;
	struct file_operations *fops;
	int rc;
	ENTRY;

	ASSERT(spec_type == S_IFCHR);
	ASSERT(minor_num < di->di_minors);
	ASSERT(!strcmp(node_type, DDI_PSEUDO));

	fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL);
	if (fops == NULL)
		RETURN(DDI_FAILURE);

	cdev = cdev_alloc();
	if (cdev == NULL) {
		kfree(fops);
		RETURN(DDI_FAILURE);
	}

	cdev->ops = fops;

	mutex_enter(&di->di_lock);
	dev_ops = di->di_ops;
	ASSERT(dev_ops);
	cb_ops = di->di_ops->devo_cb_ops;
	ASSERT(cb_ops);

	/* Setup the fops to cb_ops mapping */
	fops->owner = mod;
	if (cb_ops->cb_ioctl)
		fops->ioctl = mod_generic_ioctl;

#if 0
	if (cb_ops->cb_open)
		fops->open = mod_generic_open;

	if (cb_ops->cb_close)
		fops->release = mod_generic_close;

	if (cb_ops->cb_read)
		fops->read = mod_generic_read;

	if (cb_ops->cb_write)
		fops->write = mod_generic_write;
#endif
	/* XXX: Currently unsupported operations */
	ASSERT(cb_ops->cb_open == NULL);
	ASSERT(cb_ops->cb_close == NULL);
	ASSERT(cb_ops->cb_read == NULL);
	ASSERT(cb_ops->cb_write == NULL);
	ASSERT(cb_ops->cb_strategy == NULL);
	ASSERT(cb_ops->cb_print == NULL);
	ASSERT(cb_ops->cb_dump == NULL);
	ASSERT(cb_ops->cb_devmap == NULL);
	ASSERT(cb_ops->cb_mmap == NULL);
	ASSERT(cb_ops->cb_segmap == NULL);
	ASSERT(cb_ops->cb_chpoll == NULL);
	ASSERT(cb_ops->cb_prop_op == NULL);
	ASSERT(cb_ops->cb_str == NULL);
	ASSERT(cb_ops->cb_aread == NULL);
	ASSERT(cb_ops->cb_awrite == NULL);

	di->di_cdev = cdev;
	di->di_flags = flags;
	di->di_minor = minor_num;
	di->di_dev = MKDEV(di->di_major, di->di_minor);

	rc = cdev_add(cdev, di->di_dev, 1);
	if (rc) {
		CERROR("Error adding cdev, %d\n", rc);
		/* Do not leave a pointer to the released cdev behind */
		di->di_cdev = NULL;
		cdev_del(cdev);
		kfree(fops);
		mutex_exit(&di->di_lock);
		RETURN(DDI_FAILURE);
	}

	spin_lock(&dev_info_lock);
	list_add(&di->di_list, &dev_info_list);
	spin_unlock(&dev_info_lock);

	mutex_exit(&di->di_lock);

	RETURN(DDI_SUCCESS);
}
EXPORT_SYMBOL(__ddi_create_minor_node);
/* Tear down the minor node of 'di'.  Both callers in this file hold
 * di->di_lock around the call.
 *
 * The cdev, if one is attached, is deleted and the pointer cleared; the
 * entry is then unlinked from the global dev_info_list under
 * dev_info_lock.  'name' is not used.
 */
static void
__ddi_remove_minor_node_locked(dev_info_t *di, char *name)
{
	struct cdev *attached = di->di_cdev;

	if (attached != NULL) {
		cdev_del(attached);
		di->di_cdev = NULL;
	}

	spin_lock(&dev_info_lock);
	list_del_init(&di->di_list);
	spin_unlock(&dev_info_lock);
}
/* Exported entry point for removing a minor node: takes di->di_lock and
 * defers to __ddi_remove_minor_node_locked().  'name' is passed through
 * unchanged (the locked helper does not use it). */
void
__ddi_remove_minor_node(dev_info_t *di, char *name)
{
ENTRY;
mutex_enter(&di->di_lock);
__ddi_remove_minor_node_locked(di, name);
mutex_exit(&di->di_lock);
EXIT;
}
EXPORT_SYMBOL(__ddi_remove_minor_node);
/* Quiesce callback for drivers that need no quiesce work: ignores 'dip'
 * and always reports DDI_SUCCESS. */
int
ddi_quiesce_not_needed(dev_info_t *dip)
{
RETURN(DDI_SUCCESS);
}
EXPORT_SYMBOL(ddi_quiesce_not_needed);
/* Compiled-out placeholders for the open/close/read/write fops mappings.
 * The bodies only sketch the cb_ops prototypes they would forward to and
 * are not valid C as written.  While they stay disabled,
 * __ddi_create_minor_node() asserts that the matching cb_ops entries are
 * NULL. */
#if 0
static int
mod_generic_open(struct inode *, struct file *)
{
open(dev_t *devp, int flags, int otyp, cred_t *credp);
}
static int
mod_generic_close(struct inode *, struct file *)
{
close(dev_t dev, int flags, int otyp, cred_t *credp);
}
static ssize_t
mod_generic_read(struct file *, char __user *, size_t, loff_t *)
{
read(dev_t dev, struct uio *uiop, cred_t *credp);
}
static ssize_t
mod_generic_write(struct file *, const char __user *, size_t, loff_t *)
{
write(dev_t dev, struct uio *uiop, cred_t *credp);
}
#endif
/* Allocate and initialise a dev_info for a driver.
 *
 * major  - major number recorded in di_major
 * minors - number of minors recorded in di_minors
 * ops    - driver operations recorded in di_ops
 *
 * The lock and list node are initialised, and di_class, di_cdev, di_minor
 * and di_dev start out empty/zero.  Fields not listed here are left as
 * returned by kmalloc().  Returns NULL when the allocation fails.
 */
static struct dev_info *
dev_info_alloc(major_t major, minor_t minors, struct dev_ops *ops)
{
	struct dev_info *di;

	di = kmalloc(sizeof(struct dev_info), GFP_KERNEL);
	if (di != NULL) {
		mutex_init(&di->di_lock, NULL, MUTEX_DEFAULT, NULL);
		INIT_LIST_HEAD(&di->di_list);

		di->di_ops    = ops;
		di->di_class  = NULL;
		di->di_cdev   = NULL;
		di->di_major  = major;
		di->di_minor  = 0;
		di->di_minors = minors;
		di->di_dev    = 0;
	}

	return di;
}
/* Release a dev_info obtained from dev_info_alloc().
 * Any minor node still attached is removed under di_lock first, then the
 * lock is destroyed and the structure freed. */
static void
dev_info_free(struct dev_info *di)
{
mutex_enter(&di->di_lock);
__ddi_remove_minor_node_locked(di, NULL);
mutex_exit(&di->di_lock);
mutex_destroy(&di->di_lock);
kfree(di);
}
/* Install the driver described by 'modlp'.
 *
 * A dev_info is allocated from the linkage's major/minors and the driver's
 * dev_ops, then the driver's devo_attach callback is invoked once with
 * DDI_ATTACH.  On success the dev_info is stored in drv_dev_info.
 *
 * Returns DDI_SUCCESS, ENOMEM when the dev_info cannot be allocated, or the
 * value returned by devo_attach when it fails (the dev_info is freed).
 */
int
__mod_install(struct modlinkage *modlp)
{
	struct modldrv *drv = modlp->ml_modldrv;
	struct dev_info *di;
	int rc;
	ENTRY;

	di = dev_info_alloc(modlp->ml_major, modlp->ml_minors,
	                    drv->drv_dev_ops);
	if (di == NULL)
		RETURN(ENOMEM);

	/* XXX: Really we need to be calling devo_probe if it's available
	 * and then calling devo_attach for each device discovered.  However
	 * for now we just call it once and let the app sort it out.
	 */
	rc = drv->drv_dev_ops->devo_attach(di, DDI_ATTACH);
	if (rc == DDI_SUCCESS) {
		drv->drv_dev_info = di;
		RETURN(DDI_SUCCESS);
	}

	/* Attach failed, nothing keeps a reference to the dev_info */
	dev_info_free(di);
	RETURN(rc);
}
EXPORT_SYMBOL(__mod_install);
/* Remove the driver described by 'modlp'.
 *
 * The driver's devo_detach callback is invoked with DDI_DETACH on the
 * dev_info saved by __mod_install().  When it succeeds the dev_info is
 * freed and drv_dev_info is cleared.
 *
 * Returns DDI_SUCCESS, or the value returned by devo_detach when it fails
 * (in which case nothing is released).
 */
int
__mod_remove(struct modlinkage *modlp)
{
	struct modldrv *drv = modlp->ml_modldrv;
	struct dev_info *di = drv->drv_dev_info;
	int rc;
	ENTRY;

	rc = drv->drv_dev_ops->devo_detach(di, DDI_DETACH);
	if (rc == DDI_SUCCESS) {
		dev_info_free(di);
		drv->drv_dev_info = NULL;
		RETURN(DDI_SUCCESS);
	}

	RETURN(rc);
}
EXPORT_SYMBOL(__mod_remove);
/* Allocate an LDI identifier for the module described by 'modlp'.
 *
 * The identifier records MKDEV(ml_major, 0) and is handed back through
 * 'lip'; the caller releases it with ldi_ident_release().
 *
 * Returns 0 on success or ENOMEM when the allocation fails, in which case
 * '*lip' is not written.
 */
int
ldi_ident_from_mod(struct modlinkage *modlp, ldi_ident_t *lip)
{
	ldi_ident_t li;
	ENTRY;

	ASSERT(modlp);
	ASSERT(lip);

	li = kmalloc(sizeof(struct ldi_ident), GFP_KERNEL);
	if (li != NULL) {
		li->li_dev = MKDEV(modlp->ml_major, 0);
		*lip = li;
		RETURN(0);
	}

	RETURN(ENOMEM);
}
EXPORT_SYMBOL(ldi_ident_from_mod);
/* Free an LDI identifier previously returned by ldi_ident_from_mod().
 * 'lip' must not be NULL (asserted). */
void
ldi_ident_release(ldi_ident_t lip)
{
ENTRY;
ASSERT(lip);
kfree(lip);
EXIT;
}
EXPORT_SYMBOL(ldi_ident_release);
+309
View File
@@ -0,0 +1,309 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/mutex.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_MUTEX
/* Mutex implementation based on those found in Solaris.  This means
* that the MUTEX_DEFAULT type is an adaptive mutex.  When calling
* mutex_enter() your process will spin waiting for the lock if it's
* likely the lock will be freed shortly.  If it looks like the
* lock will be held for a longer time we schedule and sleep waiting
* for it. This determination is made by checking if the holder of
* the lock is currently running on cpu or sleeping waiting to be
* scheduled. If the holder is currently running it's likely the
* lock will be shortly dropped.
*
* XXX: This is basically a rough implementation to see if this
* helps our performance. If it does a more careful implementation
* should be done, perhaps in assembly.
*/
/* 0: Never spin when trying to acquire lock
* -1: Spin until acquired or holder yields without dropping lock
* 1-MAX_INT: Spin for N attempts before sleeping for lock
*/
/* Spin budget consulted by mutex_enter_adaptive(); exposed as a module
 * parameter at the bottom of this file. */
int mutex_spin_max = 0;
#ifdef DEBUG_MUTEX
/* Global counters updated through MUTEX_STAT_INC() */
int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
/* Protects mutex_stats_list; initialised in spl_mutex_init() */
spinlock_t mutex_stats_lock;
/* Every initialised kmutex_t is linked here through km_list */
struct list_head mutex_stats_list;
#endif
/* Initialise a kmutex_t.
 *
 * mp   - mutex to initialise; must not already carry KM_MAGIC
 * name - name copied into a freshly allocated km_name buffer
 * type - MUTEX_DEFAULT (treated as MUTEX_ADAPTIVE), MUTEX_SPIN or
 *        MUTEX_ADAPTIVE; anything else hits SBUG()
 * ibc  - must be NULL
 *
 * The semaphore, the name and (with DEBUG_MUTEX) the per-mutex statistics
 * are allocated separately; a non-sleeping allocation is used when the
 * caller has a non-zero preempt count or interrupts disabled.
 *
 * Returns 0 on success or -ENOMEM when an allocation fails, in which case
 * everything allocated so far is released and km_magic is not set.
 */
int
__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
{
int flags = KM_SLEEP;
ASSERT(mp);
ASSERT(name);
ASSERT(ibc == NULL);
ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */
mp->km_name = NULL;
mp->km_name_size = strlen(name) + 1;
switch (type) {
case MUTEX_DEFAULT:
mp->km_type = MUTEX_ADAPTIVE;
break;
case MUTEX_SPIN:
case MUTEX_ADAPTIVE:
mp->km_type = type;
break;
default:
SBUG();
}
/* We may be called when there is a non-zero preempt_count or
 * interrupts are disabled, in which case we must not sleep.
 */
if (current_thread_info()->preempt_count || irqs_disabled())
flags = KM_NOSLEEP;
/* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags);
if (mp->km_sem == NULL)
return -ENOMEM;
mp->km_name = kmem_alloc(mp->km_name_size, flags);
if (mp->km_name == NULL) {
kmem_free(mp->km_sem, sizeof(struct semaphore));
return -ENOMEM;
}
sema_init(mp->km_sem, 1);
/* km_name_size is strlen(name) + 1, so the terminator is copied too */
strncpy(mp->km_name, name, mp->km_name_size);
#ifdef DEBUG_MUTEX
mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags);
if (mp->km_stats == NULL) {
kmem_free(mp->km_name, mp->km_name_size);
kmem_free(mp->km_sem, sizeof(struct semaphore));
return -ENOMEM;
}
/* XXX - This appears to be a much more contended lock than I
 * would have expected. To run with this debugging enabled and
 * get reasonable performance we may need to be more clever and
 * do something like hash the mutex ptr on to one of several
 * lists to ease this single point of contention.
 */
spin_lock(&mutex_stats_lock);
list_add_tail(&mp->km_list, &mutex_stats_list);
spin_unlock(&mutex_stats_lock);
#endif
/* Mark the mutex valid only once every allocation has succeeded */
mp->km_magic = KM_MAGIC;
mp->km_owner = NULL;
return 0;
}
EXPORT_SYMBOL(__spl_mutex_init);
/* Destroy a mutex initialised by __spl_mutex_init().
 * With DEBUG_MUTEX the mutex is unlinked from the global statistics list
 * and its counters are freed.  The name and semaphore are then freed and
 * the whole structure is overwritten with KM_POISON, which also
 * invalidates km_magic. */
void
__spl_mutex_destroy(kmutex_t *mp)
{
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
#ifdef DEBUG_MUTEX
spin_lock(&mutex_stats_lock);
list_del_init(&mp->km_list);
spin_unlock(&mutex_stats_lock);
kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
#endif
kmem_free(mp->km_name, mp->km_name_size);
kmem_free(mp->km_sem, sizeof(struct semaphore));
memset(mp, KM_POISON, sizeof(*mp));
}
EXPORT_SYMBOL(__spl_mutex_destroy);
/* Try to take the mutex without blocking.
 *
 * Returns 1 when the mutex was acquired (the caller is recorded as owner)
 * and 0 when it is already held.
 */
int
__mutex_tryenter(kmutex_t *mp)
{
	ENTRY;

	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);
	MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);

	/* down_trylock() reports 0 when the semaphore was obtained */
	if (down_trylock(mp->km_sem) != 0)
		RETURN(0);

	ASSERT(mp->km_owner == NULL);
	mp->km_owner = current;
	MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);

	RETURN(1);
}
EXPORT_SYMBOL(__mutex_tryenter);
/* Without HAVE_TASK_CURR the owner is always reported as not running,
 * which makes the spin loop in mutex_enter_adaptive() a no-op. */
#ifndef HAVE_TASK_CURR
#define task_curr(owner) 0
#endif
/* Acquire the semaphore of an adaptive mutex.
 *
 * When no owner is recorded the semaphore is simply taken.  Otherwise the
 * caller polls with down_trylock() for as long as the recorded owner is
 * running on a cpu (task_curr()) and the spin budget allows, and then
 * falls back to a blocking down().  The budget follows mutex_spin_max:
 *    0: never spin
 *   -1: spin until acquired or the holder stops running
 *    N: at most N trylock attempts
 *
 * km_owner is sampled without holding any lock, so the decision is only a
 * heuristic.  Ownership is recorded by the caller, __mutex_enter().
 */
static void
mutex_enter_adaptive(kmutex_t *mp)
{
	struct task_struct *owner;
	int spin_max = mutex_spin_max;	/* snapshot, tunable at runtime */
	int count = 0;

	/* Lock is not held so we expect to acquire the lock */
	if ((owner = mp->km_owner) == NULL) {
		down(mp->km_sem);
		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
	} else {
		/* The lock is held by a currently running task which
		 * we expect will drop the lock before leaving the
		 * head of the runqueue.  So the ideal thing to do
		 * is spin until we acquire the lock and avoid a
		 * context switch.  However it is also possible the
		 * task holding the lock yields the processor with
		 * out dropping lock.  In which case, we know it's
		 * going to be a while so we stop spinning and go
		 * to sleep waiting for the lock to be available.
		 * This should strike the optimum balance between
		 * spinning and sleeping waiting for a lock.
		 */
		while (task_curr(owner) &&
		       ((spin_max < 0) || (count < spin_max))) {
			if (down_trylock(mp->km_sem) == 0) {
				MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
				MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
				GOTO(out, count);
			}

			/* Only bounded spins are counted so an unbounded
			 * spin can never overflow the counter */
			if (spin_max >= 0)
				count++;
		}

		/* The lock is held by a sleeping task so it's going to
		 * cost us minimally one context switch.  We might as
		 * well sleep and yield the processor to other tasks.
		 */
		down(mp->km_sem);
		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
	}
out:
	MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
}
/* Acquire the mutex, waiting for as long as it takes.
 *
 * MUTEX_SPIN mutexes poll the semaphore until it is obtained, while
 * MUTEX_ADAPTIVE mutexes go through mutex_enter_adaptive().  Once the
 * semaphore is held the calling task is recorded as the owner.
 */
void
__mutex_enter(kmutex_t *mp)
{
	ENTRY;

	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);

	if (mp->km_type == MUTEX_SPIN) {
		/* Busy wait, never sleeping */
		while (down_trylock(mp->km_sem) != 0)
			;

		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
	} else if (mp->km_type == MUTEX_ADAPTIVE) {
		mutex_enter_adaptive(mp);
	}

	ASSERT(mp->km_owner == NULL);
	mp->km_owner = current;

	EXIT;
}
EXPORT_SYMBOL(__mutex_enter);
/* Release a mutex held by the calling task (asserted): the owner is
 * cleared first and the semaphore is then released. */
void
__mutex_exit(kmutex_t *mp)
{
ENTRY;
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
ASSERT(mp->km_owner == current);
mp->km_owner = NULL;
up(mp->km_sem);
EXIT;
}
EXPORT_SYMBOL(__mutex_exit);
/* Return 1 if mutex is held by current process, else zero.
 * km_owner is read without taking any lock. */
int
__mutex_owned(kmutex_t *mp)
{
ENTRY;
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
RETURN(mp->km_owner == current);
}
EXPORT_SYMBOL(__mutex_owned);
/* Return owner if mutex is owned, else NULL.
 * The value is a lock-free snapshot of km_owner. */
kthread_t *
__spl_mutex_owner(kmutex_t *mp)
{
ENTRY;
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
RETURN(mp->km_owner);
}
EXPORT_SYMBOL(__spl_mutex_owner);
/* Subsystem setup.  With DEBUG_MUTEX the global statistics lock and list
 * are initialised; otherwise there is nothing to do.  Always returns 0. */
int
spl_mutex_init(void)
{
ENTRY;
#ifdef DEBUG_MUTEX
spin_lock_init(&mutex_stats_lock);
INIT_LIST_HEAD(&mutex_stats_list);
#endif
RETURN(0);
}
/* Subsystem teardown.  With DEBUG_MUTEX it asserts that every mutex has
 * been destroyed, i.e. the global statistics list is empty. */
void
spl_mutex_fini(void)
{
ENTRY;
#ifdef DEBUG_MUTEX
ASSERT(list_empty(&mutex_stats_list));
#endif
EXIT;
}
/* Expose mutex_spin_max as an int module parameter with mode 0644 */
module_param(mutex_spin_max, int, 0644);
MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to aquire lock");
File diff suppressed because it is too large Load Diff
+361
View File
@@ -0,0 +1,361 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/rwlock.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_RWLOCK
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
/* Entry on an rw_semaphore wait_list, as consumed by
 * __rwsem_wake_one_writer_locked() below.
 * NOTE(review): presumably this has to match the kernel's own private
 * definition of the waiter structure - confirm for each supported kernel. */
struct rwsem_waiter {
struct list_head list;
struct task_struct *task;
unsigned int flags;
#define RWSEM_WAITING_FOR_READ 0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};
/* Wake a single writer.
 * Marks the semaphore as write-held (activity = -1), unlinks the first
 * entry on the wait list and wakes its task.  Judging by the _locked
 * suffix the caller is expected to hold sem->wait_lock; in this file the
 * only call chain starts in __rw_tryupgrade(), which does.  Returns 'sem'. */
static struct rw_semaphore *
__rwsem_wake_one_writer_locked(struct rw_semaphore *sem)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
sem->activity = -1;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
list_del(&waiter->list);
/* Save the task pointer before waiter->task is cleared below */
tsk = waiter->task;
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
return sem;
}
/* Release a read lock on the semaphore.
 * Drops one reader from sem->activity; when that was the last reader and
 * the wait list is not empty, the first waiter is woken as a writer. */
static void
__up_read_locked(struct rw_semaphore *sem)
{
if (--sem->activity == 0 && !list_empty(&sem->wait_list))
(void)__rwsem_wake_one_writer_locked(sem);
}
/* Attempt to take the semaphore for writing without blocking.
 *
 * The write lock is granted only when nobody holds the semaphore
 * (activity == 0) and nobody is queued on the wait list.
 * Returns 1 if successful, 0 if contention.
 */
static int
__down_write_trylock_locked(struct rw_semaphore *sem)
{
	if (sem->activity != 0 || !list_empty(&sem->wait_list))
		return 0;

	/* granted */
	sem->activity = -1;

	return 1;
}
#endif
/* Initialise a krwlock_t.
 *
 * rwlp - lock to initialise
 * name - name copied into a freshly allocated rw_name buffer
 * type - must be RW_DEFAULT
 * arg  - must be NULL
 *
 * A non-sleeping allocation is used for the name when the caller has a
 * non-zero preempt count or interrupts disabled.
 *
 * NOTE(review): when the name allocation fails the function returns with
 * rw_magic already set but the semaphore not initialised, and the caller
 * has no way to detect it.
 */
void
__rw_init(krwlock_t *rwlp, char *name, krw_type_t type, void *arg)
{
	int flags;

	ASSERT(rwlp);
	ASSERT(name);
	ASSERT(type == RW_DEFAULT);	/* XXX no irq handler use */
	ASSERT(arg == NULL);		/* XXX no irq handler use */

	rwlp->rw_magic = RW_MAGIC;
	rwlp->rw_owner = NULL;
	rwlp->rw_name = NULL;
	rwlp->rw_name_size = strlen(name) + 1;

	/* We may be called when there is a non-zero preempt_count or
	 * interrupts are disabled, in which case we must not sleep.
	 */
	if (current_thread_info()->preempt_count || irqs_disabled())
		flags = KM_NOSLEEP;
	else
		flags = KM_SLEEP;

	rwlp->rw_name = kmem_alloc(rwlp->rw_name_size, flags);
	if (rwlp->rw_name != NULL) {
		init_rwsem(&rwlp->rw_sem);
		strcpy(rwlp->rw_name, name);
	}
}
EXPORT_SYMBOL(__rw_init);
/* Destroy a krwlock_t.
 * Asserts that the lock has no write owner and no queued waiters, frees
 * the name buffer and overwrites the whole structure with RW_POISON. */
void
__rw_destroy(krwlock_t *rwlp)
{
ASSERT(rwlp);
ASSERT(rwlp->rw_magic == RW_MAGIC);
ASSERT(rwlp->rw_owner == NULL);
/* wait_lock is only taken to get a stable view of the wait list */
spin_lock(&rwlp->rw_sem.wait_lock);
ASSERT(list_empty(&rwlp->rw_sem.wait_list));
spin_unlock(&rwlp->rw_sem.wait_lock);
kmem_free(rwlp->rw_name, rwlp->rw_name_size);
memset(rwlp, RW_POISON, sizeof(krwlock_t));
}
EXPORT_SYMBOL(__rw_destroy);
/* Try to take the lock as RW_READER or RW_WRITER without blocking.
 *
 * Returns non-zero when the lock was obtained and 0 when it could not be
 * obtained without blocking.  A successful writer is recorded in rw_owner.
 * Any other 'rw' value hits SBUG().
 */
int
__rw_tryenter(krwlock_t *rwlp, krw_t rw)
{
	int rc = 0;
	ENTRY;

	ASSERT(rwlp);
	ASSERT(rwlp->rw_magic == RW_MAGIC);

	/* Both trylock primitives return 1 if success, 0 if contention */
	if (rw == RW_READER) {
		/* Solaris would refuse the read lock here if any writers
		 * were waiting, i.e. when readers hold the lock and a
		 * writer is queued behind them.  Letting this thread in
		 * as well does not look like a correctness problem, so
		 * down_read_trylock() is used as is.  This may need to be
		 * revisited if it turns out to be an issue. */
		rc = down_read_trylock(&rwlp->rw_sem);
	} else if (rw == RW_WRITER) {
		rc = down_write_trylock(&rwlp->rw_sem);
		if (rc) {
			/* Nobody else may hold the write lock now */
			ASSERT(rwlp->rw_owner == NULL);
			rwlp->rw_owner = current;
		}
	} else {
		SBUG();
	}

	RETURN(rc);
}
EXPORT_SYMBOL(__rw_tryenter);
/* Take the lock as RW_READER or RW_WRITER, blocking until it is granted.
 * A writer is recorded in rw_owner once the semaphore is held.  Any other
 * 'rw' value hits SBUG().
 */
void
__rw_enter(krwlock_t *rwlp, krw_t rw)
{
	ENTRY;

	ASSERT(rwlp);
	ASSERT(rwlp->rw_magic == RW_MAGIC);

	if (rw == RW_READER) {
		/* Solaris would block here if any writers were waiting,
		 * i.e. when readers hold the lock and a writer is queued
		 * behind them.  Letting this thread in as well does not
		 * look like a correctness problem, so down_read() is used
		 * as is.  This may need to be revisited if it turns out
		 * to be an issue. */
		down_read(&rwlp->rw_sem);
	} else if (rw == RW_WRITER) {
		down_write(&rwlp->rw_sem);

		/* Nobody else may hold the write lock now */
		ASSERT(rwlp->rw_owner == NULL);
		rwlp->rw_owner = current;
	} else {
		SBUG();
	}

	EXIT;
}
EXPORT_SYMBOL(__rw_enter);
/* Release the lock, whichever way the caller holds it.
 *
 * rw_owner equals the current thread iff the caller is the writer; in that
 * case the owner is cleared and the write side released.  Otherwise the
 * caller is treated as a reader.
 */
void
__rw_exit(krwlock_t *rwlp)
{
	ENTRY;

	ASSERT(rwlp);
	ASSERT(rwlp->rw_magic == RW_MAGIC);

	if (rwlp->rw_owner != current) {
		up_read(&rwlp->rw_sem);
	} else {
		rwlp->rw_owner = NULL;
		up_write(&rwlp->rw_sem);
	}

	EXIT;
}
EXPORT_SYMBOL(__rw_exit);
/* Downgrade a write lock held by the calling thread (asserted) to a read
 * lock.  The owner is cleared before the semaphore is downgraded. */
void
__rw_downgrade(krwlock_t *rwlp)
{
ENTRY;
ASSERT(rwlp);
ASSERT(rwlp->rw_magic == RW_MAGIC);
ASSERT(rwlp->rw_owner == current);
rwlp->rw_owner = NULL;
downgrade_write(&rwlp->rw_sem);
EXIT;
}
EXPORT_SYMBOL(__rw_downgrade);
/* Try to upgrade a read lock to a write lock.
 *
 * Return 0 if unable to perform the upgrade, which is the case when any
 * task is queued on the semaphore or more than one reader is active.
 * Otherwise the read lock is dropped, the write lock is taken with a
 * trylock, the caller is recorded in rw_owner and 1 is returned.
 * Might be wise to fix the caller
 * to acquire the write lock first?
 *
 * The whole sequence runs with rw_sem.wait_lock held.
 * NOTE(review): in the non-CONFIG_RWSEM_GENERIC_SPINLOCK branch the stock
 * up_read()/down_write_trylock() are called with wait_lock held; confirm
 * neither can try to take wait_lock itself.
 * NOTE(review): wait_lock is taken with spin_lock() here but with
 * spin_lock_irq() in __rw_lock_held() - confirm which is required.
 */
int
__rw_tryupgrade(krwlock_t *rwlp)
{
int rc = 0;
ENTRY;
ASSERT(rwlp);
ASSERT(rwlp->rw_magic == RW_MAGIC);
spin_lock(&rwlp->rw_sem.wait_lock);
/* Check if there is anyone waiting for the
 * lock. If there is, then we know we should
 * not try to upgrade the lock */
if (!list_empty(&rwlp->rw_sem.wait_list)) {
spin_unlock(&rwlp->rw_sem.wait_lock);
RETURN(0);
}
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
/* Note that activity is protected by
 * the wait_lock. Don't try to upgrade
 * if there are multiple readers currently
 * holding the lock */
if (rwlp->rw_sem.activity > 1) {
#else
/* Don't try to upgrade
 * if there are multiple readers currently
 * holding the lock */
if ((rwlp->rw_sem.count & RWSEM_ACTIVE_MASK) > 1) {
#endif
spin_unlock(&rwlp->rw_sem.wait_lock);
RETURN(0);
}
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
/* Here it should be safe to drop the
 * read lock and reacquire it for writing since
 * we know there are no waiters */
__up_read_locked(&rwlp->rw_sem);
/* returns 1 if success, 0 if contention */
rc = __down_write_trylock_locked(&rwlp->rw_sem);
#else
/* Here it should be safe to drop the
 * read lock and reacquire it for writing since
 * we know there are no waiters */
up_read(&rwlp->rw_sem);
/* returns 1 if success, 0 if contention */
rc = down_write_trylock(&rwlp->rw_sem);
#endif
/* Check if upgrade failed. Should not ever happen
 * if we got to this point */
ASSERT(rc);
ASSERT(rwlp->rw_owner == NULL);
rwlp->rw_owner = current;
spin_unlock(&rwlp->rw_sem.wait_lock);
RETURN(1);
}
EXPORT_SYMBOL(__rw_tryupgrade);
/* Return the thread recorded as write owner, or NULL when there is none.
 * rw_owner is read without taking any lock. */
kthread_t *
__rw_owner(krwlock_t *rwlp)
{
ENTRY;
ASSERT(rwlp);
ASSERT(rwlp->rw_magic == RW_MAGIC);
RETURN(rwlp->rw_owner);
}
EXPORT_SYMBOL(__rw_owner);
/* Return non-zero when the lock is held and no write owner is recorded,
 * i.e. it is held by reader(s).  This says nothing about whether the
 * calling thread is one of those readers. */
int
__rw_read_held(krwlock_t *rwlp)
{
ENTRY;
ASSERT(rwlp);
ASSERT(rwlp->rw_magic == RW_MAGIC);
RETURN(__rw_lock_held(rwlp) && rwlp->rw_owner == NULL);
}
EXPORT_SYMBOL(__rw_read_held);
/* Return non-zero when the calling thread is the recorded write owner */
int
__rw_write_held(krwlock_t *rwlp)
{
ENTRY;
ASSERT(rwlp);
ASSERT(rwlp->rw_magic == RW_MAGIC);
RETURN(rwlp->rw_owner == current);
}
EXPORT_SYMBOL(__rw_write_held);
/* Return 1 when the lock is held by anyone, reader or writer, else 0.
 * The semaphore's activity (CONFIG_RWSEM_GENERIC_SPINLOCK) or count field
 * is sampled under wait_lock with interrupts disabled; a non-zero value
 * is taken to mean the lock is held. */
int
__rw_lock_held(krwlock_t *rwlp)
{
int rc = 0;
ENTRY;
ASSERT(rwlp);
ASSERT(rwlp->rw_magic == RW_MAGIC);
spin_lock_irq(&(rwlp->rw_sem.wait_lock));
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
if (rwlp->rw_sem.activity != 0) {
#else
if (rwlp->rw_sem.count != 0) {
#endif
rc = 1;
}
spin_unlock_irq(&(rwlp->rw_sem.wait_lock));
RETURN(rc);
}
EXPORT_SYMBOL(__rw_lock_held);
+491
View File
@@ -0,0 +1,491 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/taskq.h>
#include <sys/kmem.h>
#include <linux/err.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_TASKQ
/* Global system-wide task queue available for all consumers.  It is
 * created by spl_taskq_init() and destroyed by spl_taskq_fini(). */
taskq_t *system_taskq;
EXPORT_SYMBOL(system_taskq);
/* A single unit of work queued on a taskq_t.  An spl_task_t sits on at
 * most one of the taskq's free, pending or work lists at a time. */
typedef struct spl_task {
spinlock_t t_lock; /* Held by __taskq_dispatch() while filling in the task */
struct list_head t_list; /* Linkage on the free/pend/work list */
taskqid_t t_id; /* Id assigned at dispatch, 0 while unused */
task_func_t *t_func; /* Function run by the worker thread */
void *t_arg; /* Argument passed to t_func */
} spl_task_t;
/* Obtain an spl_task_t for 'tq'.
 *
 * NOTE: Must be called with tq->tq_lock held, returns an spl_task_t which
 * is not attached to the free, work, or pending taskq lists.  The lock may
 * be dropped and re-taken while sleeping or allocating.
 *
 * flags must contain exactly one of TQ_SLEEP / TQ_NOSLEEP and may add:
 *   TQ_NEW     - never reuse an entry from the free list
 *   TQ_NOALLOC - never allocate, only the free list may be used
 *
 * Returns NULL when no task is available: the free list is empty and
 * allocation is prohibited, the pool is at tq_maxalloc (immediately for
 * TQ_NOSLEEP, after roughly a second of polling for TQ_SLEEP), or the
 * allocation itself fails.
 */
static spl_task_t *
task_alloc(taskq_t *tq, uint_t flags)
{
	spl_task_t *t;
	int count = 0;
	ENTRY;

	ASSERT(tq);
	ASSERT(flags & (TQ_SLEEP | TQ_NOSLEEP));               /* One set */
	ASSERT(!((flags & TQ_SLEEP) && (flags & TQ_NOSLEEP))); /* Not both */
	ASSERT(spin_is_locked(&tq->tq_lock));
retry:
	/* Acquire spl_task_t's from free list if available */
	if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
		t = list_entry(tq->tq_free_list.next, spl_task_t, t_list);
		list_del_init(&t->t_list);
		RETURN(t);
	}

	/* Free list is empty and memory allocs are prohibited */
	if (flags & TQ_NOALLOC)
		RETURN(NULL);

	/* Hit maximum spl_task_t pool size */
	if (tq->tq_nalloc >= tq->tq_maxalloc) {
		if (flags & TQ_NOSLEEP)
			RETURN(NULL);

		/* Sleep periodically polling the free list for an available
		 * spl_task_t.  If a full second passes and we have not found
		 * one give up and return a NULL to the caller. */
		if (flags & TQ_SLEEP) {
			spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
			/* schedule_timeout() only sleeps when the task has
			 * left TASK_RUNNING beforehand; without this the
			 * loop would spin instead of waiting */
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(HZ / 100);
			spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
			if (count < 100)
				GOTO(retry, count++);

			RETURN(NULL);
		}

		/* Unreachable, TQ_SLEEP xor TQ_NOSLEEP */
		SBUG();
	}

	/* The allocation may sleep, so it is done with the lock dropped */
	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
	t = kmem_alloc(sizeof(spl_task_t), flags & (TQ_SLEEP | TQ_NOSLEEP));
	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);

	if (t) {
		spin_lock_init(&t->t_lock);
		INIT_LIST_HEAD(&t->t_list);
		t->t_id = 0;
		t->t_func = NULL;
		t->t_arg = NULL;
		tq->tq_nalloc++;
	}

	RETURN(t);
}
/* NOTE: Must be called with tq->tq_lock held, expects the spl_task_t
 * to already be removed from the free, work, or pending taskq lists.
 * Frees the task and drops it from the tq_nalloc count.
 */
static void
task_free(taskq_t *tq, spl_task_t *t)
{
ENTRY;
ASSERT(tq);
ASSERT(t);
ASSERT(spin_is_locked(&tq->tq_lock));
ASSERT(list_empty(&t->t_list));
kmem_free(t, sizeof(spl_task_t));
tq->tq_nalloc--;
EXIT;
}
/* Retire a task that is no longer needed.
 *
 * NOTE: Must be called with tq->tq_lock held.  The spl_task_t is unlinked
 * from whatever list it is on and is then either destroyed, when more than
 * tq_minalloc tasks are allocated, or reset and moved to the tail of the
 * free list for later reuse.
 */
static void
task_done(taskq_t *tq, spl_task_t *t)
{
	ENTRY;

	ASSERT(tq);
	ASSERT(t);
	ASSERT(spin_is_locked(&tq->tq_lock));

	list_del_init(&t->t_list);

	if (tq->tq_nalloc > tq->tq_minalloc) {
		/* Pool is above its minimum size, shrink it */
		task_free(tq, t);
	} else {
		t->t_id = 0;
		t->t_func = NULL;
		t->t_arg = NULL;
		list_add_tail(&t->t_list, &tq->tq_free_list);
	}

	EXIT;
}
/* Taskqid's are handed out in a monotonically increasing fashion per
 * taskq_t. We don't handle taskqid wrapping yet, but fortunately it is
 * a 64-bit value so this is probably never going to happen. The lowest
 * pending taskqid is stored in the taskq_t to make it easy for any
 * taskq_wait()'ers to know if the tasks they're waiting for have
 * completed. Unfortunately, tq_lowest_id is kept up to date in
 * a pretty brain dead way, something more clever should be done.
 *
 * Returns non-zero once tq_lowest_id has reached 'id'.  The field is read
 * without taking tq_lock.
 */
static int
taskq_wait_check(taskq_t *tq, taskqid_t id)
{
RETURN(tq->tq_lowest_id >= id);
}
/* Expected to wait for all previously scheduled tasks to complete. We do
 * not need to wait for tasks scheduled after this call to complete. In
 * other words we do not need to drain the entire taskq.
 *
 * Sleeps on tq_wait_waitq until taskq_wait_check(tq, id) holds. */
void
__taskq_wait_id(taskq_t *tq, taskqid_t id)
{
ENTRY;
ASSERT(tq);
wait_event(tq->tq_wait_waitq, taskq_wait_check(tq, id));
EXIT;
}
EXPORT_SYMBOL(__taskq_wait_id);
/* Wait for every task dispatched so far.  The next id to be handed out
 * is sampled under tq_lock and used as the wait target, so tasks
 * dispatched after the sample are not waited for. */
void
__taskq_wait(taskq_t *tq)
{
taskqid_t id;
ENTRY;
ASSERT(tq);
spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
id = tq->tq_next_id;
spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
__taskq_wait_id(tq, id);
EXIT;
}
EXPORT_SYMBOL(__taskq_wait);
/* Report whether the thread 't' is one of the worker threads of 'tq'.
 *
 * The first tq_nthreads slots of tq_threads are compared against 't'.
 * Returns 1 on a match and 0 otherwise.  tq_lock is not taken.
 */
int
__taskq_member(taskq_t *tq, void *t)
{
	struct task_struct *candidate;
	int slot;
	ENTRY;

	ASSERT(tq);
	ASSERT(t);

	candidate = (struct task_struct *)t;

	for (slot = 0; slot < tq->tq_nthreads; slot++) {
		if (tq->tq_threads[slot] == candidate)
			RETURN(1);
	}

	RETURN(0);
}
EXPORT_SYMBOL(__taskq_member);
/* Queue func(arg) for execution by one of the taskq's worker threads.
 *
 * flags carries TQ_SLEEP or TQ_NOSLEEP for the task allocation (see
 * task_alloc()), plus optionally TQ_NOQUEUE to refuse the dispatch when
 * every worker thread is already busy.
 *
 * Returns the id assigned to the task, or 0 when nothing was queued: the
 * taskq is no longer active, TQ_NOQUEUE found no idle thread, or no
 * spl_task_t could be obtained.  Requesting TQ_SLEEP from atomic context
 * is treated as a bug.
 */
taskqid_t
__taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
{
spl_task_t *t;
taskqid_t rc = 0;
ENTRY;
ASSERT(tq);
ASSERT(func);
if (unlikely(in_atomic() && (flags & TQ_SLEEP))) {
CERROR("May schedule while atomic: %s/0x%08x/%d\n",
current->comm, preempt_count(), current->pid);
SBUG();
}
spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
/* Taskq being destroyed and all tasks drained */
if (!(tq->tq_flags & TQ_ACTIVE))
GOTO(out, rc = 0);
/* Do not queue the task unless there is idle thread for it */
ASSERT(tq->tq_nactive <= tq->tq_nthreads);
if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads))
GOTO(out, rc = 0);
/* task_alloc() may drop and re-take tq_lock while it waits */
if ((t = task_alloc(tq, flags)) == NULL)
GOTO(out, rc = 0);
spin_lock(&t->t_lock);
list_add_tail(&t->t_list, &tq->tq_pend_list);
t->t_id = rc = tq->tq_next_id;
tq->tq_next_id++;
t->t_func = func;
t->t_arg = arg;
spin_unlock(&t->t_lock);
/* Kick a worker sleeping in taskq_thread() */
wake_up(&tq->tq_work_waitq);
out:
spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
RETURN(rc);
}
EXPORT_SYMBOL(__taskq_dispatch);
/* Return the lowest task id that has not yet completed.
 *
 * NOTE: Must be called with tq->tq_lock held.  Both the pending and the
 * work list are scanned.  When neither holds a task the next id to be
 * handed out is returned, so tq_lowest_id never runs ahead of a task that
 * has not been dispatched yet.  Returning the maximum id instead would
 * leave tq_lowest_id stuck there and let later waiters through while
 * their tasks are still queued.
 */
static taskqid_t
taskq_lowest_id(taskq_t *tq)
{
	taskqid_t lowest_id;
	spl_task_t *t;
	ENTRY;

	ASSERT(tq);
	ASSERT(spin_is_locked(&tq->tq_lock));

	/* Every queued task has an id below tq_next_id */
	lowest_id = tq->tq_next_id;

	list_for_each_entry(t, &tq->tq_pend_list, t_list)
		if (t->t_id < lowest_id)
			lowest_id = t->t_id;

	list_for_each_entry(t, &tq->tq_work_list, t_list)
		if (t->t_id < lowest_id)
			lowest_id = t->t_id;

	RETURN(lowest_id);
}
/* Main loop of a taskq worker thread; 'args' is the owning taskq_t.
 *
 * The thread blocks all signals, registers itself in tq_nthreads and then
 * loops until kthread_should_stop(): it sleeps on tq_work_waitq while the
 * pending list is empty, otherwise moves the first pending task to the
 * work list, runs it with tq_lock dropped, retires it through task_done()
 * and wakes everybody sleeping on tq_wait_waitq.  Always returns 0.
 */
static int
taskq_thread(void *args)
{
DECLARE_WAITQUEUE(wait, current);
sigset_t blocked;
taskqid_t id;
taskq_t *tq = args;
spl_task_t *t;
ENTRY;
ASSERT(tq);
current->flags |= PF_NOFREEZE;
sigfillset(&blocked);
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
tq->tq_nthreads++;
/* __taskq_create() waits on tq_wait_waitq for all threads to start */
wake_up(&tq->tq_wait_waitq);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
add_wait_queue(&tq->tq_work_waitq, &wait);
if (list_empty(&tq->tq_pend_list)) {
/* Nothing to do, sleep with the lock dropped until woken */
spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
schedule();
spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
} else {
__set_current_state(TASK_RUNNING);
}
remove_wait_queue(&tq->tq_work_waitq, &wait);
if (!list_empty(&tq->tq_pend_list)) {
t = list_entry(tq->tq_pend_list.next, spl_task_t, t_list);
list_del_init(&t->t_list);
list_add_tail(&t->t_list, &tq->tq_work_list);
tq->tq_nactive++;
spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
/* Perform the requested task */
t->t_func(t->t_arg);
spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
tq->tq_nactive--;
/* Save the id, task_done() may reset or free the task */
id = t->t_id;
task_done(tq, t);
/* Update the lowest remaining taskqid yet to run */
if (tq->tq_lowest_id == id) {
tq->tq_lowest_id = taskq_lowest_id(tq);
ASSERT(tq->tq_lowest_id > id);
}
wake_up_all(&tq->tq_wait_waitq);
}
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
tq->tq_nthreads--;
spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
RETURN(0);
}
/* Create a task queue served by 'nthreads' kernel threads.
 *
 * name     - base name, threads are called "<name>/<index>"; the pointer
 *            itself is stored in the taskq, the string is not copied
 * nthreads - number of worker threads to start
 * pri      - priority, converted to a nice value for each thread
 * minalloc - number of spl_task_t's kept on the free list
 * maxalloc - upper bound on allocated spl_task_t's
 * flags    - TASKQ_PREPOPULATE fills the free list with minalloc tasks;
 *            TASKQ_CPR_SAFE and TASKQ_DYNAMIC are unsupported (asserted)
 *
 * Thread i is bound to cpu (i % num_online_cpus()).  The function waits
 * until every thread that was created has registered itself.
 *
 * Returns the new taskq, or NULL when an allocation fails or any thread
 * could not be created (the partially built taskq is destroyed).
 */
taskq_t *
__taskq_create(const char *name, int nthreads, pri_t pri,
               int minalloc, int maxalloc, uint_t flags)
{
	taskq_t *tq;
	struct task_struct *t;
	int rc = 0, i, j = 0;
	ENTRY;

	ASSERT(name != NULL);
	ASSERT(pri <= maxclsyspri);
	ASSERT(minalloc >= 0);
	ASSERT(maxalloc <= INT_MAX);
	ASSERT(!(flags & (TASKQ_CPR_SAFE | TASKQ_DYNAMIC))); /* Unsupported */

	tq = kmem_alloc(sizeof(*tq), KM_SLEEP);
	if (tq == NULL)
		RETURN(NULL);

	tq->tq_threads = kmem_alloc(nthreads * sizeof(t), KM_SLEEP);
	if (tq->tq_threads == NULL) {
		kmem_free(tq, sizeof(*tq));
		RETURN(NULL);
	}

	spin_lock_init(&tq->tq_lock);
	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
	tq->tq_name      = name;
	tq->tq_nactive   = 0;
	tq->tq_nthreads  = 0;
	tq->tq_pri       = pri;
	tq->tq_minalloc  = minalloc;
	tq->tq_maxalloc  = maxalloc;
	tq->tq_nalloc    = 0;
	tq->tq_flags     = (flags | TQ_ACTIVE);
	tq->tq_next_id   = 1;
	tq->tq_lowest_id = 1;
	INIT_LIST_HEAD(&tq->tq_free_list);
	INIT_LIST_HEAD(&tq->tq_work_list);
	INIT_LIST_HEAD(&tq->tq_pend_list);
	init_waitqueue_head(&tq->tq_work_waitq);
	init_waitqueue_head(&tq->tq_wait_waitq);

	if (flags & TASKQ_PREPOPULATE)
		for (i = 0; i < minalloc; i++)
			task_done(tq, task_alloc(tq, TQ_SLEEP | TQ_NEW));

	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);

	for (i = 0; i < nthreads; i++) {
		t = kthread_create(taskq_thread, tq, "%s/%d", name, i);
		/* kthread_create() reports failure with an ERR_PTR()
		 * encoded pointer rather than NULL */
		if (t != NULL && !IS_ERR(t)) {
			tq->tq_threads[i] = t;
			kthread_bind(t, i % num_online_cpus());
			set_user_nice(t, PRIO_TO_NICE(pri));
			wake_up_process(t);
			j++;
		} else {
			tq->tq_threads[i] = NULL;
			rc = 1;
		}
	}

	/* Wait for all threads to be started before potential destroy */
	wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j);

	if (rc) {
		__taskq_destroy(tq);
		tq = NULL;
	}

	RETURN(tq);
}
EXPORT_SYMBOL(__taskq_create);
/* Destroy a task queue.
 *
 * TQ_ACTIVE is cleared so no further tasks are accepted, all previously
 * dispatched tasks are waited for, every worker thread is stopped, the
 * free list is released and finally the taskq itself is freed.
 *
 * NOTE(review): the thread array is walked and freed using the current
 * tq_nthreads rather than the count it was allocated with; these differ
 * when __taskq_create() failed to start some threads - confirm this is
 * handled.
 */
void
__taskq_destroy(taskq_t *tq)
{
spl_task_t *t;
int i, nthreads;
ENTRY;
ASSERT(tq);
spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
tq->tq_flags &= ~TQ_ACTIVE;
spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
/* TQ_ACTIVE cleared prevents new tasks being added to pending */
__taskq_wait(tq);
/* Snapshot the count, each exiting thread decrements tq_nthreads */
nthreads = tq->tq_nthreads;
for (i = 0; i < nthreads; i++)
if (tq->tq_threads[i])
kthread_stop(tq->tq_threads[i]);
spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
while (!list_empty(&tq->tq_free_list)) {
t = list_entry(tq->tq_free_list.next, spl_task_t, t_list);
list_del_init(&t->t_list);
task_free(tq, t);
}
ASSERT(tq->tq_nthreads == 0);
ASSERT(tq->tq_nalloc == 0);
ASSERT(list_empty(&tq->tq_free_list));
ASSERT(list_empty(&tq->tq_work_list));
ASSERT(list_empty(&tq->tq_pend_list));
spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
kmem_free(tq->tq_threads, nthreads * sizeof(spl_task_t *));
kmem_free(tq, sizeof(taskq_t));
EXIT;
}
EXPORT_SYMBOL(__taskq_destroy);
/* Subsystem setup: create the global system_taskq with 64 threads at
 * minclsyspri, a prepopulated pool of 4 tasks and at most 512 tasks.
 * Returns 0 on success and 1 when the taskq could not be created.
 */
int
spl_taskq_init(void)
{
	ENTRY;

	system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
	                            TASKQ_PREPOPULATE);
	if (system_taskq != NULL)
		RETURN(0);

	RETURN(1);
}
/* Subsystem teardown: destroy the global system_taskq created by
 * spl_taskq_init(). */
void
spl_taskq_fini(void)
{
ENTRY;
taskq_destroy(system_taskq);
EXIT;
}
+135
View File
@@ -0,0 +1,135 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/thread.h>
#include <sys/kmem.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_THREAD
/*
* Thread interfaces
*/
/*
 * Per-thread startup parameters.  __thread_create() fills one of these in
 * and hands it to thread_generic_wrapper(), which frees it before calling
 * the registered function.
 */
typedef struct thread_priv_s {
unsigned long tp_magic; /* Set to TP_MAGIC; asserted by the wrapper */
int tp_name_size; /* Allocated size of tp_name in bytes */
char *tp_name; /* Name (without _thread suffix) */
void (*tp_func)(void *); /* Registered function */
void *tp_args; /* Args to be passed to function */
size_t tp_len; /* Len given to __thread_create(); only recorded */
int tp_state; /* State to start thread at */
pri_t tp_pri; /* Priority to start thread at */
} thread_priv_t;
static int
thread_generic_wrapper(void *arg)
{
thread_priv_t *tp = (thread_priv_t *)arg;
void (*func)(void *);
void *args;
ASSERT(tp->tp_magic == TP_MAGIC);
func = tp->tp_func;
args = tp->tp_args;
set_current_state(tp->tp_state);
set_user_nice((kthread_t *)get_current(), PRIO_TO_NICE(tp->tp_pri));
kmem_free(tp->tp_name, tp->tp_name_size);
kmem_free(tp, sizeof(thread_priv_t));
if (func)
func(args);
return 0;
}
/*
 * Terminate the calling thread via complete_and_exit(NULL, 0).  The
 * ENTRY/EXIT trace pair is emitted up front because the code after
 * complete_and_exit() is marked unreachable.
 */
void
__thread_exit(void)
{
ENTRY;
EXIT;
complete_and_exit(NULL, 0);
/* Unreachable */
}
EXPORT_SYMBOL(__thread_exit);
/*
 * Create and start a kernel thread which runs func(args).
 *
 * stk must be NULL (variable stack sizes are unsupported) and pp is
 * ignored.  name is expected to be the function name, since the exposed
 * API has no parameter for passing a thread name; it is copied and cut at
 * the first occurrence of "_thread" to form the kthread name.  state and
 * pri are applied by thread_generic_wrapper() once the new thread runs;
 * len is only recorded.
 *
 * Returns the new thread, or NULL when an allocation or kthread_create()
 * fails.  Solaris style callers likely never check for NULL, since
 * thread_create() cannot fail there.
 */
kthread_t *
__thread_create(caddr_t stk, size_t stksize, thread_func_t func,
                const char *name, void *args, size_t len, int *pp,
                int state, pri_t pri)
{
	thread_priv_t *tp;
	struct task_struct *tsk;
	char *p;
	ENTRY;

	/* Option pp is simply ignored */
	/* Variable stack size unsupported */
	ASSERT(stk == NULL);

	tp = kmem_alloc(sizeof(thread_priv_t), KM_SLEEP);
	if (tp == NULL)
		RETURN(NULL);

	tp->tp_magic = TP_MAGIC;
	tp->tp_name_size = strlen(name) + 1;

	tp->tp_name = kmem_alloc(tp->tp_name_size, KM_SLEEP);
	if (tp->tp_name == NULL) {
		kmem_free(tp, sizeof(thread_priv_t));
		RETURN(NULL);
	}

	/* The buffer is strlen(name) + 1 bytes, so the NUL is always copied */
	strncpy(tp->tp_name, name, tp->tp_name_size);

	/* Strip "_thread" (and anything after it) from the passed name,
	 * which will be the func name since the exposed API has no
	 * parameter for passing a name.
	 */
	p = strstr(tp->tp_name, "_thread");
	if (p)
		p[0] = '\0';

	tp->tp_func  = func;
	tp->tp_args  = args;
	tp->tp_len   = len;
	tp->tp_state = state;
	tp->tp_pri   = pri;

	/* Pass the name as an argument, not as the format string, so that
	 * a '%' in it is never interpreted as a conversion. */
	tsk = kthread_create(thread_generic_wrapper, (void *)tp,
	                     "%s", tp->tp_name);
	if (IS_ERR(tsk)) {
		CERROR("Failed to create thread: %ld\n", PTR_ERR(tsk));
		/* The wrapper will never run, so nothing else frees tp */
		kmem_free(tp->tp_name, tp->tp_name_size);
		kmem_free(tp, sizeof(thread_priv_t));
		RETURN(NULL);
	}

	/* From here on tp belongs to the new thread and must not be used */
	wake_up_process(tsk);
	RETURN((kthread_t *)tsk);
}
EXPORT_SYMBOL(__thread_create);
+92
View File
@@ -0,0 +1,92 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/sysmacros.h>
#include <sys/time.h>
#ifdef HAVE_MONOTONIC_CLOCK
extern unsigned long long monotonic_clock(void);
#endif
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_TIME
/*
 * Fill *ts with the time reported by do_gettimeofday(), converting its
 * microsecond field to nanoseconds.
 */
void
__gethrestime(timestruc_t *ts)
{
struct timeval tv;
do_gettimeofday(&tv);
ts->tv_sec = tv.tv_sec;
ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC;
}
EXPORT_SYMBOL(__gethrestime);
/*
 * Return a nanosecond timestamp.
 *
 * Use monotonic_clock() when HAVE_MONOTONIC_CLOCK is defined.  It's faster
 * and is available on older kernels, but few architectures have it, so
 * otherwise we must fall back to deriving the value from the 64-bit
 * jiffies counter, which limits the resolution to one jiffy (1/HZ sec).
 */
hrtime_t
__gethrtime(void) {
#ifdef HAVE_MONOTONIC_CLOCK
	unsigned long long res = monotonic_clock();

	/* Deal with signed/unsigned mismatch */
	return (hrtime_t)(res & ~(1ULL << 63));
#else
	int64_t j = get_jiffies_64();
	int64_t sec = j / HZ;
	int64_t rem = j - sec * HZ;

	/*
	 * Convert the whole seconds and the sub-second remainder
	 * separately.  Multiplying the full jiffies count by NSEC_PER_SEC
	 * before dividing overflows a signed 64-bit value once the count
	 * passes 2^63 / NSEC_PER_SEC, which is roughly 106 days worth of
	 * jiffies at HZ=1000.  The result is otherwise unchanged.
	 */
	return sec * NSEC_PER_SEC + rem * NSEC_PER_SEC / HZ;
#endif
}
EXPORT_SYMBOL(__gethrtime);
/* set_normalized_timespec() API changes
* 2.6.0 - 2.6.15: Inline function provided by linux/time.h
* 2.6.16 - 2.6.25: Function prototype defined but not exported
* 2.6.26 - 2.6.x: Function defined and exported
*/
#if !defined(HAVE_SET_NORMALIZED_TIMESPEC_INLINE) && \
!defined(HAVE_SET_NORMALIZED_TIMESPEC_EXPORT)
/*
 * Local copy of set_normalized_timespec() for kernels which neither inline
 * nor export it.  Stores sec/nsec in *ts after moving whole seconds out of
 * nsec, so the stored tv_nsec ends up in the range [0, NSEC_PER_SEC).
 */
void
set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
{
	/* Carry surplus seconds out of an over-large nanosecond count */
	for (; nsec >= NSEC_PER_SEC; nsec -= NSEC_PER_SEC)
		sec++;

	/* Borrow seconds until the nanosecond count is non-negative */
	for (; nsec < 0; nsec += NSEC_PER_SEC)
		sec--;

	ts->tv_nsec = nsec;
	ts->tv_sec = sec;
}
EXPORT_SYMBOL(set_normalized_timespec);
#endif
+678
View File
@@ -0,0 +1,678 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <sys/sysmacros.h>
#include <sys/vnode.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_VNODE
/* Exported root directory handle; vn_openat() asserts its vp equals this */
void *rootdir = NULL;
EXPORT_SYMBOL(rootdir);
/* Object caches for struct vnode and file_t, created in vn_init() */
static spl_kmem_cache_t *vn_cache;
static spl_kmem_cache_t *vn_file_cache;
/* vn_file_list tracks the file_t objects handed out by vn_getf();
 * vn_file_lock is held around lookups and updates of that list */
static spinlock_t vn_file_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(vn_file_list);
/*
 * Translate the file type bits of a Linux mode into the matching vtype_t.
 * Returns VNON when the mode matches none of the tested file types.
 */
static vtype_t
vn_get_sol_type(umode_t mode)
{
	if (S_ISREG(mode))
		return VREG;

	if (S_ISDIR(mode))
		return VDIR;

	if (S_ISCHR(mode))
		return VCHR;

	if (S_ISBLK(mode))
		return VBLK;

	if (S_ISFIFO(mode))
		return VFIFO;

	if (S_ISLNK(mode))
		return VLNK;

	if (S_ISSOCK(mode))
		return VSOCK;

	/* A second, unreachable S_ISCHR() test used to sit here */
	return VNON;
} /* vn_get_sol_type() */
/*
 * Allocate a vnode from vn_cache using the given kmem flag.  On success
 * v_file and v_type are cleared before the vnode is returned; a failed
 * allocation is passed back to the caller as NULL.
 */
vnode_t *
vn_alloc(int flag)
{
	vnode_t *vp;
	ENTRY;

	vp = kmem_cache_alloc(vn_cache, flag);
	if (vp == NULL)
		RETURN(vp);

	vp->v_type = 0;
	vp->v_file = NULL;

	RETURN(vp);
} /* vn_alloc() */
EXPORT_SYMBOL(vn_alloc);
/* Return a vnode obtained from vn_alloc() to vn_cache. */
void
vn_free(vnode_t *vp)
{
ENTRY;
kmem_cache_free(vn_cache, vp);
EXIT;
} /* vn_free() */
EXPORT_SYMBOL(vn_free);
/*
 * Open path with filp_open() and wrap the resulting file in a new vnode,
 * which is stored in *vpp.  seg must be UIO_SYSSPACE and flags must carry
 * FREAD and/or FWRITE; x1 and x2 are unused.  Returns 0 on success or a
 * positive errno value, in which case *vpp is left NULL.
 */
int
vn_open(const char *path, uio_seg_t seg, int flags, int mode,
        vnode_t **vpp, int x1, void *x2)
{
	struct file *fp;
	struct kstat stat;
	vnode_t *vp;
	int rc, creating, saved_umask = 0;
	ENTRY;

	ASSERT(flags & (FWRITE | FREAD));
	ASSERT(seg == UIO_SYSSPACE);
	ASSERT(vpp);
	*vpp = NULL;

	if ((flags & FWRITE) && !(flags & FCREAT))
		flags |= FEXCL;

	/* Note for filp_open() the two low bits must be remapped to mean:
	 *   01 - read-only  -> 00 read-only
	 *   10 - write-only -> 01 write-only
	 *   11 - read-write -> 10 read-write
	 */
	flags--;
	creating = flags & FCREAT;

	/* The umask is zeroed around a creating open and then restored */
	if (creating)
		saved_umask = xchg(&current->fs->umask, 0);

	fp = filp_open(path, flags, mode);

	if (creating)
		(void)xchg(&current->fs->umask, saved_umask);

	if (IS_ERR(fp))
		RETURN(-PTR_ERR(fp));

	rc = vfs_getattr(fp->f_vfsmnt, fp->f_dentry, &stat);
	if (rc != 0) {
		filp_close(fp, 0);
		RETURN(-rc);
	}

	vp = vn_alloc(KM_SLEEP);
	if (vp == NULL) {
		filp_close(fp, 0);
		RETURN(ENOMEM);
	}

	mutex_enter(&vp->v_lock);
	vp->v_type = vn_get_sol_type(stat.mode);
	vp->v_file = fp;
	*vpp = vp;
	mutex_exit(&vp->v_lock);

	RETURN(0);
} /* vn_open() */
EXPORT_SYMBOL(vn_open);
/*
 * Open path relative to the root directory: vp must be rootdir, and the
 * open is done by prefixing path with '/' and calling vn_open().  fd is
 * unused.  Returns vn_open()'s result, or ENOMEM when the temporary path
 * buffer cannot be allocated.
 */
int
vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
          vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
{
	char *abs_path;
	int size, rc;
	ENTRY;

	ASSERT(vp == rootdir);

	/* One extra byte for the leading '/' and one for the NUL */
	size = strlen(path) + 2;

	abs_path = kmalloc(size, GFP_KERNEL);
	if (abs_path == NULL)
		RETURN(ENOMEM);

	(void)snprintf(abs_path, size, "/%s", path);

	rc = vn_open(abs_path, seg, flags, mode, vpp, x1, x2);
	kfree(abs_path);

	RETURN(rc);
} /* vn_openat() */
EXPORT_SYMBOL(vn_openat);
/*
 * Read or write len bytes at offset off between the kernel buffer addr
 * and the file behind vp.  seg must be UIO_SYSSPACE, x1 must be 0 and x2
 * must be RLIM64_INFINITY; x3 is unused.
 *
 * When residp is given it receives the number of bytes not transferred.
 * Without it a short transfer is reported as EIO.  Returns 0 on success
 * or a positive errno value.
 */
int
vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
        uio_seg_t seg, int x1, rlim64_t x2, void *x3, ssize_t *residp)
{
	struct file *fp;
	mm_segment_t saved_fs;
	loff_t offset;
	int rc;
	ENTRY;

	ASSERT(uio == UIO_WRITE || uio == UIO_READ);
	ASSERT(vp);
	ASSERT(vp->v_file);
	ASSERT(seg == UIO_SYSSPACE);
	ASSERT(x1 == 0);
	ASSERT(x2 == RLIM64_INFINITY);

	fp = vp->v_file;
	offset = off;

	/* Writable user data segment must be briefly increased for this
	 * process so we can use the user space read call paths to write
	 * in to memory allocated by the kernel. */
	saved_fs = get_fs();
	set_fs(get_ds());

	rc = (uio & UIO_WRITE) ? vfs_write(fp, addr, len, &offset)
	                       : vfs_read(fp, addr, len, &offset);

	set_fs(saved_fs);

	if (rc < 0)
		RETURN(-rc);

	if (residp != NULL)
		*residp = len - rc;
	else if (rc != len)
		RETURN(EIO);

	RETURN(0);
} /* vn_rdwr() */
EXPORT_SYMBOL(vn_rdwr);
/*
 * Close the file behind vp with filp_close() and free the vnode.  The
 * vnode is freed regardless of the filp_close() result, whose negation is
 * returned.  flags and x1-x4 are unused.
 */
int
vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
{
int rc;
ENTRY;
ASSERT(vp);
ASSERT(vp->v_file);
rc = filp_close(vp->v_file, 0);
vn_free(vp);
RETURN(-rc);
} /* vn_close() */
EXPORT_SYMBOL(vn_close);
/*
 * vn_seek() does not actually seek; it only bounds checks the proposed
 * offset *noffp.  The checking is minimal and vn_rdwr() is left to catch
 * anything more serious.  Returns EINVAL when *noffp is negative or above
 * MAXOFFSET_T and 0 otherwise.  vp, ooff and ct are unused.
 */
int
vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
{
	if (*noffp < 0 || *noffp > MAXOFFSET_T)
		return EINVAL;

	return 0;
}
EXPORT_SYMBOL(vn_seek);
/*
 * Look up the last path component recorded in nd (nd->last) under the
 * directory dentry nd->nd_dentry using lookup_one_len(), and return
 * whatever that call returns.
 */
static struct dentry *
vn_lookup_hash(struct nameidata *nd)
{
return lookup_one_len(nd->last.name, nd->nd_dentry, nd->last.len);
} /* vn_lookup_hash() */
/* Release nd by calling dput() on nd->nd_dentry and mntput() on nd->nd_mnt. */
static void
vn_path_release(struct nameidata *nd)
{
dput(nd->nd_dentry);
mntput(nd->nd_mnt);
}
/*
 * Modified do_unlinkat() from linux/fs/namei.c, only uses exported symbols.
 *
 * Unlink the file named by path.  seg must be UIO_SYSSPACE and flags must
 * be RMFILE.  Every exit goes through RETURN(-rc) with rc holding either 0
 * or a negative kernel error, so the caller sees 0 or a positive errno.
 */
int
vn_remove(const char *path, uio_seg_t seg, int flags)
{
struct dentry *dentry;
struct nameidata nd;
struct inode *inode = NULL;
int rc = 0;
ENTRY;
ASSERT(seg == UIO_SYSSPACE);
ASSERT(flags == RMFILE);
/* Look up the path with LOOKUP_PARENT; the last component is then
 * examined through nd.last and nd.last_type below */
rc = path_lookup(path, LOOKUP_PARENT, &nd);
if (rc)
GOTO(exit, rc);
/* Anything other than a normal last component is refused */
rc = -EISDIR;
if (nd.last_type != LAST_NORM)
GOTO(exit1, rc);
/* Lock the parent directory inode; which lock exists depends on
 * HAVE_INODE_I_MUTEX */
#ifdef HAVE_INODE_I_MUTEX
mutex_lock_nested(&nd.nd_dentry->d_inode->i_mutex, I_MUTEX_PARENT);
#else
down(&nd.nd_dentry->d_inode->i_sem);
#endif
dentry = vn_lookup_hash(&nd);
rc = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct rc value */
if (nd.last.name[nd.last.len])
GOTO(slashes, rc);
/* Hold the inode across the unlink so that the final iput() below
 * happens after the parent directory lock has been dropped */
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
rc = vfs_unlink(nd.nd_dentry->d_inode, dentry);
/* The slashes: path below jumps back in here to drop the dentry */
exit2:
dput(dentry);
}
#ifdef HAVE_INODE_I_MUTEX
mutex_unlock(&nd.nd_dentry->d_inode->i_mutex);
#else
up(&nd.nd_dentry->d_inode->i_sem);
#endif
if (inode)
iput(inode); /* truncate the inode here */
exit1:
vn_path_release(&nd);
exit:
RETURN(-rc);
/* Characters follow the last component: choose the error from what the
 * dentry refers to, then rejoin the normal cleanup at exit2 */
slashes:
rc = !dentry->d_inode ? -ENOENT :
S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
GOTO(exit2, rc);
} /* vn_remove() */
EXPORT_SYMBOL(vn_remove);
/*
 * Modified do_rename() from linux/fs/namei.c, only uses exported symbols.
 *
 * Rename oldname to newname; x1 is unused.  Every exit goes through
 * RETURN(-rc) with rc holding either 0 or a negative kernel error, so the
 * caller sees 0 or a positive errno.  The exitN labels unwind in reverse
 * order of acquisition: new dentry, old dentry, rename lock, new path,
 * old path.
 */
int
vn_rename(const char *oldname, const char *newname, int x1)
{
struct dentry * old_dir, * new_dir;
struct dentry * old_dentry, *new_dentry;
struct dentry * trap;
struct nameidata oldnd, newnd;
int rc = 0;
ENTRY;
/* Look up both paths with LOOKUP_PARENT */
rc = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
if (rc)
GOTO(exit, rc);
rc = path_lookup(newname, LOOKUP_PARENT, &newnd);
if (rc)
GOTO(exit1, rc);
/* Both names must live on the same mount */
rc = -EXDEV;
if (oldnd.nd_mnt != newnd.nd_mnt)
GOTO(exit2, rc);
old_dir = oldnd.nd_dentry;
/* Both last components must be normal names; -EBUSY covers both tests */
rc = -EBUSY;
if (oldnd.last_type != LAST_NORM)
GOTO(exit2, rc);
new_dir = newnd.nd_dentry;
if (newnd.last_type != LAST_NORM)
GOTO(exit2, rc);
/* trap is compared against both dentries in the ancestor checks below */
trap = lock_rename(new_dir, old_dir);
old_dentry = vn_lookup_hash(&oldnd);
rc = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
GOTO(exit3, rc);
/* source must exist */
rc = -ENOENT;
if (!old_dentry->d_inode)
GOTO(exit4, rc);
/* unless the source is a directory trailing slashes give -ENOTDIR */
if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
rc = -ENOTDIR;
if (oldnd.last.name[oldnd.last.len])
GOTO(exit4, rc);
if (newnd.last.name[newnd.last.len])
GOTO(exit4, rc);
}
/* source should not be ancestor of target */
rc = -EINVAL;
if (old_dentry == trap)
GOTO(exit4, rc);
new_dentry = vn_lookup_hash(&newnd);
rc = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
GOTO(exit4, rc);
/* target should not be an ancestor of source */
rc = -ENOTEMPTY;
if (new_dentry == trap)
GOTO(exit5, rc);
rc = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
exit2:
vn_path_release(&newnd);
exit1:
vn_path_release(&oldnd);
exit:
RETURN(-rc);
}
EXPORT_SYMBOL(vn_rename);
/*
 * Fill *vap with the attributes vfs_getattr() reports for the file behind
 * vp.  Timestamps are stored as seconds plus microseconds and va_fsid is
 * always 0.  flags, x3 and x4 are unused.  Returns 0 on success or a
 * positive errno value.
 */
int
vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
{
	struct kstat st;
	struct file *filp;
	int rc;
	ENTRY;

	ASSERT(vp);
	ASSERT(vp->v_file);
	ASSERT(vap);

	filp = vp->v_file;

	rc = vfs_getattr(filp->f_vfsmnt, filp->f_dentry, &st);
	if (rc != 0)
		RETURN(-rc);

	/* Type, permissions and ownership */
	vap->va_type          = vn_get_sol_type(st.mode);
	vap->va_mode          = st.mode;
	vap->va_uid           = st.uid;
	vap->va_gid           = st.gid;

	/* Identity and size */
	vap->va_fsid          = 0;
	vap->va_nodeid        = st.ino;
	vap->va_nlink         = st.nlink;
	vap->va_size          = st.size;
	vap->va_blocksize     = st.blksize;

	/* Timestamps, nanoseconds scaled down to microseconds */
	vap->va_atime.tv_sec  = st.atime.tv_sec;
	vap->va_atime.tv_usec = st.atime.tv_nsec / NSEC_PER_USEC;
	vap->va_mtime.tv_sec  = st.mtime.tv_sec;
	vap->va_mtime.tv_usec = st.mtime.tv_nsec / NSEC_PER_USEC;
	vap->va_ctime.tv_sec  = st.ctime.tv_sec;
	vap->va_ctime.tv_usec = st.ctime.tv_nsec / NSEC_PER_USEC;

	vap->va_rdev          = st.rdev;
	vap->va_blocks        = st.blocks;

	RETURN(0);
}
EXPORT_SYMBOL(vn_getattr);
/*
 * Sync the file behind vp with file_fsync(), requesting a data-only sync
 * when FDSYNC is set in flags.  x3 and x4 are unused.  Returns the negated
 * file_fsync() result.
 */
int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
{
	int datasync;
	ENTRY;

	ASSERT(vp);
	ASSERT(vp->v_file);

	datasync = (flags & FDSYNC) ? 1 : 0;

	RETURN(-file_fsync(vp->v_file, vp->v_file->f_dentry, datasync));
} /* vn_fsync() */
EXPORT_SYMBOL(vn_fsync);
/*
 * Return the tracked file_t whose f_fd equals fd, or NULL when
 * vn_file_list holds no such entry.  No reference is taken here.
 * Function must be called while holding the vn_file_lock.
 */
static file_t *
file_find(int fd)
{
	file_t *fp;

	ASSERT(spin_is_locked(&vn_file_lock));

	list_for_each_entry(fp, &vn_file_list, f_list) {
		if (fp->f_fd != fd)
			continue;

		/* Anything still on the list is expected to be referenced */
		ASSERT(atomic_read(&fp->f_ref) != 0);
		return fp;
	}

	return NULL;
} /* file_find() */
/*
 * Return the file_t tracking descriptor fd, taking a reference on it.
 *
 * When fd is already on vn_file_list only its reference count is bumped.
 * Otherwise a new file_t is allocated, bound to the struct file returned
 * by fget() and to a freshly allocated vnode, and added to the list with
 * a single reference.  Returns NULL when the allocation, fget(),
 * vn_alloc() or vfs_getattr() fails.
 */
file_t *
vn_getf(int fd)
{
	struct kstat stat;
	struct file *lfp;
	file_t *fp;
	vnode_t *vp;
	int rc = 0;
	ENTRY;

	/* Already open just take an extra reference */
	spin_lock(&vn_file_lock);

	fp = file_find(fd);
	if (fp) {
		atomic_inc(&fp->f_ref);
		spin_unlock(&vn_file_lock);
		RETURN(fp);
	}

	spin_unlock(&vn_file_lock);

	/* File was not yet opened create the object and setup */
	fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
	if (fp == NULL)
		GOTO(out, rc);

	mutex_enter(&fp->f_lock);

	fp->f_fd = fd;
	fp->f_offset = 0;

	lfp = fget(fd);
	if (lfp == NULL)
		GOTO(out_mutex, rc);

	vp = vn_alloc(KM_SLEEP);
	if (vp == NULL)
		GOTO(out_fget, rc);

	if (vfs_getattr(lfp->f_vfsmnt, lfp->f_dentry, &stat))
		GOTO(out_vnode, rc);

	mutex_enter(&vp->v_lock);
	vp->v_type = vn_get_sol_type(stat.mode);
	vp->v_file = lfp;
	mutex_exit(&vp->v_lock);

	fp->f_vnode = vp;
	fp->f_file = lfp;

	/* Take the initial reference only now that nothing can fail.  The
	 * error paths below hand fp straight back to the cache, and f_ref
	 * is only ever zeroed by vn_file_cache_constructor(); assuming the
	 * cache reuses objects without re-running the constructor, taking
	 * the reference earlier would leave a stale count on the object. */
	atomic_inc(&fp->f_ref);

	/* Put it on the tracking list */
	spin_lock(&vn_file_lock);
	list_add(&fp->f_list, &vn_file_list);
	spin_unlock(&vn_file_lock);

	mutex_exit(&fp->f_lock);
	RETURN(fp);

out_vnode:
	vn_free(vp);
out_fget:
	fput(lfp);
out_mutex:
	mutex_exit(&fp->f_lock);
	kmem_cache_free(vn_file_cache, fp);
out:
	RETURN(NULL);
} /* getf() */
EXPORT_SYMBOL(getf);
/*
 * Release everything a file_t owns: drop the struct file reference with
 * fput(), free the vnode and return fp to vn_file_cache.  Both callers in
 * this file unlink fp from vn_file_list first and call this with
 * vn_file_lock held.
 */
static void releasef_locked(file_t *fp)
{
ASSERT(fp->f_file);
ASSERT(fp->f_vnode);
/* Unlinked from list, no refs, safe to free outside mutex */
fput(fp->f_file);
vn_free(fp->f_vnode);
kmem_cache_free(vn_file_cache, fp);
}
void
vn_releasef(int fd)
{
file_t *fp;
ENTRY;
spin_lock(&vn_file_lock);
fp = file_find(fd);
if (fp) {
atomic_dec(&fp->f_ref);
if (atomic_read(&fp->f_ref) > 0) {
spin_unlock(&vn_file_lock);
EXIT;
return;
}
list_del(&fp->f_list);
releasef_locked(fp);
}
spin_unlock(&vn_file_lock);
EXIT;
return;
} /* releasef() */
EXPORT_SYMBOL(releasef);
/*
 * Constructor registered for vn_cache: initialises the vnode's v_lock.
 * cdrarg and kmflags are unused.  Always returns 0.
 */
static int
vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
struct vnode *vp = buf;
mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
return (0);
} /* vn_cache_constructor() */
/* Destructor registered for vn_cache: destroys the vnode's v_lock. */
static void
vn_cache_destructor(void *buf, void *cdrarg)
{
struct vnode *vp = buf;
mutex_destroy(&vp->v_lock);
} /* vn_cache_destructor() */
/*
 * Constructor registered for vn_file_cache: zeroes the reference count and
 * initialises f_lock and the f_list linkage.  cdrarg and kmflags are
 * unused.  Always returns 0.
 */
static int
vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
file_t *fp = buf;
atomic_set(&fp->f_ref, 0);
mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
INIT_LIST_HEAD(&fp->f_list);
return (0);
} /* vn_file_cache_constructor() */
/* Destructor registered for vn_file_cache: destroys the file_t's f_lock. */
static void
vn_file_cache_destructor(void *buf, void *cdrarg)
{
file_t *fp = buf;
mutex_destroy(&fp->f_lock);
} /* vn_file_cache_destructor() */
/*
 * Create the two object caches used by this file: vn_cache for struct
 * vnode and vn_file_cache for file_t, each with its constructor and
 * destructor.  Always returns 0.
 *
 * NOTE(review): the kmem_cache_create() results are not checked here;
 * confirm whether it can return NULL in this implementation.
 */
int
vn_init(void)
{
ENTRY;
vn_cache = kmem_cache_create("spl_vn_cache",
sizeof(struct vnode), 64,
vn_cache_constructor,
vn_cache_destructor,
NULL, NULL, NULL, 0);
vn_file_cache = kmem_cache_create("spl_vn_file_cache",
sizeof(file_t), 64,
vn_file_cache_constructor,
vn_file_cache_destructor,
NULL, NULL, NULL, 0);
RETURN(0);
} /* vn_init() */
/*
 * Tear down the state created by vn_init().  Any file_t still on
 * vn_file_list is released and counted as leaked, then both caches are
 * destroyed.  A warning with the leak count is logged when it is nonzero.
 */
void
vn_fini(void)
{
	file_t *fp, *next_fp;
	int leaked = 0;
	ENTRY;

	spin_lock(&vn_file_lock);

	/* Whatever is still tracked was never released by its user */
	list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) {
		list_del(&fp->f_list);
		releasef_locked(fp);
		++leaked;
	}

	kmem_cache_destroy(vn_file_cache);
	vn_file_cache = NULL;

	spin_unlock(&vn_file_lock);

	if (leaked != 0)
		CWARN("Warning %d files leaked\n", leaked);

	kmem_cache_destroy(vn_cache);

	EXIT;
} /* vn_fini() */