2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* CDDL HEADER START
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
*
|
|
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
|
|
* See the License for the specific language governing permissions
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
|
*
|
|
|
|
* CDDL HEADER END
|
|
|
|
*/
|
|
|
|
/*
|
2010-08-27 01:24:34 +04:00
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
2008-11-20 23:01:55 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <poll.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <zlib.h>
|
2010-08-26 21:43:27 +04:00
|
|
|
#include <sys/signal.h>
|
2008-11-20 23:01:55 +03:00
|
|
|
#include <sys/spa.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/processor.h>
|
|
|
|
#include <sys/zfs_context.h>
|
2013-09-04 16:00:57 +04:00
|
|
|
#include <sys/rrwlock.h>
|
2008-11-20 23:01:55 +03:00
|
|
|
#include <sys/utsname.h>
|
2010-08-26 22:56:53 +04:00
|
|
|
#include <sys/time.h>
|
2009-02-18 23:51:31 +03:00
|
|
|
#include <sys/systeminfo.h>
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Emulation of kernel services in userland.
|
|
|
|
*/
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
int aok;
|
2008-11-20 23:01:55 +03:00
|
|
|
uint64_t physmem;
|
|
|
|
vnode_t *rootdir = (vnode_t *)0xabcd1234;
|
2009-02-18 23:51:31 +03:00
|
|
|
char hw_serial[HW_HOSTID_LEN];
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
struct utsname utsname = {
|
|
|
|
"userland", "libzpool", "1", "1", "na"
|
|
|
|
};
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
/* this only exists to have its address taken */
|
|
|
|
struct proc p0;
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* threads
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
2010-08-26 21:43:27 +04:00
|
|
|
|
|
|
|
/*
 * Thread bookkeeping.  kthread_nr counts the kthreads currently accounted
 * for; kthread_cond is broadcast by zk_thread_exit() after it drops the
 * count so that thread_fini() can wait for it to reach zero.  kthread_key
 * is the thread-specific-data key holding each thread's kthread_t.
 */
pthread_key_t kthread_key;
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
int kthread_nr = 0;
|
|
|
|
|
|
|
|
static void
|
|
|
|
thread_init(void)
|
|
|
|
{
|
|
|
|
kthread_t *kt;
|
|
|
|
|
|
|
|
VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
|
|
|
|
|
|
|
|
/* Create entry for primary kthread */
|
2013-11-01 23:26:11 +04:00
|
|
|
kt = umem_zalloc(sizeof (kthread_t), UMEM_NOFAIL);
|
2010-08-26 21:43:27 +04:00
|
|
|
kt->t_tid = pthread_self();
|
|
|
|
kt->t_func = NULL;
|
|
|
|
|
|
|
|
VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
|
|
|
|
|
|
|
|
/* Only the main thread should be running at the moment */
|
|
|
|
ASSERT3S(kthread_nr, ==, 0);
|
|
|
|
kthread_nr = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
thread_fini(void)
|
|
|
|
{
|
|
|
|
kthread_t *kt = curthread;
|
|
|
|
|
|
|
|
ASSERT(pthread_equal(kt->t_tid, pthread_self()));
|
|
|
|
ASSERT3P(kt->t_func, ==, NULL);
|
|
|
|
|
2013-11-01 23:26:11 +04:00
|
|
|
umem_free(kt, sizeof (kthread_t));
|
2010-08-26 21:43:27 +04:00
|
|
|
|
|
|
|
/* Wait for all threads to exit via thread_exit() */
|
|
|
|
VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
|
|
|
|
|
|
|
|
kthread_nr--; /* Main thread is exiting */
|
|
|
|
|
|
|
|
while (kthread_nr > 0)
|
|
|
|
VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
|
|
|
|
0);
|
|
|
|
|
|
|
|
ASSERT3S(kthread_nr, ==, 0);
|
|
|
|
VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
|
|
|
|
|
|
|
|
VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
kthread_t *
|
2010-08-26 21:43:27 +04:00
|
|
|
zk_thread_current(void)
|
|
|
|
{
|
|
|
|
kthread_t *kt = pthread_getspecific(kthread_key);
|
|
|
|
|
|
|
|
ASSERT3P(kt, !=, NULL);
|
|
|
|
|
2013-11-01 23:26:11 +04:00
|
|
|
return (kt);
|
2010-08-26 21:43:27 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void *
|
|
|
|
zk_thread_helper(void *arg)
|
2008-11-20 23:01:55 +03:00
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
kthread_t *kt = (kthread_t *) arg;
|
|
|
|
|
|
|
|
VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
|
|
|
|
kthread_nr++;
|
|
|
|
VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
kt->t_tid = pthread_self();
|
|
|
|
((thread_func_arg_t) kt->t_func)(kt->t_arg);
|
|
|
|
|
|
|
|
/* Unreachable, thread must exit with thread_exit() */
|
|
|
|
abort();
|
|
|
|
|
2013-11-01 23:26:11 +04:00
|
|
|
return (NULL);
|
2010-08-26 21:43:27 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
kthread_t *
|
|
|
|
zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
|
2013-11-01 23:26:11 +04:00
|
|
|
size_t len, proc_t *pp, int state, pri_t pri, int detachstate)
|
2010-08-26 21:43:27 +04:00
|
|
|
{
|
|
|
|
kthread_t *kt;
|
|
|
|
pthread_attr_t attr;
|
|
|
|
size_t stack;
|
|
|
|
|
|
|
|
ASSERT3S(state & ~TS_RUN, ==, 0);
|
|
|
|
|
2013-11-01 23:26:11 +04:00
|
|
|
kt = umem_zalloc(sizeof (kthread_t), UMEM_NOFAIL);
|
2010-08-26 21:43:27 +04:00
|
|
|
kt->t_func = func;
|
|
|
|
kt->t_arg = arg;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The Solaris kernel stack size is 24k for x86/x86_64.
|
|
|
|
* The Linux kernel stack size is 8k for x86/x86_64.
|
|
|
|
*
|
|
|
|
* We reduce the default stack size in userspace, to ensure
|
|
|
|
* we observe stack overruns in user space as well as in
|
2012-09-06 13:06:05 +04:00
|
|
|
* kernel space. In practice we can't set the userspace stack
|
|
|
|
* size to 8k because differences in stack usage between kernel
|
|
|
|
* space and userspace could lead to spurious stack overflows
|
|
|
|
* (especially when debugging is enabled). Nevertheless, we try
|
|
|
|
* to set it to the lowest value that works (currently 8k*4).
|
|
|
|
* PTHREAD_STACK_MIN is the minimum stack required for a NULL
|
|
|
|
* procedure in user space and is added in to the stack
|
|
|
|
* requirements.
|
2010-08-26 21:43:27 +04:00
|
|
|
*
|
|
|
|
* Some buggy NPTL threading implementations include the
|
|
|
|
* guard area within the stack size allocations. In
|
|
|
|
* this case we allocate an extra page to account for the
|
|
|
|
* guard area since we only have two pages of usable stack
|
|
|
|
* on Linux.
|
|
|
|
*/
|
|
|
|
|
2013-01-29 22:35:02 +04:00
|
|
|
stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) * 4;
|
2010-08-26 21:43:27 +04:00
|
|
|
|
|
|
|
VERIFY3S(pthread_attr_init(&attr), ==, 0);
|
|
|
|
VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
|
|
|
|
VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
|
2012-09-27 15:31:46 +04:00
|
|
|
VERIFY3S(pthread_attr_setdetachstate(&attr, detachstate), ==, 0);
|
2010-08-26 21:43:27 +04:00
|
|
|
|
|
|
|
VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
|
|
|
|
==, 0);
|
|
|
|
|
|
|
|
VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
|
|
|
|
|
2013-11-01 23:26:11 +04:00
|
|
|
return (kt);
|
2010-08-26 21:43:27 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
zk_thread_exit(void)
|
|
|
|
{
|
|
|
|
kthread_t *kt = curthread;
|
|
|
|
|
|
|
|
ASSERT(pthread_equal(kt->t_tid, pthread_self()));
|
|
|
|
|
2013-11-01 23:26:11 +04:00
|
|
|
umem_free(kt, sizeof (kthread_t));
|
2010-08-26 21:43:27 +04:00
|
|
|
|
|
|
|
pthread_mutex_lock(&kthread_lock);
|
|
|
|
kthread_nr--;
|
|
|
|
pthread_mutex_unlock(&kthread_lock);
|
|
|
|
|
|
|
|
pthread_cond_broadcast(&kthread_cond);
|
|
|
|
pthread_exit((void *)TS_MAGIC);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
zk_thread_join(kt_did_t tid)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
|
|
|
|
pthread_join((pthread_t)tid, &ret);
|
|
|
|
VERIFY3P(ret, ==, (void *)TS_MAGIC);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* kstats
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
/*ARGSUSED*/
|
|
|
|
kstat_t *
|
2013-08-27 04:09:29 +04:00
|
|
|
kstat_create(const char *module, int instance, const char *name,
|
|
|
|
const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
|
2008-11-20 23:01:55 +03:00
|
|
|
{
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
void
|
|
|
|
kstat_install(kstat_t *ksp)
|
|
|
|
{}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
void
|
|
|
|
kstat_delete(kstat_t *ksp)
|
|
|
|
{}
|
|
|
|
|
Add visibility in to arc_read
This change is an attempt to add visibility into the arc_read calls
occurring on a system, in real time. To do this, a list was added to the
in memory SPA data structure for a pool, with each element on the list
corresponding to a call to arc_read. These entries are then exported
through the kstat interface, which can then be interpreted in userspace.
For each arc_read call, the following information is exported:
* A unique identifier (uint64_t)
* The time the entry was added to the list (hrtime_t)
(*not* wall clock time; relative to the other entries on the list)
* The objset ID (uint64_t)
* The object number (uint64_t)
* The indirection level (uint64_t)
* The block ID (uint64_t)
* The name of the function originating the arc_read call (char[24])
* The arc_flags from the arc_read call (uint32_t)
* The PID of the reading thread (pid_t)
* The command or name of thread originating read (char[16])
From this exported information one can see, in real time, exactly what
is being read, what function is generating the read, and whether or not
the read was found to be already cached.
There is still some work to be done, but this should serve as a good
starting point.
Specifically, dbuf_read's are not accounted for in the currently
exported information. Thus, a follow up patch should probably be added
to export these calls that never call into arc_read (they only hit the
dbuf hash table). In addition, it might be nice to create a utility
similar to "arcstat.py" to digest the exported information and display
it in a more readable format. Or perhaps, log the information and allow
for it to be "replayed" at a later time.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2013-09-07 03:09:05 +04:00
|
|
|
/*ARGSUSED*/
|
2013-08-27 04:09:29 +04:00
|
|
|
void
|
|
|
|
kstat_waitq_enter(kstat_io_t *kiop)
|
|
|
|
{}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
void
|
|
|
|
kstat_waitq_exit(kstat_io_t *kiop)
|
|
|
|
{}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
void
|
|
|
|
kstat_runq_enter(kstat_io_t *kiop)
|
|
|
|
{}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
void
|
|
|
|
kstat_runq_exit(kstat_io_t *kiop)
|
|
|
|
{}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
void
|
|
|
|
kstat_waitq_to_runq(kstat_io_t *kiop)
|
|
|
|
{}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
void
|
|
|
|
kstat_runq_back_to_waitq(kstat_io_t *kiop)
|
|
|
|
{}
|
|
|
|
|
Add visibility in to arc_read
This change is an attempt to add visibility into the arc_read calls
occurring on a system, in real time. To do this, a list was added to the
in memory SPA data structure for a pool, with each element on the list
corresponding to a call to arc_read. These entries are then exported
through the kstat interface, which can then be interpreted in userspace.
For each arc_read call, the following information is exported:
* A unique identifier (uint64_t)
* The time the entry was added to the list (hrtime_t)
(*not* wall clock time; relative to the other entries on the list)
* The objset ID (uint64_t)
* The object number (uint64_t)
* The indirection level (uint64_t)
* The block ID (uint64_t)
* The name of the function originating the arc_read call (char[24])
* The arc_flags from the arc_read call (uint32_t)
* The PID of the reading thread (pid_t)
* The command or name of thread originating read (char[16])
From this exported information one can see, in real time, exactly what
is being read, what function is generating the read, and whether or not
the read was found to be already cached.
There is still some work to be done, but this should serve as a good
starting point.
Specifically, dbuf_read's are not accounted for in the currently
exported information. Thus, a follow up patch should probably be added
to export these calls that never call into arc_read (they only hit the
dbuf hash table). In addition, it might be nice to create a utility
similar to "arcstat.py" to digest the exported information and display
it in a more readable format. Or perhaps, log the information and allow
for it to be "replayed" at a later time.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2013-09-07 03:09:05 +04:00
|
|
|
void
|
|
|
|
kstat_set_raw_ops(kstat_t *ksp,
|
|
|
|
int (*headers)(char *buf, size_t size),
|
|
|
|
int (*data)(char *buf, size_t size, void *data),
|
|
|
|
void *(*addr)(kstat_t *ksp, loff_t index))
|
|
|
|
{}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* mutexes
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
2010-08-26 21:43:27 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
void
|
2010-08-26 21:43:27 +04:00
|
|
|
mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
|
2008-11-20 23:01:55 +03:00
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3S(type, ==, MUTEX_DEFAULT);
|
|
|
|
ASSERT3P(cookie, ==, NULL);
|
|
|
|
mp->m_owner = MTX_INIT;
|
|
|
|
mp->m_magic = MTX_MAGIC;
|
|
|
|
VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2010-08-26 21:43:27 +04:00
|
|
|
mutex_destroy(kmutex_t *mp)
|
2008-11-20 23:01:55 +03:00
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
|
|
|
|
ASSERT3P(mp->m_owner, ==, MTX_INIT);
|
|
|
|
VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
|
|
|
|
mp->m_owner = MTX_DEST;
|
|
|
|
mp->m_magic = 0;
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mutex_enter(kmutex_t *mp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
|
|
|
|
ASSERT3P(mp->m_owner, !=, MTX_DEST);
|
|
|
|
ASSERT3P(mp->m_owner, !=, curthread);
|
|
|
|
VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
|
|
|
|
ASSERT3P(mp->m_owner, ==, MTX_INIT);
|
2008-11-20 23:01:55 +03:00
|
|
|
mp->m_owner = curthread;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
mutex_tryenter(kmutex_t *mp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
|
|
|
|
ASSERT3P(mp->m_owner, !=, MTX_DEST);
|
|
|
|
if (0 == pthread_mutex_trylock(&mp->m_lock)) {
|
|
|
|
ASSERT3P(mp->m_owner, ==, MTX_INIT);
|
2008-11-20 23:01:55 +03:00
|
|
|
mp->m_owner = curthread;
|
|
|
|
return (1);
|
|
|
|
} else {
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mutex_exit(kmutex_t *mp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
|
|
|
|
ASSERT3P(mutex_owner(mp), ==, curthread);
|
|
|
|
mp->m_owner = MTX_INIT;
|
|
|
|
VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void *
|
|
|
|
mutex_owner(kmutex_t *mp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
|
2008-11-20 23:01:55 +03:00
|
|
|
return (mp->m_owner);
|
|
|
|
}
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
int
|
|
|
|
mutex_held(kmutex_t *mp)
|
|
|
|
{
|
|
|
|
return (mp->m_owner == curthread);
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* rwlocks
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
2010-08-26 21:43:27 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
void
|
|
|
|
rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3S(type, ==, RW_DEFAULT);
|
|
|
|
ASSERT3P(arg, ==, NULL);
|
|
|
|
VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
|
|
|
|
rwlp->rw_owner = RW_INIT;
|
|
|
|
rwlp->rw_wr_owner = RW_INIT;
|
|
|
|
rwlp->rw_readers = 0;
|
|
|
|
rwlp->rw_magic = RW_MAGIC;
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
rw_destroy(krwlock_t *rwlp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
|
|
|
|
|
|
|
|
VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
|
|
|
|
rwlp->rw_magic = 0;
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
rw_enter(krwlock_t *rwlp, krw_t rw)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
|
|
|
|
ASSERT3P(rwlp->rw_owner, !=, curthread);
|
|
|
|
ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
if (rw == RW_READER) {
|
|
|
|
VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
|
|
|
|
ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
|
|
|
|
|
|
|
|
atomic_inc_uint(&rwlp->rw_readers);
|
|
|
|
} else {
|
|
|
|
VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
|
|
|
|
ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
|
|
|
|
ASSERT3U(rwlp->rw_readers, ==, 0);
|
|
|
|
|
|
|
|
rwlp->rw_wr_owner = curthread;
|
|
|
|
}
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
rwlp->rw_owner = curthread;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
rw_exit(krwlock_t *rwlp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
|
|
|
|
ASSERT(RW_LOCK_HELD(rwlp));
|
|
|
|
|
|
|
|
if (RW_READ_HELD(rwlp))
|
|
|
|
atomic_dec_uint(&rwlp->rw_readers);
|
|
|
|
else
|
|
|
|
rwlp->rw_wr_owner = RW_INIT;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
rwlp->rw_owner = RW_INIT;
|
|
|
|
VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
rw_tryenter(krwlock_t *rwlp, krw_t rw)
|
|
|
|
{
|
|
|
|
int rv;
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
if (rw == RW_READER)
|
2010-08-26 21:43:27 +04:00
|
|
|
rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
|
2008-11-20 23:01:55 +03:00
|
|
|
else
|
2010-08-26 21:43:27 +04:00
|
|
|
rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
if (rv == 0) {
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
|
|
|
|
|
|
|
|
if (rw == RW_READER)
|
|
|
|
atomic_inc_uint(&rwlp->rw_readers);
|
|
|
|
else {
|
|
|
|
ASSERT3U(rwlp->rw_readers, ==, 0);
|
|
|
|
rwlp->rw_wr_owner = curthread;
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
rwlp->rw_owner = curthread;
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
VERIFY3S(rv, ==, EBUSY);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
rw_tryupgrade(krwlock_t *rwlp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* condition variables
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
2010-08-26 21:43:27 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
void
|
|
|
|
cv_init(kcondvar_t *cv, char *name, int type, void *arg)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3S(type, ==, CV_DEFAULT);
|
|
|
|
cv->cv_magic = CV_MAGIC;
|
|
|
|
VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
cv_destroy(kcondvar_t *cv)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
|
|
|
|
VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
|
|
|
|
cv->cv_magic = 0;
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
cv_wait(kcondvar_t *cv, kmutex_t *mp)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
|
|
|
|
ASSERT3P(mutex_owner(mp), ==, curthread);
|
|
|
|
mp->m_owner = MTX_INIT;
|
|
|
|
int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
|
|
|
|
if (ret != 0)
|
|
|
|
VERIFY3S(ret, ==, EINTR);
|
2008-11-20 23:01:55 +03:00
|
|
|
mp->m_owner = curthread;
|
|
|
|
}
|
|
|
|
|
|
|
|
clock_t
|
|
|
|
cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
|
|
|
|
{
|
|
|
|
int error;
|
2010-08-26 21:43:27 +04:00
|
|
|
struct timeval tv;
|
2008-11-20 23:01:55 +03:00
|
|
|
timestruc_t ts;
|
|
|
|
clock_t delta;
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
top:
|
2010-05-29 00:45:14 +04:00
|
|
|
delta = abstime - ddi_get_lbolt();
|
2008-11-20 23:01:55 +03:00
|
|
|
if (delta <= 0)
|
|
|
|
return (-1);
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
VERIFY(gettimeofday(&tv, NULL) == 0);
|
|
|
|
|
|
|
|
ts.tv_sec = tv.tv_sec + delta / hz;
|
|
|
|
ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
|
|
|
|
if (ts.tv_nsec >= NANOSEC) {
|
|
|
|
ts.tv_sec++;
|
|
|
|
ts.tv_nsec -= NANOSEC;
|
|
|
|
}
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3P(mutex_owner(mp), ==, curthread);
|
|
|
|
mp->m_owner = MTX_INIT;
|
|
|
|
error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
|
2008-11-20 23:01:55 +03:00
|
|
|
mp->m_owner = curthread;
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
if (error == ETIMEDOUT)
|
2008-11-20 23:01:55 +03:00
|
|
|
return (-1);
|
|
|
|
|
|
|
|
if (error == EINTR)
|
|
|
|
goto top;
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
VERIFY3S(error, ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2013-08-29 03:05:48 +04:00
|
|
|
/*ARGSUSED*/
|
|
|
|
clock_t
|
|
|
|
cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
|
|
|
|
int flag)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
timestruc_t ts;
|
|
|
|
hrtime_t delta;
|
|
|
|
|
|
|
|
ASSERT(flag == 0);
|
|
|
|
|
|
|
|
top:
|
|
|
|
delta = tim - gethrtime();
|
|
|
|
if (delta <= 0)
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
ts.tv_sec = delta / NANOSEC;
|
|
|
|
ts.tv_nsec = delta % NANOSEC;
|
|
|
|
|
|
|
|
ASSERT(mutex_owner(mp) == curthread);
|
|
|
|
mp->m_owner = NULL;
|
|
|
|
error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
|
|
|
|
mp->m_owner = curthread;
|
|
|
|
|
|
|
|
if (error == ETIME)
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
if (error == EINTR)
|
|
|
|
goto top;
|
|
|
|
|
|
|
|
ASSERT(error == 0);
|
|
|
|
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
void
|
|
|
|
cv_signal(kcondvar_t *cv)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
|
|
|
|
VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
cv_broadcast(kcondvar_t *cv)
|
|
|
|
{
|
2010-08-26 21:43:27 +04:00
|
|
|
ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
|
|
|
|
VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* vnode operations
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Note: for the xxxat() versions of these functions, we assume that the
|
|
|
|
* starting vp is always rootdir (which is true for spa_directory.c, the only
|
|
|
|
* ZFS consumer of these interfaces). We assert this is true, and then emulate
|
|
|
|
* them by adding '/' in front of the path.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
int
|
|
|
|
vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
|
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
vnode_t *vp;
|
2013-04-30 01:07:46 +04:00
|
|
|
int old_umask = 0;
|
2010-08-26 22:03:04 +04:00
|
|
|
char *realpath;
|
2008-11-20 23:01:55 +03:00
|
|
|
struct stat64 st;
|
2010-08-26 21:21:44 +04:00
|
|
|
int err;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-26 22:03:04 +04:00
|
|
|
realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* If we're accessing a real disk from userland, we need to use
|
|
|
|
* the character interface to avoid caching. This is particularly
|
|
|
|
* important if we're trying to look at a real in-kernel storage
|
|
|
|
* pool from userland, e.g. via zdb, because otherwise we won't
|
|
|
|
* see the changes occurring under the segmap cache.
|
|
|
|
* On the other hand, the stupid character device returns zero
|
|
|
|
* for its size. So -- gag -- we open the block device to get
|
|
|
|
* its size, and remember it for subsequent VOP_GETATTR().
|
|
|
|
*/
|
2010-08-26 22:56:53 +04:00
|
|
|
#if defined(__sun__) || defined(__sun)
|
2008-11-20 23:01:55 +03:00
|
|
|
if (strncmp(path, "/dev/", 5) == 0) {
|
2010-08-26 22:56:53 +04:00
|
|
|
#else
|
|
|
|
if (0) {
|
|
|
|
#endif
|
2008-11-20 23:01:55 +03:00
|
|
|
char *dsk;
|
|
|
|
fd = open64(path, O_RDONLY);
|
2010-08-26 22:03:04 +04:00
|
|
|
if (fd == -1) {
|
|
|
|
err = errno;
|
|
|
|
free(realpath);
|
|
|
|
return (err);
|
|
|
|
}
|
2008-11-20 23:01:55 +03:00
|
|
|
if (fstat64(fd, &st) == -1) {
|
2010-08-26 22:03:04 +04:00
|
|
|
err = errno;
|
2008-11-20 23:01:55 +03:00
|
|
|
close(fd);
|
2010-08-26 22:03:04 +04:00
|
|
|
free(realpath);
|
|
|
|
return (err);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
close(fd);
|
|
|
|
(void) sprintf(realpath, "%s", path);
|
|
|
|
dsk = strstr(path, "/dsk/");
|
|
|
|
if (dsk != NULL)
|
|
|
|
(void) sprintf(realpath + (dsk - path) + 1, "r%s",
|
|
|
|
dsk + 1);
|
|
|
|
} else {
|
|
|
|
(void) sprintf(realpath, "%s", path);
|
2010-08-26 22:03:04 +04:00
|
|
|
if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
|
|
|
|
err = errno;
|
|
|
|
free(realpath);
|
|
|
|
return (err);
|
|
|
|
}
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
2010-08-26 22:56:53 +04:00
|
|
|
if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
|
|
|
|
#ifdef __linux__
|
|
|
|
flags |= O_DIRECT;
|
|
|
|
#endif
|
|
|
|
/* We shouldn't be writing to block devices in userspace */
|
|
|
|
VERIFY(!(flags & FWRITE));
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
if (flags & FCREAT)
|
|
|
|
old_umask = umask(0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The construct 'flags - FREAD' conveniently maps combinations of
|
|
|
|
* FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
|
|
|
|
*/
|
|
|
|
fd = open64(realpath, flags - FREAD, mode);
|
2010-08-26 22:03:04 +04:00
|
|
|
free(realpath);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
if (flags & FCREAT)
|
|
|
|
(void) umask(old_umask);
|
|
|
|
|
|
|
|
if (fd == -1)
|
|
|
|
return (errno);
|
|
|
|
|
2010-12-14 20:50:37 +03:00
|
|
|
if (fstat64_blk(fd, &st) == -1) {
|
2010-08-26 21:21:44 +04:00
|
|
|
err = errno;
|
2008-11-20 23:01:55 +03:00
|
|
|
close(fd);
|
2010-08-26 21:21:44 +04:00
|
|
|
return (err);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
|
|
|
|
|
|
|
|
*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
|
|
|
|
|
|
|
|
vp->v_fd = fd;
|
|
|
|
vp->v_size = st.st_size;
|
|
|
|
vp->v_path = spa_strdup(path);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
int
|
|
|
|
vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
|
|
|
|
int x3, vnode_t *startvp, int fd)
|
|
|
|
{
|
|
|
|
char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ASSERT(startvp == rootdir);
|
|
|
|
(void) sprintf(realpath, "/%s", path);
|
|
|
|
|
|
|
|
/* fd ignored for now, need if want to simulate nbmand support */
|
|
|
|
ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
|
|
|
|
|
|
|
|
umem_free(realpath, strlen(path) + 2);
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*ARGSUSED*/
|
|
|
|
int
|
|
|
|
vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
|
|
|
|
int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
|
|
|
|
{
|
2010-08-26 21:21:44 +04:00
|
|
|
ssize_t rc, done = 0, split;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
if (uio == UIO_READ) {
|
2010-08-26 21:21:44 +04:00
|
|
|
rc = pread64(vp->v_fd, addr, len, offset);
|
2008-11-20 23:01:55 +03:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* To simulate partial disk writes, we split writes into two
|
|
|
|
* system calls so that the process can be killed in between.
|
|
|
|
*/
|
2012-12-14 03:24:15 +04:00
|
|
|
int sectors = len >> SPA_MINBLOCKSHIFT;
|
|
|
|
split = (sectors > 0 ? rand() % sectors : 0) <<
|
|
|
|
SPA_MINBLOCKSHIFT;
|
2010-08-26 21:21:44 +04:00
|
|
|
rc = pwrite64(vp->v_fd, addr, split, offset);
|
|
|
|
if (rc != -1) {
|
|
|
|
done = rc;
|
|
|
|
rc = pwrite64(vp->v_fd, (char *)addr + split,
|
|
|
|
len - split, offset + split);
|
|
|
|
}
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
2010-08-26 22:56:53 +04:00
|
|
|
#ifdef __linux__
|
|
|
|
if (rc == -1 && errno == EINVAL) {
|
|
|
|
/*
|
|
|
|
* Under Linux, this most likely means an alignment issue
|
|
|
|
* (memory or disk) due to O_DIRECT, so we abort() in order to
|
|
|
|
* catch the offender.
|
|
|
|
*/
|
2013-11-01 23:26:11 +04:00
|
|
|
abort();
|
2010-08-26 22:56:53 +04:00
|
|
|
}
|
|
|
|
#endif
|
2010-08-26 21:21:44 +04:00
|
|
|
if (rc == -1)
|
2008-11-20 23:01:55 +03:00
|
|
|
return (errno);
|
2010-08-26 21:21:44 +04:00
|
|
|
|
|
|
|
done += rc;
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
if (residp)
|
2010-08-26 21:21:44 +04:00
|
|
|
*residp = len - done;
|
|
|
|
else if (done != len)
|
2008-11-20 23:01:55 +03:00
|
|
|
return (EIO);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vn_close(vnode_t *vp)
|
|
|
|
{
|
|
|
|
close(vp->v_fd);
|
|
|
|
spa_strfree(vp->v_path);
|
|
|
|
umem_free(vp, sizeof (vnode_t));
|
|
|
|
}
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
/*
|
|
|
|
* At a minimum we need to update the size since vdev_reopen()
|
|
|
|
* will no longer call vn_openat().
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
fop_getattr(vnode_t *vp, vattr_t *vap)
|
|
|
|
{
|
|
|
|
struct stat64 st;
|
2010-12-14 20:50:37 +03:00
|
|
|
int err;
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2010-12-14 20:50:37 +03:00
|
|
|
if (fstat64_blk(vp->v_fd, &st) == -1) {
|
|
|
|
err = errno;
|
2010-05-29 00:45:14 +04:00
|
|
|
close(vp->v_fd);
|
2010-12-14 20:50:37 +03:00
|
|
|
return (err);
|
2010-05-29 00:45:14 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
vap->va_size = st.st_size;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* Figure out which debugging statements to print
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
static char *dprintf_string;
|
|
|
|
static int dprintf_print_all;
|
|
|
|
|
|
|
|
int
|
|
|
|
dprintf_find_string(const char *string)
|
|
|
|
{
|
|
|
|
char *tmp_str = dprintf_string;
|
|
|
|
int len = strlen(string);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find out if this is a string we want to print.
|
|
|
|
* String format: file1.c,function_name1,file2.c,file3.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
while (tmp_str != NULL) {
|
|
|
|
if (strncmp(tmp_str, string, len) == 0 &&
|
|
|
|
(tmp_str[len] == ',' || tmp_str[len] == '\0'))
|
|
|
|
return (1);
|
|
|
|
tmp_str = strchr(tmp_str, ',');
|
|
|
|
if (tmp_str != NULL)
|
|
|
|
tmp_str++; /* Get rid of , */
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dprintf_setup(int *argc, char **argv)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Debugging can be specified two ways: by setting the
|
|
|
|
* environment variable ZFS_DEBUG, or by including a
|
|
|
|
* "debug=..." argument on the command line. The command
|
|
|
|
* line setting overrides the environment variable.
|
|
|
|
*/
|
|
|
|
|
|
|
|
for (i = 1; i < *argc; i++) {
|
|
|
|
int len = strlen("debug=");
|
|
|
|
/* First look for a command line argument */
|
|
|
|
if (strncmp("debug=", argv[i], len) == 0) {
|
|
|
|
dprintf_string = argv[i] + len;
|
|
|
|
/* Remove from args */
|
|
|
|
for (j = i; j < *argc; j++)
|
|
|
|
argv[j] = argv[j+1];
|
|
|
|
argv[j] = NULL;
|
|
|
|
(*argc)--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dprintf_string == NULL) {
|
|
|
|
/* Look for ZFS_DEBUG environment variable */
|
|
|
|
dprintf_string = getenv("ZFS_DEBUG");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Are we just turning on all debugging?
|
|
|
|
*/
|
|
|
|
if (dprintf_find_string("on"))
|
|
|
|
dprintf_print_all = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* debug printfs
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
const char *newfile;
|
|
|
|
va_list adx;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get rid of annoying "../common/" prefix to filename.
|
|
|
|
*/
|
|
|
|
newfile = strrchr(file, '/');
|
|
|
|
if (newfile != NULL) {
|
|
|
|
newfile = newfile + 1; /* Get rid of leading / */
|
|
|
|
} else {
|
|
|
|
newfile = file;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dprintf_print_all ||
|
|
|
|
dprintf_find_string(newfile) ||
|
|
|
|
dprintf_find_string(func)) {
|
|
|
|
/* Print out just the function name if requested */
|
|
|
|
flockfile(stdout);
|
|
|
|
if (dprintf_find_string("pid"))
|
|
|
|
(void) printf("%d ", getpid());
|
|
|
|
if (dprintf_find_string("tid"))
|
2010-08-26 21:43:27 +04:00
|
|
|
(void) printf("%u ", (uint_t) pthread_self());
|
2008-11-20 23:01:55 +03:00
|
|
|
if (dprintf_find_string("cpu"))
|
|
|
|
(void) printf("%u ", getcpuid());
|
|
|
|
if (dprintf_find_string("time"))
|
|
|
|
(void) printf("%llu ", gethrtime());
|
|
|
|
if (dprintf_find_string("long"))
|
|
|
|
(void) printf("%s, line %d: ", newfile, line);
|
|
|
|
(void) printf("%s: ", func);
|
|
|
|
va_start(adx, fmt);
|
|
|
|
(void) vprintf(fmt, adx);
|
|
|
|
va_end(adx);
|
|
|
|
funlockfile(stdout);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* cmn_err() and panic()
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
/*
 * Text printed before and after a message by vcmn_err(), indexed by the
 * cmn_err level.
 * NOTE(review): the rows presumably correspond to CE_CONT, CE_NOTE,
 * CE_WARN and CE_PANIC in that order -- confirm against the CE_*
 * definitions.
 */
static char ce_prefix[CE_IGNORE][10] = {
	"",
	"NOTICE: ",
	"WARNING: ",
	""
};
static char ce_suffix[CE_IGNORE][2] = {
	"",
	"\n",
	"\n",
	""
};
|
|
|
|
|
|
|
|
/*
 * Print "error: <formatted message>\n" on stderr and abort the process.
 * Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
|
|
|
|
|
|
|
|
/*
 * printf-style front end to vpanic(): formats the message and aborts.
 */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	/* vpanic() aborts; va_end() is kept for well-formed varargs use. */
	va_end(args);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vcmn_err(int ce, const char *fmt, va_list adx)
|
|
|
|
{
|
|
|
|
if (ce == CE_PANIC)
|
|
|
|
vpanic(fmt, adx);
|
|
|
|
if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
|
|
|
|
(void) fprintf(stderr, "%s", ce_prefix[ce]);
|
|
|
|
(void) vfprintf(stderr, fmt, adx);
|
|
|
|
(void) fprintf(stderr, "%s", ce_suffix[ce]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * printf-style front end to vcmn_err(); 'ce' is the message level.
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* kobj interfaces
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
struct _buf *
|
|
|
|
kobj_open_file(char *name)
|
|
|
|
{
|
|
|
|
struct _buf *file;
|
|
|
|
vnode_t *vp;
|
|
|
|
|
|
|
|
/* set vp as the _fd field of the file */
|
|
|
|
if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
|
|
|
|
-1) != 0)
|
|
|
|
return ((void *)-1UL);
|
|
|
|
|
|
|
|
file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
|
|
|
|
file->_fd = (intptr_t)vp;
|
|
|
|
return (file);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
|
|
|
|
{
|
|
|
|
ssize_t resid;
|
|
|
|
|
|
|
|
vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
|
|
|
|
UIO_SYSSPACE, 0, 0, 0, &resid);
|
|
|
|
|
|
|
|
return (size - resid);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
kobj_close_file(struct _buf *file)
|
|
|
|
{
|
|
|
|
vn_close((vnode_t *)file->_fd);
|
|
|
|
umem_free(file, sizeof (struct _buf));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kobj_get_filesize(struct _buf *file, uint64_t *size)
|
|
|
|
{
|
|
|
|
struct stat64 st;
|
|
|
|
vnode_t *vp = (vnode_t *)file->_fd;
|
|
|
|
|
|
|
|
if (fstat64(vp->v_fd, &st) == -1) {
|
|
|
|
vn_close(vp);
|
|
|
|
return (errno);
|
|
|
|
}
|
|
|
|
*size = st.st_size;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* misc routines
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
delay(clock_t ticks)
|
|
|
|
{
|
|
|
|
poll(0, 0, ticks * (1000 / hz));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find highest one bit set.
|
|
|
|
* Returns bit number + 1 of highest bit that is set, otherwise returns 0.
|
|
|
|
* High order bit is 31 (or 63 in _LP64 kernel).
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
highbit(ulong_t i)
|
|
|
|
{
|
|
|
|
register int h = 1;
|
|
|
|
|
|
|
|
if (i == 0)
|
|
|
|
return (0);
|
|
|
|
#ifdef _LP64
|
|
|
|
if (i & 0xffffffff00000000ul) {
|
|
|
|
h += 32; i >>= 32;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (i & 0xffff0000) {
|
|
|
|
h += 16; i >>= 16;
|
|
|
|
}
|
|
|
|
if (i & 0xff00) {
|
|
|
|
h += 8; i >>= 8;
|
|
|
|
}
|
|
|
|
if (i & 0xf0) {
|
|
|
|
h += 4; i >>= 4;
|
|
|
|
}
|
|
|
|
if (i & 0xc) {
|
|
|
|
h += 2; i >>= 2;
|
|
|
|
}
|
|
|
|
if (i & 0x2) {
|
|
|
|
h += 1;
|
|
|
|
}
|
|
|
|
return (h);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Descriptors for the random devices; -1 while not open. */
static int random_fd = -1;
static int urandom_fd = -1;
|
|
|
|
|
|
|
|
static int
|
|
|
|
random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
|
|
|
|
{
|
|
|
|
size_t resid = len;
|
|
|
|
ssize_t bytes;
|
|
|
|
|
|
|
|
ASSERT(fd != -1);
|
|
|
|
|
|
|
|
while (resid != 0) {
|
|
|
|
bytes = read(fd, ptr, resid);
|
|
|
|
ASSERT3S(bytes, >=, 0);
|
|
|
|
ptr += bytes;
|
|
|
|
resid -= bytes;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
random_get_bytes(uint8_t *ptr, size_t len)
|
|
|
|
{
|
|
|
|
return (random_get_bytes_common(ptr, len, random_fd));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
random_get_pseudo_bytes(uint8_t *ptr, size_t len)
|
|
|
|
{
|
|
|
|
return (random_get_bytes_common(ptr, len, urandom_fd));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Convert the leading part of 'hw_serial' to an unsigned long using
 * strtoul().
 *
 * hw_serial - string to convert
 * nptr      - if not NULL, receives a pointer to the first character that
 *             was not consumed
 * base      - conversion base, as for strtoul()
 * result    - receives the converted value
 *
 * Returns 0 on success, or the errno value set by strtoul() (for example
 * ERANGE on overflow).  A parsed value of 0 is not treated as an error.
 * The caller's errno is left untouched on success.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;
	int saved_errno = errno;
	int err;

	/* strtoul() only reports failure via errno, so start from zero. */
	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	err = errno;

	if (nptr != NULL)
		*nptr = end;

	if (err == 0)
		errno = saved_errno;

	return (err);
}
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
int
|
|
|
|
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
|
|
|
|
{
|
|
|
|
char *end;
|
|
|
|
|
|
|
|
*result = strtoull(str, &end, base);
|
|
|
|
if (*result == 0)
|
|
|
|
return (errno);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* =========================================================================
|
|
|
|
* kernel emulation setup & teardown
|
|
|
|
* =========================================================================
|
|
|
|
*/
|
|
|
|
/*
 * Allocation-failure callback registered with umem_nofail_callback() by
 * kernel_init(): report the condition on stderr and abort.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	(void) fputs(errmsg, stderr);
	abort();

	/* Not reached; present only to satisfy the return type. */
	return (0);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
kernel_init(int mode)
|
|
|
|
{
|
2013-09-04 16:00:57 +04:00
|
|
|
extern uint_t rrw_tsd_key;
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
umem_nofail_callback(umem_out_of_memory);
|
|
|
|
|
|
|
|
physmem = sysconf(_SC_PHYS_PAGES);
|
|
|
|
|
|
|
|
dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
|
|
|
|
(double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
|
|
|
|
(mode & FWRITE) ? gethostid() : 0);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
|
|
|
|
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
|
|
|
|
|
2010-08-26 21:43:27 +04:00
|
|
|
thread_init();
|
2008-12-03 23:09:06 +03:00
|
|
|
system_taskq_init();
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
spa_init(mode);
|
2013-09-04 16:00:57 +04:00
|
|
|
|
|
|
|
tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
|
2008-11-20 23:01:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
kernel_fini(void)
|
|
|
|
{
|
|
|
|
spa_fini();
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
system_taskq_fini();
|
2010-08-26 21:43:27 +04:00
|
|
|
thread_fini();
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
close(random_fd);
|
|
|
|
close(urandom_fd);
|
|
|
|
|
|
|
|
random_fd = -1;
|
|
|
|
urandom_fd = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
uid_t
|
|
|
|
crgetuid(cred_t *cr)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2013-08-28 15:45:09 +04:00
|
|
|
uid_t
|
|
|
|
crgetruid(cred_t *cr)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
gid_t
|
|
|
|
crgetgid(cred_t *cr)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
crgetngroups(cred_t *cr)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
gid_t *
|
|
|
|
crgetgroups(cred_t *cr)
|
|
|
|
{
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
ksiddomain_t *
|
|
|
|
ksid_lookupdomain(const char *dom)
|
|
|
|
{
|
|
|
|
ksiddomain_t *kd;
|
|
|
|
|
|
|
|
kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
|
|
|
|
kd->kd_name = spa_strdup(dom);
|
|
|
|
return (kd);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
ksiddomain_rele(ksiddomain_t *ksid)
|
|
|
|
{
|
|
|
|
spa_strfree(ksid->kd_name);
|
|
|
|
umem_free(ksid, sizeof (ksiddomain_t));
|
|
|
|
}
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
/*
 * Format into a buffer allocated by vasprintf() and return it.
 *
 * The formatting is done on a copy of 'adx', so the caller's va_list is
 * not consumed.  A vasprintf() failure is fatal (VERIFY).
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list adx_copy;
	char *buf = NULL;
	int rc;

	va_copy(adx_copy, adx);
	rc = vasprintf(&buf, fmt, adx_copy);
	va_end(adx_copy);

	VERIFY(rc != -1);

	return (buf);
}
|
|
|
|
|
|
|
|
/*
 * printf-style front end to kmem_vasprintf(): format into a newly
 * allocated buffer and return it.  A formatting failure is fatal.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	char *buf;
	va_list adx;

	va_start(adx, fmt);
	buf = kmem_vasprintf(fmt, adx);
	va_end(adx);

	return (buf);
}
|
2010-08-27 01:24:34 +04:00
|
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
|
int
|
|
|
|
zfs_onexit_fd_hold(int fd, minor_t *minorp)
|
|
|
|
{
|
|
|
|
*minorp = 0;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Userland stub: does nothing.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
}
|
|
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
|
int
|
|
|
|
zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
|
|
|
|
uint64_t *action_handle)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
|
int
|
|
|
|
zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
|
int
|
|
|
|
zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
|
|
|
|
{
|
|
|
|
return (0);
|
|
|
|
}
|