mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-28 10:54:21 +03:00
6bdb09510b
Without this, the sysctl system calls will acquire a global lock before invoking the handler. This is noticeable in some situations when running top(1). The global lock is mostly vestigial but continues to see some use and so contention is still a problem; until the default sense of the MPSAFE flag changes, we have to annotate each and every handler. Reviewed-by: Allan Jude <allan@klarasystems.com> Reviewed-by: Ryan Moeller <ryan@ixsystems.com> Signed-off-by: Mark Johnston <markj@FreeBSD.org> Closes #10836
517 lines
13 KiB
C
517 lines
13 KiB
C
/*
|
|
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* Links to Illumos.org for more information on kstat function:
|
|
* [1] https://illumos.org/man/1M/kstat
|
|
* [2] https://illumos.org/man/9f/kstat_create
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/param.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/kstat.h>
|
|
#include <sys/sbuf.h>
|
|
|
|
/* Malloc type under which __kstat_create() allocates each kstat_t. */
static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");
|
|
|
|
/* Top-level "kstat" sysctl node; __kstat_create() hangs its nodes below it. */
SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
|
|
|
|
void
|
|
__kstat_set_raw_ops(kstat_t *ksp,
|
|
int (*headers)(char *buf, size_t size),
|
|
int (*data)(char *buf, size_t size, void *data),
|
|
void *(*addr)(kstat_t *ksp, loff_t index))
|
|
{
|
|
ksp->ks_raw_ops.headers = headers;
|
|
ksp->ks_raw_ops.data = data;
|
|
ksp->ks_raw_ops.addr = addr;
|
|
}
|
|
|
|
static int
|
|
kstat_default_update(kstat_t *ksp, int rw)
|
|
{
|
|
ASSERT(ksp != NULL);
|
|
|
|
if (rw == KSTAT_WRITE)
|
|
return (EACCES);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
kstat_resize_raw(kstat_t *ksp)
|
|
{
|
|
if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX)
|
|
return (ENOMEM);
|
|
|
|
free(ksp->ks_raw_buf, M_TEMP);
|
|
ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
|
|
ksp->ks_raw_buf = malloc(ksp->ks_raw_bufsize, M_TEMP, M_WAITOK);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static void *
|
|
kstat_raw_default_addr(kstat_t *ksp, loff_t n)
|
|
{
|
|
if (n == 0)
|
|
return (ksp->ks_data);
|
|
return (NULL);
|
|
}
|
|
|
|
static int
|
|
kstat_sysctl(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
kstat_t *ksp = arg1;
|
|
kstat_named_t *ksent;
|
|
uint64_t val;
|
|
|
|
ksent = ksp->ks_data;
|
|
/* Select the correct element */
|
|
ksent += arg2;
|
|
/* Update the aggsums before reading */
|
|
(void) ksp->ks_update(ksp, KSTAT_READ);
|
|
val = ksent->value.ui64;
|
|
|
|
return (sysctl_handle_64(oidp, &val, 0, req));
|
|
}
|
|
|
|
static int
|
|
kstat_sysctl_string(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
kstat_t *ksp = arg1;
|
|
kstat_named_t *ksent = ksp->ks_data;
|
|
char *val;
|
|
uint32_t len = 0;
|
|
|
|
/* Select the correct element */
|
|
ksent += arg2;
|
|
/* Update the aggsums before reading */
|
|
(void) ksp->ks_update(ksp, KSTAT_READ);
|
|
val = KSTAT_NAMED_STR_PTR(ksent);
|
|
len = KSTAT_NAMED_STR_BUFLEN(ksent);
|
|
val[len-1] = '\0';
|
|
|
|
return (sysctl_handle_string(oidp, val, len, req));
|
|
}
|
|
|
|
static int
|
|
kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct sbuf *sb;
|
|
kstat_t *ksp = arg1;
|
|
kstat_io_t *kip = ksp->ks_data;
|
|
int rc;
|
|
|
|
sb = sbuf_new_auto();
|
|
if (sb == NULL)
|
|
return (ENOMEM);
|
|
/* Update the aggsums before reading */
|
|
(void) ksp->ks_update(ksp, KSTAT_READ);
|
|
|
|
/* though wlentime & friends are signed, they will never be negative */
|
|
sbuf_printf(sb,
|
|
"%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
|
|
"%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
|
|
kip->nread, kip->nwritten,
|
|
kip->reads, kip->writes,
|
|
kip->wtime, kip->wlentime, kip->wlastupdate,
|
|
kip->rtime, kip->rlentime, kip->rlastupdate,
|
|
kip->wcnt, kip->rcnt);
|
|
rc = sbuf_finish(sb);
|
|
if (rc == 0)
|
|
rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
|
|
sbuf_delete(sb);
|
|
return (rc);
|
|
}
|
|
|
|
static int
|
|
kstat_sysctl_raw(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct sbuf *sb;
|
|
void *data;
|
|
kstat_t *ksp = arg1;
|
|
void *(*addr_op)(kstat_t *ksp, loff_t index);
|
|
int n, rc = 0;
|
|
|
|
sb = sbuf_new_auto();
|
|
if (sb == NULL)
|
|
return (ENOMEM);
|
|
|
|
if (ksp->ks_raw_ops.addr)
|
|
addr_op = ksp->ks_raw_ops.addr;
|
|
else
|
|
addr_op = kstat_raw_default_addr;
|
|
|
|
mutex_enter(ksp->ks_lock);
|
|
|
|
/* Update the aggsums before reading */
|
|
(void) ksp->ks_update(ksp, KSTAT_READ);
|
|
|
|
ksp->ks_raw_bufsize = PAGE_SIZE;
|
|
ksp->ks_raw_buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
|
|
|
|
n = 0;
|
|
restart_headers:
|
|
if (ksp->ks_raw_ops.headers) {
|
|
rc = ksp->ks_raw_ops.headers(
|
|
ksp->ks_raw_buf, ksp->ks_raw_bufsize);
|
|
if (rc == ENOMEM && !kstat_resize_raw(ksp))
|
|
goto restart_headers;
|
|
if (rc == 0)
|
|
sbuf_printf(sb, "%s", ksp->ks_raw_buf);
|
|
}
|
|
|
|
while ((data = addr_op(ksp, n)) != NULL) {
|
|
restart:
|
|
if (ksp->ks_raw_ops.data) {
|
|
rc = ksp->ks_raw_ops.data(ksp->ks_raw_buf,
|
|
ksp->ks_raw_bufsize, data);
|
|
if (rc == ENOMEM && !kstat_resize_raw(ksp))
|
|
goto restart;
|
|
if (rc == 0)
|
|
sbuf_printf(sb, "%s", ksp->ks_raw_buf);
|
|
|
|
} else {
|
|
ASSERT(ksp->ks_ndata == 1);
|
|
sbuf_hexdump(sb, ksp->ks_data,
|
|
ksp->ks_data_size, NULL, 0);
|
|
}
|
|
n++;
|
|
}
|
|
free(ksp->ks_raw_buf, M_TEMP);
|
|
mutex_exit(ksp->ks_lock);
|
|
rc = sbuf_finish(sb);
|
|
if (rc == 0)
|
|
rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
|
|
sbuf_delete(sb);
|
|
return (rc);
|
|
}
|
|
|
|
kstat_t *
|
|
__kstat_create(const char *module, int instance, const char *name,
|
|
const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags)
|
|
{
|
|
struct sysctl_oid *root;
|
|
kstat_t *ksp;
|
|
|
|
KASSERT(instance == 0, ("instance=%d", instance));
|
|
if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
|
|
ASSERT(ks_ndata == 1);
|
|
|
|
/*
|
|
* Allocate the main structure. We don't need to copy module/class/name
|
|
* stuff in here, because it is only used for sysctl node creation
|
|
* done in this function.
|
|
*/
|
|
ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO);
|
|
|
|
ksp->ks_crtime = gethrtime();
|
|
ksp->ks_snaptime = ksp->ks_crtime;
|
|
ksp->ks_instance = instance;
|
|
strncpy(ksp->ks_name, name, KSTAT_STRLEN);
|
|
strncpy(ksp->ks_class, class, KSTAT_STRLEN);
|
|
ksp->ks_type = ks_type;
|
|
ksp->ks_flags = flags;
|
|
ksp->ks_update = kstat_default_update;
|
|
|
|
mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
ksp->ks_lock = &ksp->ks_private_lock;
|
|
|
|
switch (ksp->ks_type) {
|
|
case KSTAT_TYPE_RAW:
|
|
ksp->ks_ndata = 1;
|
|
ksp->ks_data_size = ks_ndata;
|
|
break;
|
|
case KSTAT_TYPE_NAMED:
|
|
ksp->ks_ndata = ks_ndata;
|
|
ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
|
|
break;
|
|
case KSTAT_TYPE_INTR:
|
|
ksp->ks_ndata = ks_ndata;
|
|
ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
|
|
break;
|
|
case KSTAT_TYPE_IO:
|
|
ksp->ks_ndata = ks_ndata;
|
|
ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
|
|
break;
|
|
case KSTAT_TYPE_TIMER:
|
|
ksp->ks_ndata = ks_ndata;
|
|
ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
|
|
break;
|
|
default:
|
|
panic("Undefined kstat type %d\n", ksp->ks_type);
|
|
}
|
|
|
|
if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
|
|
ksp->ks_data = NULL;
|
|
} else {
|
|
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
|
|
if (ksp->ks_data == NULL) {
|
|
kmem_free(ksp, sizeof (*ksp));
|
|
ksp = NULL;
|
|
}
|
|
}
|
|
/*
|
|
* Create sysctl tree for those statistics:
|
|
*
|
|
* kstat.<module>.<class>.<name>.
|
|
*/
|
|
sysctl_ctx_init(&ksp->ks_sysctl_ctx);
|
|
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
|
|
"");
|
|
if (root == NULL) {
|
|
printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
|
|
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
|
free(ksp, M_KSTAT);
|
|
return (NULL);
|
|
}
|
|
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
|
|
OID_AUTO, class, CTLFLAG_RW, 0, "");
|
|
if (root == NULL) {
|
|
printf("%s: Cannot create kstat.%s.%s tree!\n", __func__,
|
|
module, class);
|
|
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
|
free(ksp, M_KSTAT);
|
|
return (NULL);
|
|
}
|
|
if (ksp->ks_type == KSTAT_TYPE_NAMED) {
|
|
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(root),
|
|
OID_AUTO, name, CTLFLAG_RW, 0, "");
|
|
if (root == NULL) {
|
|
printf("%s: Cannot create kstat.%s.%s.%s tree!\n",
|
|
__func__, module, class, name);
|
|
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
|
free(ksp, M_KSTAT);
|
|
return (NULL);
|
|
}
|
|
|
|
}
|
|
ksp->ks_sysctl_root = root;
|
|
|
|
return (ksp);
|
|
}
|
|
|
|
static void
|
|
kstat_install_named(kstat_t *ksp)
|
|
{
|
|
kstat_named_t *ksent;
|
|
char *namelast;
|
|
int typelast;
|
|
|
|
ksent = ksp->ks_data;
|
|
|
|
VERIFY((ksp->ks_flags & KSTAT_FLAG_VIRTUAL) || ksent != NULL);
|
|
|
|
typelast = 0;
|
|
namelast = NULL;
|
|
|
|
for (int i = 0; i < ksp->ks_ndata; i++, ksent++) {
|
|
if (ksent->data_type != 0) {
|
|
typelast = ksent->data_type;
|
|
namelast = ksent->name;
|
|
}
|
|
switch (typelast) {
|
|
case KSTAT_DATA_CHAR:
|
|
/* Not Implemented */
|
|
break;
|
|
case KSTAT_DATA_INT32:
|
|
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, namelast,
|
|
CTLTYPE_S32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, i, kstat_sysctl, "I", namelast);
|
|
break;
|
|
case KSTAT_DATA_UINT32:
|
|
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, namelast,
|
|
CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, i, kstat_sysctl, "IU", namelast);
|
|
break;
|
|
case KSTAT_DATA_INT64:
|
|
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, namelast,
|
|
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, i, kstat_sysctl, "Q", namelast);
|
|
break;
|
|
case KSTAT_DATA_UINT64:
|
|
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, namelast,
|
|
CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, i, kstat_sysctl, "QU", namelast);
|
|
break;
|
|
case KSTAT_DATA_LONG:
|
|
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, namelast,
|
|
CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, i, kstat_sysctl, "L", namelast);
|
|
break;
|
|
case KSTAT_DATA_ULONG:
|
|
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, namelast,
|
|
CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, i, kstat_sysctl, "LU", namelast);
|
|
break;
|
|
case KSTAT_DATA_STRING:
|
|
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, namelast,
|
|
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, i, kstat_sysctl_string, "A", namelast);
|
|
break;
|
|
default:
|
|
panic("unsupported type: %d", typelast);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
kstat_install(kstat_t *ksp)
|
|
{
|
|
struct sysctl_oid *root;
|
|
|
|
if (ksp->ks_ndata == UINT32_MAX)
|
|
VERIFY(ksp->ks_type == KSTAT_TYPE_RAW);
|
|
|
|
switch (ksp->ks_type) {
|
|
case KSTAT_TYPE_NAMED:
|
|
return (kstat_install_named(ksp));
|
|
break;
|
|
case KSTAT_TYPE_RAW:
|
|
if (ksp->ks_raw_ops.data) {
|
|
root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, ksp->ks_name,
|
|
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, 0, kstat_sysctl_raw, "A", ksp->ks_name);
|
|
} else {
|
|
root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, ksp->ks_name,
|
|
CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, 0, kstat_sysctl_raw, "", ksp->ks_name);
|
|
}
|
|
VERIFY(root != NULL);
|
|
break;
|
|
case KSTAT_TYPE_IO:
|
|
root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
|
OID_AUTO, ksp->ks_name,
|
|
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
|
ksp, 0, kstat_sysctl_io, "A", ksp->ks_name);
|
|
break;
|
|
case KSTAT_TYPE_TIMER:
|
|
case KSTAT_TYPE_INTR:
|
|
default:
|
|
panic("unsupported kstat type %d\n", ksp->ks_type);
|
|
}
|
|
ksp->ks_sysctl_root = root;
|
|
}
|
|
|
|
void
|
|
kstat_delete(kstat_t *ksp)
|
|
{
|
|
|
|
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
|
ksp->ks_lock = NULL;
|
|
mutex_destroy(&ksp->ks_private_lock);
|
|
free(ksp, M_KSTAT);
|
|
}
|
|
|
|
void
|
|
kstat_waitq_enter(kstat_io_t *kiop)
|
|
{
|
|
hrtime_t new, delta;
|
|
ulong_t wcnt;
|
|
|
|
new = gethrtime();
|
|
delta = new - kiop->wlastupdate;
|
|
kiop->wlastupdate = new;
|
|
wcnt = kiop->wcnt++;
|
|
if (wcnt != 0) {
|
|
kiop->wlentime += delta * wcnt;
|
|
kiop->wtime += delta;
|
|
}
|
|
}
|
|
|
|
void
|
|
kstat_waitq_exit(kstat_io_t *kiop)
|
|
{
|
|
hrtime_t new, delta;
|
|
ulong_t wcnt;
|
|
|
|
new = gethrtime();
|
|
delta = new - kiop->wlastupdate;
|
|
kiop->wlastupdate = new;
|
|
wcnt = kiop->wcnt--;
|
|
ASSERT((int)wcnt > 0);
|
|
kiop->wlentime += delta * wcnt;
|
|
kiop->wtime += delta;
|
|
}
|
|
|
|
void
|
|
kstat_runq_enter(kstat_io_t *kiop)
|
|
{
|
|
hrtime_t new, delta;
|
|
ulong_t rcnt;
|
|
|
|
new = gethrtime();
|
|
delta = new - kiop->rlastupdate;
|
|
kiop->rlastupdate = new;
|
|
rcnt = kiop->rcnt++;
|
|
if (rcnt != 0) {
|
|
kiop->rlentime += delta * rcnt;
|
|
kiop->rtime += delta;
|
|
}
|
|
}
|
|
|
|
void
|
|
kstat_runq_exit(kstat_io_t *kiop)
|
|
{
|
|
hrtime_t new, delta;
|
|
ulong_t rcnt;
|
|
|
|
new = gethrtime();
|
|
delta = new - kiop->rlastupdate;
|
|
kiop->rlastupdate = new;
|
|
rcnt = kiop->rcnt--;
|
|
ASSERT((int)rcnt > 0);
|
|
kiop->rlentime += delta * rcnt;
|
|
kiop->rtime += delta;
|
|
}
|