mirror_zfs/module/os/freebsd/spl/spl_kstat.c
Alexander Motin 371f88d96f
Remove pool io kstats (#12212)
This mostly reverts "3537 want pool io kstats" commit of 8 years ago.

From one side this code using pool-wide locks became pretty bad for
performance, creating significant lock contention in I/O pipeline.
From another, there are more efficient ways now to obtain detailed
statistics, while this statistics is illumos-specific and much less
usable on Linux and FreeBSD, reported only via procfs/sysctls.

This commit does not remove KSTAT_TYPE_IO implementation, that may
be removed later together with already unused KSTAT_TYPE_INTR and
KSTAT_TYPE_TIMER.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #12212
2021-06-10 08:27:33 -07:00

511 lines
14 KiB
C

/*
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Links to Illumos.org for more information on kstat function:
* [1] https://illumos.org/man/1M/kstat
* [2] https://illumos.org/man/9f/kstat_create
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/kstat.h>
#include <sys/sbuf.h>
static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");
SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
void
__kstat_set_raw_ops(kstat_t *ksp,
int (*headers)(char *buf, size_t size),
int (*data)(char *buf, size_t size, void *data),
void *(*addr)(kstat_t *ksp, loff_t index))
{
ksp->ks_raw_ops.headers = headers;
ksp->ks_raw_ops.data = data;
ksp->ks_raw_ops.addr = addr;
}
void
__kstat_set_seq_raw_ops(kstat_t *ksp,
int (*headers)(struct seq_file *f),
int (*data)(char *buf, size_t size, void *data),
void *(*addr)(kstat_t *ksp, loff_t index))
{
ksp->ks_raw_ops.seq_headers = headers;
ksp->ks_raw_ops.data = data;
ksp->ks_raw_ops.addr = addr;
}
static int
kstat_default_update(kstat_t *ksp, int rw)
{
ASSERT3P(ksp, !=, NULL);
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
static int
kstat_resize_raw(kstat_t *ksp)
{
if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX)
return (ENOMEM);
free(ksp->ks_raw_buf, M_TEMP);
ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
ksp->ks_raw_buf = malloc(ksp->ks_raw_bufsize, M_TEMP, M_WAITOK);
return (0);
}
static void *
kstat_raw_default_addr(kstat_t *ksp, loff_t n)
{
if (n == 0)
return (ksp->ks_data);
return (NULL);
}
static int
kstat_sysctl(SYSCTL_HANDLER_ARGS)
{
kstat_t *ksp = arg1;
kstat_named_t *ksent;
uint64_t val;
ksent = ksp->ks_data;
/* Select the correct element */
ksent += arg2;
/* Update the aggsums before reading */
(void) ksp->ks_update(ksp, KSTAT_READ);
val = ksent->value.ui64;
return (sysctl_handle_64(oidp, &val, 0, req));
}
static int
kstat_sysctl_string(SYSCTL_HANDLER_ARGS)
{
kstat_t *ksp = arg1;
kstat_named_t *ksent = ksp->ks_data;
char *val;
uint32_t len = 0;
/* Select the correct element */
ksent += arg2;
/* Update the aggsums before reading */
(void) ksp->ks_update(ksp, KSTAT_READ);
val = KSTAT_NAMED_STR_PTR(ksent);
len = KSTAT_NAMED_STR_BUFLEN(ksent);
val[len-1] = '\0';
return (sysctl_handle_string(oidp, val, len, req));
}
static int
kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
{
struct sbuf *sb;
kstat_t *ksp = arg1;
kstat_io_t *kip = ksp->ks_data;
int rc;
sb = sbuf_new_auto();
if (sb == NULL)
return (ENOMEM);
/* Update the aggsums before reading */
(void) ksp->ks_update(ksp, KSTAT_READ);
/* though wlentime & friends are signed, they will never be negative */
sbuf_printf(sb,
"%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
"%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
kip->nread, kip->nwritten,
kip->reads, kip->writes,
kip->wtime, kip->wlentime, kip->wlastupdate,
kip->rtime, kip->rlentime, kip->rlastupdate,
kip->wcnt, kip->rcnt);
rc = sbuf_finish(sb);
if (rc == 0)
rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
sbuf_delete(sb);
return (rc);
}
static int
kstat_sysctl_raw(SYSCTL_HANDLER_ARGS)
{
struct sbuf *sb;
void *data;
kstat_t *ksp = arg1;
void *(*addr_op)(kstat_t *ksp, loff_t index);
int n, has_header, rc = 0;
sb = sbuf_new_auto();
if (sb == NULL)
return (ENOMEM);
if (ksp->ks_raw_ops.addr)
addr_op = ksp->ks_raw_ops.addr;
else
addr_op = kstat_raw_default_addr;
mutex_enter(ksp->ks_lock);
/* Update the aggsums before reading */
(void) ksp->ks_update(ksp, KSTAT_READ);
ksp->ks_raw_bufsize = PAGE_SIZE;
ksp->ks_raw_buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
n = 0;
has_header = (ksp->ks_raw_ops.headers ||
ksp->ks_raw_ops.seq_headers);
restart_headers:
if (ksp->ks_raw_ops.headers) {
rc = ksp->ks_raw_ops.headers(
ksp->ks_raw_buf, ksp->ks_raw_bufsize);
} else if (ksp->ks_raw_ops.seq_headers) {
struct seq_file f;
f.sf_buf = ksp->ks_raw_buf;
f.sf_size = ksp->ks_raw_bufsize;
rc = ksp->ks_raw_ops.seq_headers(&f);
}
if (has_header) {
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart_headers;
if (rc == 0)
sbuf_printf(sb, "\n%s", ksp->ks_raw_buf);
}
while ((data = addr_op(ksp, n)) != NULL) {
restart:
if (ksp->ks_raw_ops.data) {
rc = ksp->ks_raw_ops.data(ksp->ks_raw_buf,
ksp->ks_raw_bufsize, data);
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart;
if (rc == 0)
sbuf_printf(sb, "%s", ksp->ks_raw_buf);
} else {
ASSERT3U(ksp->ks_ndata, ==, 1);
sbuf_hexdump(sb, ksp->ks_data,
ksp->ks_data_size, NULL, 0);
}
n++;
}
free(ksp->ks_raw_buf, M_TEMP);
mutex_exit(ksp->ks_lock);
sbuf_trim(sb);
rc = sbuf_finish(sb);
if (rc == 0)
rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
sbuf_delete(sb);
return (rc);
}
kstat_t *
__kstat_create(const char *module, int instance, const char *name,
const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags)
{
char buf[KSTAT_STRLEN];
struct sysctl_oid *root;
kstat_t *ksp;
char *pool;
KASSERT(instance == 0, ("instance=%d", instance));
if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
ASSERT3U(ks_ndata, ==, 1);
if (class == NULL)
class = "misc";
/*
* Allocate the main structure. We don't need to keep a copy of
* module in here, because it is only used for sysctl node creation
* done in this function.
*/
ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO);
ksp->ks_crtime = gethrtime();
ksp->ks_snaptime = ksp->ks_crtime;
ksp->ks_instance = instance;
(void) strlcpy(ksp->ks_name, name, KSTAT_STRLEN);
(void) strlcpy(ksp->ks_class, class, KSTAT_STRLEN);
ksp->ks_type = ks_type;
ksp->ks_flags = flags;
ksp->ks_update = kstat_default_update;
mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
ksp->ks_lock = &ksp->ks_private_lock;
switch (ksp->ks_type) {
case KSTAT_TYPE_RAW:
ksp->ks_ndata = 1;
ksp->ks_data_size = ks_ndata;
break;
case KSTAT_TYPE_NAMED:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
break;
case KSTAT_TYPE_INTR:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
break;
case KSTAT_TYPE_IO:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
break;
case KSTAT_TYPE_TIMER:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
break;
default:
panic("Undefined kstat type %d\n", ksp->ks_type);
}
if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL)
ksp->ks_data = NULL;
else
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
/*
* Some kstats use a module name like "zfs/poolname" to distinguish a
* set of kstats belonging to a specific pool. Split on '/' to add an
* extra node for the pool name if needed.
*/
(void) strlcpy(buf, module, KSTAT_STRLEN);
module = buf;
pool = strchr(module, '/');
if (pool != NULL)
*pool++ = '\0';
/*
* Create sysctl tree for those statistics:
*
* kstat.<module>[.<pool>].<class>.<name>
*/
sysctl_ctx_init(&ksp->ks_sysctl_ctx);
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
"");
if (root == NULL) {
printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
if (pool != NULL) {
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(root), OID_AUTO, pool, CTLFLAG_RW, 0, "");
if (root == NULL) {
printf("%s: Cannot create kstat.%s.%s tree!\n",
__func__, module, pool);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
}
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
OID_AUTO, class, CTLFLAG_RW, 0, "");
if (root == NULL) {
if (pool != NULL)
printf("%s: Cannot create kstat.%s.%s.%s tree!\n",
__func__, module, pool, class);
else
printf("%s: Cannot create kstat.%s.%s tree!\n",
__func__, module, class);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
if (ksp->ks_type == KSTAT_TYPE_NAMED) {
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(root),
OID_AUTO, name, CTLFLAG_RW, 0, "");
if (root == NULL) {
if (pool != NULL)
printf("%s: Cannot create kstat.%s.%s.%s.%s "
"tree!\n", __func__, module, pool, class,
name);
else
printf("%s: Cannot create kstat.%s.%s.%s "
"tree!\n", __func__, module, class, name);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
}
ksp->ks_sysctl_root = root;
return (ksp);
}
static void
kstat_install_named(kstat_t *ksp)
{
kstat_named_t *ksent;
char *namelast;
int typelast;
ksent = ksp->ks_data;
VERIFY((ksp->ks_flags & KSTAT_FLAG_VIRTUAL) || ksent != NULL);
typelast = 0;
namelast = NULL;
for (int i = 0; i < ksp->ks_ndata; i++, ksent++) {
if (ksent->data_type != 0) {
typelast = ksent->data_type;
namelast = ksent->name;
}
switch (typelast) {
case KSTAT_DATA_CHAR:
/* Not Implemented */
break;
case KSTAT_DATA_INT32:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_S32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, i, kstat_sysctl, "I", namelast);
break;
case KSTAT_DATA_UINT32:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, i, kstat_sysctl, "IU", namelast);
break;
case KSTAT_DATA_INT64:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, i, kstat_sysctl, "Q", namelast);
break;
case KSTAT_DATA_UINT64:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, i, kstat_sysctl, "QU", namelast);
break;
case KSTAT_DATA_LONG:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, i, kstat_sysctl, "L", namelast);
break;
case KSTAT_DATA_ULONG:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, i, kstat_sysctl, "LU", namelast);
break;
case KSTAT_DATA_STRING:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, i, kstat_sysctl_string, "A", namelast);
break;
default:
panic("unsupported type: %d", typelast);
}
}
}
void
kstat_install(kstat_t *ksp)
{
struct sysctl_oid *root;
if (ksp->ks_ndata == UINT32_MAX)
VERIFY3U(ksp->ks_type, ==, KSTAT_TYPE_RAW);
switch (ksp->ks_type) {
case KSTAT_TYPE_NAMED:
return (kstat_install_named(ksp));
case KSTAT_TYPE_RAW:
if (ksp->ks_raw_ops.data) {
root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, ksp->ks_name, CTLTYPE_STRING | CTLFLAG_RD
| CTLFLAG_MPSAFE | CTLFLAG_SKIP,
ksp, 0, kstat_sysctl_raw, "A", ksp->ks_name);
} else {
root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, ksp->ks_name, CTLTYPE_OPAQUE | CTLFLAG_RD
| CTLFLAG_MPSAFE | CTLFLAG_SKIP,
ksp, 0, kstat_sysctl_raw, "", ksp->ks_name);
}
break;
case KSTAT_TYPE_IO:
root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, ksp->ks_name,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
ksp, 0, kstat_sysctl_io, "A", ksp->ks_name);
break;
case KSTAT_TYPE_TIMER:
case KSTAT_TYPE_INTR:
default:
panic("unsupported kstat type %d\n", ksp->ks_type);
}
VERIFY3P(root, !=, NULL);
ksp->ks_sysctl_root = root;
}
void
kstat_delete(kstat_t *ksp)
{
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
ksp->ks_lock = NULL;
mutex_destroy(&ksp->ks_private_lock);
if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
kmem_free(ksp->ks_data, ksp->ks_data_size);
free(ksp, M_KSTAT);
}