From 330847ff36146a427a48e79a9733dda3828284e8 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Mon, 26 Aug 2013 17:09:29 -0700 Subject: [PATCH] Illumos #3537 3537 want pool io kstats Reviewed by: George Wilson Reviewed by: Adam Leventhal Reviewed by: Eric Schrock Reviewed by: Sašo Kiselkov Reviewed by: Garrett D'Amore Reviewed by: Brendan Gregg Approved by: Gordon Ross References: http://www.illumos.org/issues/3537 illumos/illumos-gate@c3a6601 Ported by: Cyril Plisko Signed-off-by: Brian Behlendorf Porting Notes: 1. The patch was restructured to take advantage of the existing spa statistics infrastructure. To accomplish this the kstat was moved into spa->spa_stats.io_history and the init/destroy code moved to spa_stats.c. 2. The I/O kstat was simply named <pool>, which conflicted with the pool directory we had already created. Therefore it was renamed to <pool>/io. 3. An update handler was added to allow the kstat to be zeroed. --- include/sys/spa.h | 1 + include/sys/zfs_context.h | 10 ++++-- lib/libzpool/kernel.c | 33 ++++++++++++++++++-- module/zfs/spa_stats.c | 50 ++++++++++++++++++++++++++++++ module/zfs/vdev_queue.c | 65 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 152 insertions(+), 7 deletions(-) diff --git a/include/sys/spa.h b/include/sys/spa.h index cb3ce11bc..13c9672aa 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -550,6 +550,7 @@ typedef struct spa_stats { spa_stats_history_t read_history; spa_stats_history_t txg_history; spa_stats_history_t tx_assign_histogram; + spa_stats_history_t io_history; } spa_stats_t; typedef enum txg_state { diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index dfd11d9f1..7910e08aa 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -344,10 +344,16 @@ extern void cv_broadcast(kcondvar_t *cv); /* * kstat creation, installation and deletion */ -extern kstat_t *kstat_create(char *, int, - char *, char *, uchar_t, ulong_t, uchar_t); +extern kstat_t *kstat_create(const char *, int, + const 
char *, const char *, uchar_t, ulong_t, uchar_t); extern void kstat_install(kstat_t *); extern void kstat_delete(kstat_t *); +extern void kstat_waitq_enter(kstat_io_t *); +extern void kstat_waitq_exit(kstat_io_t *); +extern void kstat_runq_enter(kstat_io_t *); +extern void kstat_runq_exit(kstat_io_t *); +extern void kstat_waitq_to_runq(kstat_io_t *); +extern void kstat_runq_back_to_waitq(kstat_io_t *); extern void kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), int (*data)(char *buf, size_t size, void *data), diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 57a3739e2..2e5eef69b 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -224,8 +224,8 @@ zk_thread_join(kt_did_t tid) */ /*ARGSUSED*/ kstat_t * -kstat_create(char *module, int instance, char *name, char *class, - uchar_t type, ulong_t ndata, uchar_t ks_flag) +kstat_create(const char *module, int instance, const char *name, + const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag) { return (NULL); } @@ -241,6 +241,35 @@ kstat_delete(kstat_t *ksp) {} /*ARGSUSED*/ +void +kstat_waitq_enter(kstat_io_t *kiop) +{} + +/*ARGSUSED*/ +void +kstat_waitq_exit(kstat_io_t *kiop) +{} + +/*ARGSUSED*/ +void +kstat_runq_enter(kstat_io_t *kiop) +{} + +/*ARGSUSED*/ +void +kstat_runq_exit(kstat_io_t *kiop) +{} + +/*ARGSUSED*/ +void +kstat_waitq_to_runq(kstat_io_t *kiop) +{} + +/*ARGSUSED*/ +void +kstat_runq_back_to_waitq(kstat_io_t *kiop) +{} + void kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index 789e8c3e6..d37b0af4f 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -608,12 +608,61 @@ spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs) atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64); } +/* + * ========================================================================== + * SPA IO History Routines + * 
========================================================================== + */ +static int +spa_io_history_update(kstat_t *ksp, int rw) +{ + if (rw == KSTAT_WRITE) + memset(ksp->ks_data, 0, ksp->ks_data_size); + + return (0); +} + +static void +spa_io_history_init(spa_t *spa) +{ + spa_stats_history_t *ssh = &spa->spa_stats.io_history; + char name[KSTAT_STRLEN]; + kstat_t *ksp; + + mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); + + (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa)); + name[KSTAT_STRLEN-1] = '\0'; + + ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0); + ssh->kstat = ksp; + + if (ksp) { + ksp->ks_lock = &ssh->lock; + ksp->ks_private = spa; + ksp->ks_update = spa_io_history_update; + kstat_install(ksp); + } +} + +static void +spa_io_history_destroy(spa_t *spa) +{ + spa_stats_history_t *ssh = &spa->spa_stats.io_history; + + if (ssh->kstat) + kstat_delete(ssh->kstat); + + mutex_destroy(&ssh->lock); +} + void spa_stats_init(spa_t *spa) { spa_read_history_init(spa); spa_txg_history_init(spa); spa_tx_assign_init(spa); + spa_io_history_init(spa); } void @@ -622,6 +671,7 @@ spa_stats_destroy(spa_t *spa) spa_tx_assign_destroy(spa); spa_txg_history_destroy(spa); spa_read_history_destroy(spa); + spa_io_history_destroy(spa); } #if defined(_KERNEL) && defined(HAVE_SPL) diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index b2cc6b87f..c01990bf7 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -29,8 +29,10 @@ #include #include +#include #include #include +#include /* * These tunables are for performance analysis. 
@@ -164,15 +166,72 @@ vdev_queue_fini(vdev_t *vd) static void vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) { + spa_t *spa = zio->io_spa; + spa_stats_history_t *ssh = &spa->spa_stats.io_history; + avl_add(&vq->vq_deadline_tree, zio); avl_add(zio->io_vdev_tree, zio); + + if (ssh->kstat != NULL) { + mutex_enter(&ssh->lock); + kstat_waitq_enter(ssh->kstat->ks_data); + mutex_exit(&ssh->lock); + } } static void vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) { + spa_t *spa = zio->io_spa; + spa_stats_history_t *ssh = &spa->spa_stats.io_history; + avl_remove(&vq->vq_deadline_tree, zio); avl_remove(zio->io_vdev_tree, zio); + + if (ssh->kstat != NULL) { + mutex_enter(&ssh->lock); + kstat_waitq_exit(ssh->kstat->ks_data); + mutex_exit(&ssh->lock); + } +} + +static void +vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) +{ + spa_t *spa = zio->io_spa; + spa_stats_history_t *ssh = &spa->spa_stats.io_history; + + avl_add(&vq->vq_pending_tree, zio); + + if (ssh->kstat != NULL) { + mutex_enter(&ssh->lock); + kstat_runq_enter(ssh->kstat->ks_data); + mutex_exit(&ssh->lock); + } +} + +static void +vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) +{ + spa_t *spa = zio->io_spa; + spa_stats_history_t *ssh = &spa->spa_stats.io_history; + + avl_remove(&vq->vq_pending_tree, zio); + + if (ssh->kstat != NULL) { + kstat_io_t *ksio = ssh->kstat->ks_data; + + mutex_enter(&ssh->lock); + kstat_runq_exit(ksio); + if (zio->io_type == ZIO_TYPE_READ) { + ksio->reads++; + ksio->nread += zio->io_size; + } else if (zio->io_type == ZIO_TYPE_WRITE) { + ksio->writes++; + ksio->nwritten += zio->io_size; + } + mutex_exit(&ssh->lock); + } } static void @@ -351,7 +410,7 @@ again: zio_execute(dio); } while (dio != lio); - avl_add(&vq->vq_pending_tree, aio); + vdev_queue_pending_add(vq, aio); list_remove(&vq->vq_io_list, vi); return (aio); @@ -374,7 +433,7 @@ again: goto again; } - avl_add(&vq->vq_pending_tree, fio); + vdev_queue_pending_add(vq, fio); return (fio); } @@ -431,7 +490,7 @@ 
vdev_queue_io_done(zio_t *zio) mutex_enter(&vq->vq_lock); - avl_remove(&vq->vq_pending_tree, zio); + vdev_queue_pending_remove(vq, zio); zio->io_delta = gethrtime() - zio->io_timestamp; vq->vq_io_complete_ts = gethrtime();