Add txgs-<pool> kstat file

Create a kstat file which contains useful statistics about the
last N txgs processed.  This can be helpful when analyzing pool
performance.  The new KSTAT_TYPE_TXG type was added for this
purpose and it tracks the following statistics per-txg.

  txg          - Unique txg number
  state        - State (O)pen/(Q)uiescing/(S)yncing/(C)ommitted
  birth;       - Creation time
  nread        - Bytes read
  nwritten;    - Bytes written
  reads        - IOPs read
  writes       - IOPs write
  open_time;   - Length in nanoseconds the txg was open
  quiesce_time - Length in nanoseconds the txg was quiescing
  sync_time;   - Length in nanoseconds the txg was syncing

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Brian Behlendorf 2012-10-23 13:48:22 -07:00
parent e8fd45a0f9
commit e95853a331
4 changed files with 231 additions and 1 deletions

View File

@ -65,6 +65,13 @@ typedef struct zfs_all_blkstats {
zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1]; zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1];
} zfs_all_blkstats_t; } zfs_all_blkstats_t;
typedef struct txg_history {
kstat_txg_t th_kstat;
vdev_stat_t th_vs1;
vdev_stat_t th_vs2;
kmutex_t th_lock;
list_node_t th_link;
} txg_history_t;
typedef struct dsl_pool { typedef struct dsl_pool {
/* Immutable */ /* Immutable */
@ -76,6 +83,7 @@ typedef struct dsl_pool {
struct dsl_dataset *dp_origin_snap; struct dsl_dataset *dp_origin_snap;
uint64_t dp_root_dir_obj; uint64_t dp_root_dir_obj;
struct taskq *dp_iput_taskq; struct taskq *dp_iput_taskq;
kstat_t *dp_txg_kstat;
/* No lock needed - sync context only */ /* No lock needed - sync context only */
blkptr_t dp_meta_rootbp; blkptr_t dp_meta_rootbp;
@ -92,6 +100,9 @@ typedef struct dsl_pool {
kmutex_t dp_lock; kmutex_t dp_lock;
uint64_t dp_space_towrite[TXG_SIZE]; uint64_t dp_space_towrite[TXG_SIZE];
uint64_t dp_tempreserved[TXG_SIZE]; uint64_t dp_tempreserved[TXG_SIZE];
uint64_t dp_txg_history_size;
list_t dp_txg_history;
/* Has its own locking */ /* Has its own locking */
tx_state_t dp_tx; tx_state_t dp_tx;
@ -144,6 +155,10 @@ extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp); extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **); int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
txg_history_t *dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg);
txg_history_t *dsl_pool_txg_history_get(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_txg_history_put(txg_history_t *th);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -228,8 +228,10 @@ typedef struct kstat32 {
/* ks_ndata == 1 */ /* ks_ndata == 1 */
#define KSTAT_TYPE_TIMER 4 /* event timer */ #define KSTAT_TYPE_TIMER 4 /* event timer */
/* ks_ndata >= 1 */ /* ks_ndata >= 1 */
#define KSTAT_TYPE_TXG 5 /* txg statistics */
/* ks_ndata >= 0 */
#define KSTAT_NUM_TYPES 5 #define KSTAT_NUM_TYPES 6
/* /*
* kstat class * kstat class
@ -698,6 +700,29 @@ typedef struct kstat_timer {
#define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data) #define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data)
/*
* TXG statistics - bytes read/written and iops performed
*/
typedef enum kstat_txg_state {
TXG_STATE_OPEN = 1,
TXG_STATE_QUIESCING = 2,
TXG_STATE_SYNCING = 3,
TXG_STATE_COMMITTED = 4,
} kstat_txg_state_t;
typedef struct kstat_txg {
u_longlong_t txg; /* txg id */
kstat_txg_state_t state; /* txg state */
hrtime_t birth; /* birth time stamp */
u_longlong_t nread; /* number of bytes read */
u_longlong_t nwritten; /* number of bytes written */
uint_t reads; /* number of read operations */
uint_t writes; /* number of write operations */
hrtime_t open_time; /* open time */
hrtime_t quiesce_time; /* quiesce time */
hrtime_t sync_time; /* sync time */
} kstat_txg_t;
#if defined(_KERNEL) #if defined(_KERNEL)
#include <sys/t_lock.h> #include <sys/t_lock.h>

View File

@ -44,6 +44,7 @@
int zfs_no_write_throttle = 0; int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
int zfs_txg_synctime_ms = 1000; /* target millisecs to sync a txg */ int zfs_txg_synctime_ms = 1000; /* target millisecs to sync a txg */
int zfs_txg_history = 60; /* statistics for the last N txgs */
unsigned long zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */ unsigned long zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */
unsigned long zfs_write_limit_max = 0; /* max data payload per txg */ unsigned long zfs_write_limit_max = 0; /* max data payload per txg */
@ -54,6 +55,143 @@ kmutex_t zfs_write_limit_lock;
static pgcnt_t old_physmem = 0; static pgcnt_t old_physmem = 0;
static int
dsl_pool_txg_history_update(kstat_t *ksp, int rw)
{
dsl_pool_t *dp = ksp->ks_private;
txg_history_t *th;
int i = 0;
if (rw == KSTAT_WRITE)
return (EACCES);
if (ksp->ks_data)
kmem_free(ksp->ks_data, ksp->ks_data_size);
mutex_enter(&dp->dp_lock);
ksp->ks_ndata = dp->dp_txg_history_size;
ksp->ks_data_size = dp->dp_txg_history_size * sizeof(kstat_txg_t);
if (ksp->ks_data_size > 0)
ksp->ks_data = kmem_alloc(ksp->ks_data_size, KM_PUSHPAGE);
/* Traversed oldest to youngest for the most readable kstat output */
for (th = list_tail(&dp->dp_txg_history); th != NULL;
th = list_prev(&dp->dp_txg_history, th)) {
mutex_enter(&th->th_lock);
ASSERT3S(i + sizeof(kstat_txg_t), <=, ksp->ks_data_size);
memcpy(ksp->ks_data + i, &th->th_kstat, sizeof(kstat_txg_t));
i += sizeof(kstat_txg_t);
mutex_exit(&th->th_lock);
}
mutex_exit(&dp->dp_lock);
return (0);
}
static void
dsl_pool_txg_history_init(dsl_pool_t *dp, uint64_t txg)
{
char name[KSTAT_STRLEN];
list_create(&dp->dp_txg_history, sizeof (txg_history_t),
offsetof(txg_history_t, th_link));
dsl_pool_txg_history_add(dp, txg);
(void) snprintf(name, KSTAT_STRLEN, "txgs-%s", spa_name(dp->dp_spa));
dp->dp_txg_kstat = kstat_create("zfs", 0, name, "misc",
KSTAT_TYPE_TXG, 0, KSTAT_FLAG_VIRTUAL);
if (dp->dp_txg_kstat) {
dp->dp_txg_kstat->ks_data = NULL;
dp->dp_txg_kstat->ks_private = dp;
dp->dp_txg_kstat->ks_update = dsl_pool_txg_history_update;
kstat_install(dp->dp_txg_kstat);
}
}
static void
dsl_pool_txg_history_destroy(dsl_pool_t *dp)
{
txg_history_t *th;
if (dp->dp_txg_kstat) {
if (dp->dp_txg_kstat->ks_data)
kmem_free(dp->dp_txg_kstat->ks_data,
dp->dp_txg_kstat->ks_data_size);
kstat_delete(dp->dp_txg_kstat);
}
mutex_enter(&dp->dp_lock);
while ((th = list_remove_head(&dp->dp_txg_history))) {
dp->dp_txg_history_size--;
mutex_destroy(&th->th_lock);
kmem_free(th, sizeof(txg_history_t));
}
ASSERT3U(dp->dp_txg_history_size, ==, 0);
list_destroy(&dp->dp_txg_history);
mutex_exit(&dp->dp_lock);
}
txg_history_t *
dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg)
{
txg_history_t *th, *rm;
th = kmem_zalloc(sizeof(txg_history_t), KM_SLEEP);
mutex_init(&th->th_lock, NULL, MUTEX_DEFAULT, NULL);
th->th_kstat.txg = txg;
th->th_kstat.state = TXG_STATE_OPEN;
th->th_kstat.birth = gethrtime();
mutex_enter(&dp->dp_lock);
list_insert_head(&dp->dp_txg_history, th);
dp->dp_txg_history_size++;
while (dp->dp_txg_history_size > zfs_txg_history) {
dp->dp_txg_history_size--;
rm = list_remove_tail(&dp->dp_txg_history);
mutex_destroy(&rm->th_lock);
kmem_free(rm, sizeof(txg_history_t));
}
mutex_exit(&dp->dp_lock);
return (th);
}
/*
* Traversed youngest to oldest because lookups are only done for open
* or syncing txgs which are guaranteed to be at the head of the list.
* The txg_history_t structure will be returned locked.
*/
txg_history_t *
dsl_pool_txg_history_get(dsl_pool_t *dp, uint64_t txg)
{
txg_history_t *th;
mutex_enter(&dp->dp_lock);
for (th = list_head(&dp->dp_txg_history); th != NULL;
th = list_next(&dp->dp_txg_history, th)) {
if (th->th_kstat.txg == txg) {
mutex_enter(&th->th_lock);
break;
}
}
mutex_exit(&dp->dp_lock);
return (th);
}
void
dsl_pool_txg_history_put(txg_history_t *th)
{
mutex_exit(&th->th_lock);
}
int int
dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
{ {
@ -96,6 +234,8 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp->dp_iput_taskq = taskq_create("zfs_iput_taskq", 1, minclsyspri, dp->dp_iput_taskq = taskq_create("zfs_iput_taskq", 1, minclsyspri,
1, 4, 0); 1, 4, 0);
dsl_pool_txg_history_init(dp, txg);
return (dp); return (dp);
} }
@ -213,6 +353,7 @@ dsl_pool_close(dsl_pool_t *dp)
arc_flush(dp->dp_spa); arc_flush(dp->dp_spa);
txg_fini(dp); txg_fini(dp);
dsl_scan_fini(dp); dsl_scan_fini(dp);
dsl_pool_txg_history_destroy(dp);
rw_destroy(&dp->dp_config_rwlock); rw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock); mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_iput_taskq); taskq_destroy(dp->dp_iput_taskq);
@ -864,6 +1005,9 @@ MODULE_PARM_DESC(zfs_write_limit_shift, "log2(fraction of memory) per txg");
module_param(zfs_txg_synctime_ms, int, 0644); module_param(zfs_txg_synctime_ms, int, 0644);
MODULE_PARM_DESC(zfs_txg_synctime_ms, "Target milliseconds between txg sync"); MODULE_PARM_DESC(zfs_txg_synctime_ms, "Target milliseconds between txg sync");
module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history, "Historic statistics for the last N txgs");
module_param(zfs_write_limit_min, ulong, 0444); module_param(zfs_write_limit_min, ulong, 0444);
MODULE_PARM_DESC(zfs_write_limit_min, "Min txg write limit"); MODULE_PARM_DESC(zfs_write_limit_min, "Min txg write limit");

View File

@ -29,6 +29,7 @@
#include <sys/dsl_pool.h> #include <sys/dsl_pool.h>
#include <sys/dsl_scan.h> #include <sys/dsl_scan.h>
#include <sys/callb.h> #include <sys/callb.h>
#include <sys/spa_impl.h>
/* /*
* Pool-wide transaction groups. * Pool-wide transaction groups.
@ -279,6 +280,8 @@ txg_rele_to_sync(txg_handle_t *th)
static void static void
txg_quiesce(dsl_pool_t *dp, uint64_t txg) txg_quiesce(dsl_pool_t *dp, uint64_t txg)
{ {
hrtime_t start;
txg_history_t *th;
tx_state_t *tx = &dp->dp_tx; tx_state_t *tx = &dp->dp_tx;
int g = txg & TXG_MASK; int g = txg & TXG_MASK;
int c; int c;
@ -292,6 +295,15 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
ASSERT(txg == tx->tx_open_txg); ASSERT(txg == tx->tx_open_txg);
tx->tx_open_txg++; tx->tx_open_txg++;
/*
* Measure how long the txg was open and replace the kstat.
*/
th = dsl_pool_txg_history_get(dp, txg);
th->th_kstat.open_time = gethrtime() - th->th_kstat.birth;
th->th_kstat.state = TXG_STATE_QUIESCING;
dsl_pool_txg_history_put(th);
dsl_pool_txg_history_add(dp, tx->tx_open_txg);
/* /*
* Now that we've incremented tx_open_txg, we can let threads * Now that we've incremented tx_open_txg, we can let threads
* enter the next transaction group. * enter the next transaction group.
@ -302,6 +314,8 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
/* /*
* Quiesce the transaction group by waiting for everyone to txg_exit(). * Quiesce the transaction group by waiting for everyone to txg_exit().
*/ */
start = gethrtime();
for (c = 0; c < max_ncpus; c++) { for (c = 0; c < max_ncpus; c++) {
tx_cpu_t *tc = &tx->tx_cpu[c]; tx_cpu_t *tc = &tx->tx_cpu[c];
mutex_enter(&tc->tc_lock); mutex_enter(&tc->tc_lock);
@ -309,6 +323,13 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
cv_wait(&tc->tc_cv[g], &tc->tc_lock); cv_wait(&tc->tc_cv[g], &tc->tc_lock);
mutex_exit(&tc->tc_lock); mutex_exit(&tc->tc_lock);
} }
/*
* Measure how long the txg took to quiesce.
*/
th = dsl_pool_txg_history_get(dp, txg);
th->th_kstat.quiesce_time = gethrtime() - start;
dsl_pool_txg_history_put(th);
} }
static void static void
@ -395,6 +416,8 @@ txg_sync_thread(dsl_pool_t *dp)
start = delta = 0; start = delta = 0;
for (;;) { for (;;) {
hrtime_t hrstart;
txg_history_t *th;
uint64_t timer, timeout; uint64_t timer, timeout;
uint64_t txg; uint64_t txg;
@ -441,11 +464,17 @@ txg_sync_thread(dsl_pool_t *dp)
tx->tx_syncing_txg = txg; tx->tx_syncing_txg = txg;
cv_broadcast(&tx->tx_quiesce_more_cv); cv_broadcast(&tx->tx_quiesce_more_cv);
th = dsl_pool_txg_history_get(dp, txg);
th->th_kstat.state = TXG_STATE_SYNCING;
vdev_get_stats(spa->spa_root_vdev, &th->th_vs1);
dsl_pool_txg_history_put(th);
dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
mutex_exit(&tx->tx_sync_lock); mutex_exit(&tx->tx_sync_lock);
start = ddi_get_lbolt(); start = ddi_get_lbolt();
hrstart = gethrtime();
spa_sync(spa, txg); spa_sync(spa, txg);
delta = ddi_get_lbolt() - start; delta = ddi_get_lbolt() - start;
@ -458,6 +487,23 @@ txg_sync_thread(dsl_pool_t *dp)
* Dispatch commit callbacks to worker threads. * Dispatch commit callbacks to worker threads.
*/ */
txg_dispatch_callbacks(dp, txg); txg_dispatch_callbacks(dp, txg);
/*
* Measure the txg sync time determine the amount of I/O done.
*/
th = dsl_pool_txg_history_get(dp, txg);
vdev_get_stats(spa->spa_root_vdev, &th->th_vs2);
th->th_kstat.sync_time = gethrtime() - hrstart;
th->th_kstat.nread = th->th_vs2.vs_bytes[ZIO_TYPE_READ] -
th->th_vs1.vs_bytes[ZIO_TYPE_READ];
th->th_kstat.nwritten = th->th_vs2.vs_bytes[ZIO_TYPE_WRITE] -
th->th_vs1.vs_bytes[ZIO_TYPE_WRITE];
th->th_kstat.reads = th->th_vs2.vs_ops[ZIO_TYPE_READ] -
th->th_vs1.vs_ops[ZIO_TYPE_READ];
th->th_kstat.writes = th->th_vs2.vs_ops[ZIO_TYPE_WRITE] -
th->th_vs1.vs_ops[ZIO_TYPE_WRITE];
th->th_kstat.state = TXG_STATE_COMMITTED;
dsl_pool_txg_history_put(th);
} }
} }