Add support for per dataset zil stats and use wmsum counters

ZIL kstats are reported in an inclusive way, i.e., the same counters are
shared to capture all the activity happening in the ZIL. Added support
to report ZIL stats for every dataset individually by combining them
with the already exposed dataset kstats.

Wmsum uses per-CPU counters and incurs less overhead than atomic
operations. Updated the ZIL kstats to use wmsum counters in place of
atomic operations.

Reviewed-by: Christian Schwarz <christian.schwarz@nutanix.com>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #13636
This commit is contained in:
ixhamza 2022-07-21 05:14:06 +05:00 committed by GitHub
parent 33dba8c792
commit fb087146de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 231 additions and 61 deletions

View File

@ -2908,7 +2908,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
zil_close(zd->zd_zilog); zil_close(zd->zd_zilog);
/* zfsvfs_setup() */ /* zfsvfs_setup() */
VERIFY3P(zil_open(os, ztest_get_data), ==, zd->zd_zilog); VERIFY3P(zil_open(os, ztest_get_data, NULL), ==, zd->zd_zilog);
zil_replay(os, zd, ztest_replay_vector); zil_replay(os, zd, ztest_replay_vector);
(void) pthread_rwlock_unlock(&zd->zd_zilog_lock); (void) pthread_rwlock_unlock(&zd->zd_zilog_lock);
@ -4378,7 +4378,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
/* /*
* Open the intent log for it. * Open the intent log for it.
*/ */
zilog = zil_open(os, ztest_get_data); zilog = zil_open(os, ztest_get_data, NULL);
/* /*
* Put some objects in there, do a little I/O to them, * Put some objects in there, do a little I/O to them,
@ -7304,7 +7304,7 @@ ztest_dataset_open(int d)
zilog->zl_parse_lr_count, zilog->zl_parse_lr_count,
zilog->zl_replaying_seq); zilog->zl_replaying_seq);
zilog = zil_open(os, ztest_get_data); zilog = zil_open(os, ztest_get_data, NULL);
if (zilog->zl_replaying_seq != 0 && if (zilog->zl_replaying_seq != 0 &&
zilog->zl_replaying_seq < committed_seq) zilog->zl_replaying_seq < committed_seq)

View File

@ -30,6 +30,7 @@
#include <sys/wmsum.h> #include <sys/wmsum.h>
#include <sys/dmu.h> #include <sys/dmu.h>
#include <sys/kstat.h> #include <sys/kstat.h>
#include <sys/zil.h>
typedef struct dataset_sum_stats_t { typedef struct dataset_sum_stats_t {
wmsum_t dss_writes; wmsum_t dss_writes;
@ -56,14 +57,19 @@ typedef struct dataset_kstat_values {
* entry is removed from the unlinked set * entry is removed from the unlinked set
*/ */
kstat_named_t dkv_nunlinked; kstat_named_t dkv_nunlinked;
/*
* Per dataset zil kstats
*/
zil_kstat_values_t dkv_zil_stats;
} dataset_kstat_values_t; } dataset_kstat_values_t;
typedef struct dataset_kstats { typedef struct dataset_kstats {
dataset_sum_stats_t dk_sums; dataset_sum_stats_t dk_sums;
zil_sums_t dk_zil_sums;
kstat_t *dk_kstats; kstat_t *dk_kstats;
} dataset_kstats_t; } dataset_kstats_t;
void dataset_kstats_create(dataset_kstats_t *, objset_t *); int dataset_kstats_create(dataset_kstats_t *, objset_t *);
void dataset_kstats_destroy(dataset_kstats_t *); void dataset_kstats_destroy(dataset_kstats_t *);
void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t); void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);

View File

@ -33,6 +33,7 @@
#include <sys/zio.h> #include <sys/zio.h>
#include <sys/dmu.h> #include <sys/dmu.h>
#include <sys/zio_crypt.h> #include <sys/zio_crypt.h>
#include <sys/wmsum.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -472,12 +473,34 @@ typedef struct zil_stats {
*/ */
kstat_named_t zil_itx_metaslab_slog_count; kstat_named_t zil_itx_metaslab_slog_count;
kstat_named_t zil_itx_metaslab_slog_bytes; kstat_named_t zil_itx_metaslab_slog_bytes;
} zil_stats_t; } zil_kstat_values_t;
#define ZIL_STAT_INCR(stat, val) \ typedef struct zil_sums {
atomic_add_64(&zil_stats.stat.value.ui64, (val)); wmsum_t zil_commit_count;
#define ZIL_STAT_BUMP(stat) \ wmsum_t zil_commit_writer_count;
ZIL_STAT_INCR(stat, 1); wmsum_t zil_itx_count;
wmsum_t zil_itx_indirect_count;
wmsum_t zil_itx_indirect_bytes;
wmsum_t zil_itx_copied_count;
wmsum_t zil_itx_copied_bytes;
wmsum_t zil_itx_needcopy_count;
wmsum_t zil_itx_needcopy_bytes;
wmsum_t zil_itx_metaslab_normal_count;
wmsum_t zil_itx_metaslab_normal_bytes;
wmsum_t zil_itx_metaslab_slog_count;
wmsum_t zil_itx_metaslab_slog_bytes;
} zil_sums_t;
/*
 * Add val to the ZIL counter named stat.
 *
 * The global counter set (zil_sums_global) is always updated; the
 * per-dataset set is updated as well when the zilog has one attached
 * ((zil)->zl_sums is non-NULL). val is evaluated exactly once, into
 * tmpval, so an argument with side effects is safe.
 *
 * The expansion names zil_sums_global directly, so that symbol must be
 * in scope wherever the macro is used.
 */
#define ZIL_STAT_INCR(zil, stat, val) \
	do { \
		int64_t tmpval = (val); \
		wmsum_add(&(zil_sums_global.stat), tmpval); \
		if ((zil)->zl_sums) \
			wmsum_add(&((zil)->zl_sums->stat), tmpval); \
	} while (0)

/*
 * Increment the ZIL counter named stat by one.
 *
 * The definition deliberately has no trailing semicolon: the caller
 * supplies it, so the macro expands to a single statement and remains
 * safe inside an unbraced if/else.
 */
#define ZIL_STAT_BUMP(zil, stat) \
	ZIL_STAT_INCR(zil, stat, 1)
typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg, typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg,
uint64_t txg); uint64_t txg);
@ -497,7 +520,8 @@ extern void zil_fini(void);
extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys); extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
extern void zil_free(zilog_t *zilog); extern void zil_free(zilog_t *zilog);
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data); extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data,
zil_sums_t *zil_sums);
extern void zil_close(zilog_t *zilog); extern void zil_close(zilog_t *zilog);
extern void zil_replay(objset_t *os, void *arg, extern void zil_replay(objset_t *os, void *arg,
@ -537,6 +561,11 @@ extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval);
extern uint64_t zil_max_copied_data(zilog_t *zilog); extern uint64_t zil_max_copied_data(zilog_t *zilog);
extern uint64_t zil_max_log_data(zilog_t *zilog); extern uint64_t zil_max_log_data(zilog_t *zilog);
extern void zil_sums_init(zil_sums_t *zs);
extern void zil_sums_fini(zil_sums_t *zs);
extern void zil_kstat_values_update(zil_kstat_values_t *zs,
zil_sums_t *zil_sums);
extern int zil_replay_disable; extern int zil_replay_disable;
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -222,6 +222,9 @@ struct zilog {
* (see zil_max_copied_data()). * (see zil_max_copied_data()).
*/ */
uint64_t zl_max_block_size; uint64_t zl_max_block_size;
/* Pointer for per dataset zil sums */
zil_sums_t *zl_sums;
}; };
typedef struct zil_bp_node { typedef struct zil_bp_node {

View File

@ -1027,8 +1027,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
if (error) if (error)
return (error); return (error);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
/* /*
* If we are not mounting (ie: online recv), then we don't * If we are not mounting (ie: online recv), then we don't
* have to worry about replaying the log as we blocked all * have to worry about replaying the log as we blocked all
@ -1038,7 +1036,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
boolean_t readonly; boolean_t readonly;
ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
if (error)
return (error);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
&zfsvfs->z_kstat.dk_zil_sums);
/* /*
* During replay we remove the read only flag to * During replay we remove the read only flag to
@ -1109,6 +1111,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
/* restore readonly bit */ /* restore readonly bit */
if (readonly != 0) if (readonly != 0)
zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
} else {
ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
&zfsvfs->z_kstat.dk_zil_sums);
} }
/* /*

View File

@ -1189,7 +1189,7 @@ zvol_ensure_zilog(zvol_state_t *zv)
} }
if (zv->zv_zilog == NULL) { if (zv->zv_zilog == NULL) {
zv->zv_zilog = zil_open(zv->zv_objset, zv->zv_zilog = zil_open(zv->zv_objset,
zvol_get_data); zvol_get_data, &zv->zv_kstat.dk_zil_sums);
zv->zv_flags |= ZVOL_WRITTEN_TO; zv->zv_flags |= ZVOL_WRITTEN_TO;
/* replay / destroy done in zvol_os_create_minor() */ /* replay / destroy done in zvol_os_create_minor() */
VERIFY0(zv->zv_zilog->zl_header->zh_flags & VERIFY0(zv->zv_zilog->zl_header->zh_flags &
@ -1422,8 +1422,12 @@ zvol_os_create_minor(const char *name)
zv->zv_volsize = volsize; zv->zv_volsize = volsize;
zv->zv_objset = os; zv->zv_objset = os;
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
if (error)
goto out_dmu_objset_disown;
ASSERT3P(zv->zv_zilog, ==, NULL); ASSERT3P(zv->zv_zilog, ==, NULL);
zv->zv_zilog = zil_open(os, zvol_get_data); zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
if (spa_writeable(dmu_objset_spa(os))) { if (spa_writeable(dmu_objset_spa(os))) {
if (zil_replay_disable) if (zil_replay_disable)
zil_destroy(zv->zv_zilog, B_FALSE); zil_destroy(zv->zv_zilog, B_FALSE);
@ -1432,8 +1436,6 @@ zvol_os_create_minor(const char *name)
} }
zil_close(zv->zv_zilog); zil_close(zv->zv_zilog);
zv->zv_zilog = NULL; zv->zv_zilog = NULL;
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
/* TODO: prefetch for geom tasting */ /* TODO: prefetch for geom tasting */

View File

@ -848,8 +848,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
if (error) if (error)
return (error); return (error);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
/* /*
* If we are not mounting (ie: online recv), then we don't * If we are not mounting (ie: online recv), then we don't
* have to worry about replaying the log as we blocked all * have to worry about replaying the log as we blocked all
@ -857,7 +855,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
*/ */
if (mounting) { if (mounting) {
ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
if (error)
return (error);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
&zfsvfs->z_kstat.dk_zil_sums);
/* /*
* During replay we remove the read only flag to * During replay we remove the read only flag to
@ -921,6 +923,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
/* restore readonly bit */ /* restore readonly bit */
if (readonly != 0) if (readonly != 0)
readonly_changed_cb(zfsvfs, B_TRUE); readonly_changed_cb(zfsvfs, B_TRUE);
} else {
ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
&zfsvfs->z_kstat.dk_zil_sums);
} }
/* /*

View File

@ -558,7 +558,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
rw_enter(&zv->zv_suspend_lock, RW_WRITER); rw_enter(&zv->zv_suspend_lock, RW_WRITER);
if (zv->zv_zilog == NULL) { if (zv->zv_zilog == NULL) {
zv->zv_zilog = zil_open(zv->zv_objset, zv->zv_zilog = zil_open(zv->zv_objset,
zvol_get_data); zvol_get_data, &zv->zv_kstat.dk_zil_sums);
zv->zv_flags |= ZVOL_WRITTEN_TO; zv->zv_flags |= ZVOL_WRITTEN_TO;
/* replay / destroy done in zvol_create_minor */ /* replay / destroy done in zvol_create_minor */
VERIFY0((zv->zv_zilog->zl_header->zh_flags & VERIFY0((zv->zv_zilog->zl_header->zh_flags &
@ -1408,8 +1408,12 @@ zvol_os_create_minor(const char *name)
blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue); blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue);
#endif #endif
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
if (error)
goto out_dmu_objset_disown;
ASSERT3P(zv->zv_zilog, ==, NULL); ASSERT3P(zv->zv_zilog, ==, NULL);
zv->zv_zilog = zil_open(os, zvol_get_data); zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
if (spa_writeable(dmu_objset_spa(os))) { if (spa_writeable(dmu_objset_spa(os))) {
if (zil_replay_disable) if (zil_replay_disable)
zil_destroy(zv->zv_zilog, B_FALSE); zil_destroy(zv->zv_zilog, B_FALSE);
@ -1418,8 +1422,6 @@ zvol_os_create_minor(const char *name)
} }
zil_close(zv->zv_zilog); zil_close(zv->zv_zilog);
zv->zv_zilog = NULL; zv->zv_zilog = NULL;
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
/* /*
* When udev detects the addition of the device it will immediately * When udev detects the addition of the device it will immediately

View File

@ -37,18 +37,33 @@ static dataset_kstat_values_t empty_dataset_kstats = {
{ "nread", KSTAT_DATA_UINT64 }, { "nread", KSTAT_DATA_UINT64 },
{ "nunlinks", KSTAT_DATA_UINT64 }, { "nunlinks", KSTAT_DATA_UINT64 },
{ "nunlinked", KSTAT_DATA_UINT64 }, { "nunlinked", KSTAT_DATA_UINT64 },
{
{ "zil_commit_count", KSTAT_DATA_UINT64 },
{ "zil_commit_writer_count", KSTAT_DATA_UINT64 },
{ "zil_itx_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_copied_count", KSTAT_DATA_UINT64 },
{ "zil_itx_copied_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_needcopy_count", KSTAT_DATA_UINT64 },
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 }
}
}; };
static int static int
dataset_kstats_update(kstat_t *ksp, int rw) dataset_kstats_update(kstat_t *ksp, int rw)
{ {
dataset_kstats_t *dk = ksp->ks_private; dataset_kstats_t *dk = ksp->ks_private;
ASSERT3P(dk->dk_kstats->ks_data, ==, ksp->ks_data); dataset_kstat_values_t *dkv = ksp->ks_data;
ASSERT3P(dk->dk_kstats->ks_data, ==, dkv);
if (rw == KSTAT_WRITE) if (rw == KSTAT_WRITE)
return (EACCES); return (EACCES);
dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data;
dkv->dkv_writes.value.ui64 = dkv->dkv_writes.value.ui64 =
wmsum_value(&dk->dk_sums.dss_writes); wmsum_value(&dk->dk_sums.dss_writes);
dkv->dkv_nwritten.value.ui64 = dkv->dkv_nwritten.value.ui64 =
@ -62,10 +77,12 @@ dataset_kstats_update(kstat_t *ksp, int rw)
dkv->dkv_nunlinked.value.ui64 = dkv->dkv_nunlinked.value.ui64 =
wmsum_value(&dk->dk_sums.dss_nunlinked); wmsum_value(&dk->dk_sums.dss_nunlinked);
zil_kstat_values_update(&dkv->dkv_zil_stats, &dk->dk_zil_sums);
return (0); return (0);
} }
void int
dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset) dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
{ {
/* /*
@ -75,7 +92,7 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
* a filesystem with many snapshots, we skip them for now. * a filesystem with many snapshots, we skip them for now.
*/ */
if (dmu_objset_is_snapshot(objset)) if (dmu_objset_is_snapshot(objset))
return; return (0);
/* /*
* At the time of this writing, KSTAT_STRLEN is 255 in Linux, * At the time of this writing, KSTAT_STRLEN is 255 in Linux,
@ -94,13 +111,13 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
zfs_dbgmsg("failed to create dataset kstat for objset %lld: " zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
" snprintf() for kstat module name returned %d", " snprintf() for kstat module name returned %d",
(unsigned long long)dmu_objset_id(objset), n); (unsigned long long)dmu_objset_id(objset), n);
return; return (SET_ERROR(EINVAL));
} else if (n >= KSTAT_STRLEN) { } else if (n >= KSTAT_STRLEN) {
zfs_dbgmsg("failed to create dataset kstat for objset %lld: " zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
"kstat module name length (%d) exceeds limit (%d)", "kstat module name length (%d) exceeds limit (%d)",
(unsigned long long)dmu_objset_id(objset), (unsigned long long)dmu_objset_id(objset),
n, KSTAT_STRLEN); n, KSTAT_STRLEN);
return; return (SET_ERROR(ENAMETOOLONG));
} }
char kstat_name[KSTAT_STRLEN]; char kstat_name[KSTAT_STRLEN];
@ -110,7 +127,7 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
zfs_dbgmsg("failed to create dataset kstat for objset %lld: " zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
" snprintf() for kstat name returned %d", " snprintf() for kstat name returned %d",
(unsigned long long)dmu_objset_id(objset), n); (unsigned long long)dmu_objset_id(objset), n);
return; return (SET_ERROR(EINVAL));
} }
ASSERT3U(n, <, KSTAT_STRLEN); ASSERT3U(n, <, KSTAT_STRLEN);
@ -119,7 +136,7 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
sizeof (empty_dataset_kstats) / sizeof (kstat_named_t), sizeof (empty_dataset_kstats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL); KSTAT_FLAG_VIRTUAL);
if (kstat == NULL) if (kstat == NULL)
return; return (SET_ERROR(ENOMEM));
dataset_kstat_values_t *dk_kstats = dataset_kstat_values_t *dk_kstats =
kmem_alloc(sizeof (empty_dataset_kstats), KM_SLEEP); kmem_alloc(sizeof (empty_dataset_kstats), KM_SLEEP);
@ -137,15 +154,17 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
kstat->ks_private = dk; kstat->ks_private = dk;
kstat->ks_data_size += ZFS_MAX_DATASET_NAME_LEN; kstat->ks_data_size += ZFS_MAX_DATASET_NAME_LEN;
kstat_install(kstat);
dk->dk_kstats = kstat;
wmsum_init(&dk->dk_sums.dss_writes, 0); wmsum_init(&dk->dk_sums.dss_writes, 0);
wmsum_init(&dk->dk_sums.dss_nwritten, 0); wmsum_init(&dk->dk_sums.dss_nwritten, 0);
wmsum_init(&dk->dk_sums.dss_reads, 0); wmsum_init(&dk->dk_sums.dss_reads, 0);
wmsum_init(&dk->dk_sums.dss_nread, 0); wmsum_init(&dk->dk_sums.dss_nread, 0);
wmsum_init(&dk->dk_sums.dss_nunlinks, 0); wmsum_init(&dk->dk_sums.dss_nunlinks, 0);
wmsum_init(&dk->dk_sums.dss_nunlinked, 0); wmsum_init(&dk->dk_sums.dss_nunlinked, 0);
zil_sums_init(&dk->dk_zil_sums);
dk->dk_kstats = kstat;
kstat_install(kstat);
return (0);
} }
void void
@ -155,19 +174,19 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
return; return;
dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data; dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data;
kstat_delete(dk->dk_kstats);
dk->dk_kstats = NULL;
kmem_free(KSTAT_NAMED_STR_PTR(&dkv->dkv_ds_name), kmem_free(KSTAT_NAMED_STR_PTR(&dkv->dkv_ds_name),
KSTAT_NAMED_STR_BUFLEN(&dkv->dkv_ds_name)); KSTAT_NAMED_STR_BUFLEN(&dkv->dkv_ds_name));
kmem_free(dkv, sizeof (empty_dataset_kstats)); kmem_free(dkv, sizeof (empty_dataset_kstats));
kstat_delete(dk->dk_kstats);
dk->dk_kstats = NULL;
wmsum_fini(&dk->dk_sums.dss_writes); wmsum_fini(&dk->dk_sums.dss_writes);
wmsum_fini(&dk->dk_sums.dss_nwritten); wmsum_fini(&dk->dk_sums.dss_nwritten);
wmsum_fini(&dk->dk_sums.dss_reads); wmsum_fini(&dk->dk_sums.dss_reads);
wmsum_fini(&dk->dk_sums.dss_nread); wmsum_fini(&dk->dk_sums.dss_nread);
wmsum_fini(&dk->dk_sums.dss_nunlinks); wmsum_fini(&dk->dk_sums.dss_nunlinks);
wmsum_fini(&dk->dk_sums.dss_nunlinked); wmsum_fini(&dk->dk_sums.dss_nunlinked);
zil_sums_fini(&dk->dk_zil_sums);
} }
void void

View File

@ -43,6 +43,7 @@
#include <sys/metaslab.h> #include <sys/metaslab.h>
#include <sys/trace_zfs.h> #include <sys/trace_zfs.h>
#include <sys/abd.h> #include <sys/abd.h>
#include <sys/wmsum.h>
/* /*
* The ZFS Intent Log (ZIL) saves "transaction records" (itxs) of system * The ZFS Intent Log (ZIL) saves "transaction records" (itxs) of system
@ -94,7 +95,7 @@ static int zfs_commit_timeout_pct = 5;
/* /*
* See zil.h for more information about these fields. * See zil.h for more information about these fields.
*/ */
static zil_stats_t zil_stats = { static zil_kstat_values_t zil_stats = {
{ "zil_commit_count", KSTAT_DATA_UINT64 }, { "zil_commit_count", KSTAT_DATA_UINT64 },
{ "zil_commit_writer_count", KSTAT_DATA_UINT64 }, { "zil_commit_writer_count", KSTAT_DATA_UINT64 },
{ "zil_itx_count", KSTAT_DATA_UINT64 }, { "zil_itx_count", KSTAT_DATA_UINT64 },
@ -110,7 +111,8 @@ static zil_stats_t zil_stats = {
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 }, { "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
}; };
static kstat_t *zil_ksp; static zil_sums_t zil_sums_global;
static kstat_t *zil_kstats_global;
/* /*
* Disable intent logging replay. This global ZIL switch affects all pools. * Disable intent logging replay. This global ZIL switch affects all pools.
@ -213,6 +215,21 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
zc->zc_word[ZIL_ZC_SEQ] = 1ULL; zc->zc_word[ZIL_ZC_SEQ] = 1ULL;
} }
/*
 * kstat update callback for the global ZIL kstat.
 *
 * Refreshes the named values in ksp->ks_data from the zil_sums_global
 * counters. Returns 0 on a read; a write request (rw == KSTAT_WRITE) is
 * refused with EACCES and leaves the values untouched.
 */
static int
zil_kstats_global_update(kstat_t *ksp, int rw)
{
zil_kstat_values_t *zs = ksp->ks_data;
/* This callback is only meant to serve the static zil_stats table. */
ASSERT3P(&zil_stats, ==, zs);
/* The counters cannot be modified through the kstat interface. */
if (rw == KSTAT_WRITE) {
return (SET_ERROR(EACCES));
}
zil_kstat_values_update(zs, &zil_sums_global);
return (0);
}
/* /*
* Read a log block and make sure it's valid. * Read a log block and make sure it's valid.
*/ */
@ -337,6 +354,73 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
return (error); return (error);
} }
/*
 * Initialize every wmsum counter in a zil_sums_t.
 *
 * Calls wmsum_init() with 0 on each of the counters in zs. The matching
 * teardown is zil_sums_fini().
 */
void
zil_sums_init(zil_sums_t *zs)
{
wmsum_init(&zs->zil_commit_count, 0);
wmsum_init(&zs->zil_commit_writer_count, 0);
wmsum_init(&zs->zil_itx_count, 0);
wmsum_init(&zs->zil_itx_indirect_count, 0);
wmsum_init(&zs->zil_itx_indirect_bytes, 0);
wmsum_init(&zs->zil_itx_copied_count, 0);
wmsum_init(&zs->zil_itx_copied_bytes, 0);
wmsum_init(&zs->zil_itx_needcopy_count, 0);
wmsum_init(&zs->zil_itx_needcopy_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_count, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_count, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_bytes, 0);
}
/*
 * Tear down every wmsum counter in a zil_sums_t.
 *
 * Calls wmsum_fini() on each counter that zil_sums_init() set up.
 */
void
zil_sums_fini(zil_sums_t *zs)
{
wmsum_fini(&zs->zil_commit_count);
wmsum_fini(&zs->zil_commit_writer_count);
wmsum_fini(&zs->zil_itx_count);
wmsum_fini(&zs->zil_itx_indirect_count);
wmsum_fini(&zs->zil_itx_indirect_bytes);
wmsum_fini(&zs->zil_itx_copied_count);
wmsum_fini(&zs->zil_itx_copied_bytes);
wmsum_fini(&zs->zil_itx_needcopy_count);
wmsum_fini(&zs->zil_itx_needcopy_bytes);
wmsum_fini(&zs->zil_itx_metaslab_normal_count);
wmsum_fini(&zs->zil_itx_metaslab_normal_bytes);
wmsum_fini(&zs->zil_itx_metaslab_slog_count);
wmsum_fini(&zs->zil_itx_metaslab_slog_bytes);
}
/*
 * Publish a set of ZIL counters into a kstat value table.
 *
 * For each counter in zil_sums, reads its current value with
 * wmsum_value() and stores it in the ui64 value of the same-named
 * entry in zs. Used for both the global ZIL kstat and the per-dataset
 * kstats.
 */
void
zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums)
{
zs->zil_commit_count.value.ui64 =
wmsum_value(&zil_sums->zil_commit_count);
zs->zil_commit_writer_count.value.ui64 =
wmsum_value(&zil_sums->zil_commit_writer_count);
zs->zil_itx_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_count);
zs->zil_itx_indirect_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_indirect_count);
zs->zil_itx_indirect_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_indirect_bytes);
zs->zil_itx_copied_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_copied_count);
zs->zil_itx_copied_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_copied_bytes);
zs->zil_itx_needcopy_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_needcopy_count);
zs->zil_itx_needcopy_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_needcopy_bytes);
zs->zil_itx_metaslab_normal_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_count);
zs->zil_itx_metaslab_normal_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_bytes);
zs->zil_itx_metaslab_slog_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_count);
zs->zil_itx_metaslab_slog_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_bytes);
}
/* /*
* Parse the intent log, and call parse_func for each valid record within. * Parse the intent log, and call parse_func for each valid record within.
*/ */
@ -1644,11 +1728,13 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
BP_ZERO(bp); BP_ZERO(bp);
error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog); error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
if (slog) { if (slog) {
ZIL_STAT_BUMP(zil_itx_metaslab_slog_count); ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused); ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
lwb->lwb_nused);
} else { } else {
ZIL_STAT_BUMP(zil_itx_metaslab_normal_count); ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
ZIL_STAT_INCR(zil_itx_metaslab_normal_bytes, lwb->lwb_nused); ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
lwb->lwb_nused);
} }
if (error == 0) { if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg); ASSERT3U(bp->blk_birth, ==, txg);
@ -1818,7 +1904,7 @@ cont:
lrcb = (lr_t *)lr_buf; /* Like lrc, but inside lwb. */ lrcb = (lr_t *)lr_buf; /* Like lrc, but inside lwb. */
lrwb = (lr_write_t *)lrcb; /* Like lrw, but inside lwb. */ lrwb = (lr_write_t *)lrcb; /* Like lrw, but inside lwb. */
ZIL_STAT_BUMP(zil_itx_count); ZIL_STAT_BUMP(zilog, zil_itx_count);
/* /*
* If it's a write, fetch the data or get its blkptr as appropriate. * If it's a write, fetch the data or get its blkptr as appropriate.
@ -1827,8 +1913,9 @@ cont:
if (txg > spa_freeze_txg(zilog->zl_spa)) if (txg > spa_freeze_txg(zilog->zl_spa))
txg_wait_synced(zilog->zl_dmu_pool, txg); txg_wait_synced(zilog->zl_dmu_pool, txg);
if (itx->itx_wr_state == WR_COPIED) { if (itx->itx_wr_state == WR_COPIED) {
ZIL_STAT_BUMP(zil_itx_copied_count); ZIL_STAT_BUMP(zilog, zil_itx_copied_count);
ZIL_STAT_INCR(zil_itx_copied_bytes, lrw->lr_length); ZIL_STAT_INCR(zilog, zil_itx_copied_bytes,
lrw->lr_length);
} else { } else {
char *dbuf; char *dbuf;
int error; int error;
@ -1840,13 +1927,14 @@ cont:
lrwb->lr_length = dnow; lrwb->lr_length = dnow;
lrw->lr_offset += dnow; lrw->lr_offset += dnow;
lrw->lr_length -= dnow; lrw->lr_length -= dnow;
ZIL_STAT_BUMP(zil_itx_needcopy_count); ZIL_STAT_BUMP(zilog, zil_itx_needcopy_count);
ZIL_STAT_INCR(zil_itx_needcopy_bytes, dnow); ZIL_STAT_INCR(zilog, zil_itx_needcopy_bytes,
dnow);
} else { } else {
ASSERT3S(itx->itx_wr_state, ==, WR_INDIRECT); ASSERT3S(itx->itx_wr_state, ==, WR_INDIRECT);
dbuf = NULL; dbuf = NULL;
ZIL_STAT_BUMP(zil_itx_indirect_count); ZIL_STAT_BUMP(zilog, zil_itx_indirect_count);
ZIL_STAT_INCR(zil_itx_indirect_bytes, ZIL_STAT_INCR(zilog, zil_itx_indirect_bytes,
lrw->lr_length); lrw->lr_length);
} }
@ -2611,7 +2699,7 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
goto out; goto out;
} }
ZIL_STAT_BUMP(zil_commit_writer_count); ZIL_STAT_BUMP(zilog, zil_commit_writer_count);
zil_get_commit_list(zilog); zil_get_commit_list(zilog);
zil_prune_commit_list(zilog); zil_prune_commit_list(zilog);
@ -3088,7 +3176,7 @@ zil_commit(zilog_t *zilog, uint64_t foid)
void void
zil_commit_impl(zilog_t *zilog, uint64_t foid) zil_commit_impl(zilog_t *zilog, uint64_t foid)
{ {
ZIL_STAT_BUMP(zil_commit_count); ZIL_STAT_BUMP(zilog, zil_commit_count);
/* /*
* Move the "async" itxs for the specified foid to the "sync" * Move the "async" itxs for the specified foid to the "sync"
@ -3271,13 +3359,16 @@ zil_init(void)
zil_zcw_cache = kmem_cache_create("zil_zcw_cache", zil_zcw_cache = kmem_cache_create("zil_zcw_cache",
sizeof (zil_commit_waiter_t), 0, NULL, NULL, NULL, NULL, NULL, 0); sizeof (zil_commit_waiter_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
zil_ksp = kstat_create("zfs", 0, "zil", "misc", zil_sums_init(&zil_sums_global);
zil_kstats_global = kstat_create("zfs", 0, "zil", "misc",
KSTAT_TYPE_NAMED, sizeof (zil_stats) / sizeof (kstat_named_t), KSTAT_TYPE_NAMED, sizeof (zil_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL); KSTAT_FLAG_VIRTUAL);
if (zil_ksp != NULL) { if (zil_kstats_global != NULL) {
zil_ksp->ks_data = &zil_stats; zil_kstats_global->ks_data = &zil_stats;
kstat_install(zil_ksp); zil_kstats_global->ks_update = zil_kstats_global_update;
zil_kstats_global->ks_private = NULL;
kstat_install(zil_kstats_global);
} }
} }
@ -3287,10 +3378,12 @@ zil_fini(void)
kmem_cache_destroy(zil_zcw_cache); kmem_cache_destroy(zil_zcw_cache);
kmem_cache_destroy(zil_lwb_cache); kmem_cache_destroy(zil_lwb_cache);
if (zil_ksp != NULL) { if (zil_kstats_global != NULL) {
kstat_delete(zil_ksp); kstat_delete(zil_kstats_global);
zil_ksp = NULL; zil_kstats_global = NULL;
} }
zil_sums_fini(&zil_sums_global);
} }
void void
@ -3388,7 +3481,7 @@ zil_free(zilog_t *zilog)
* Open an intent log. * Open an intent log.
*/ */
zilog_t * zilog_t *
zil_open(objset_t *os, zil_get_data_t *get_data) zil_open(objset_t *os, zil_get_data_t *get_data, zil_sums_t *zil_sums)
{ {
zilog_t *zilog = dmu_objset_zil(os); zilog_t *zilog = dmu_objset_zil(os);
@ -3397,6 +3490,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
ASSERT(list_is_empty(&zilog->zl_lwb_list)); ASSERT(list_is_empty(&zilog->zl_lwb_list));
zilog->zl_get_data = get_data; zilog->zl_get_data = get_data;
zilog->zl_sums = zil_sums;
return (zilog); return (zilog);
} }
@ -3838,6 +3932,9 @@ EXPORT_SYMBOL(zil_lwb_add_block);
EXPORT_SYMBOL(zil_bp_tree_add); EXPORT_SYMBOL(zil_bp_tree_add);
EXPORT_SYMBOL(zil_set_sync); EXPORT_SYMBOL(zil_set_sync);
EXPORT_SYMBOL(zil_set_logbias); EXPORT_SYMBOL(zil_set_logbias);
EXPORT_SYMBOL(zil_sums_init);
EXPORT_SYMBOL(zil_sums_fini);
EXPORT_SYMBOL(zil_kstat_values_update);
ZFS_MODULE_PARAM(zfs, zfs_, commit_timeout_pct, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, commit_timeout_pct, INT, ZMOD_RW,
"ZIL block open timeout percentage"); "ZIL block open timeout percentage");