ddt: lookup and log stats

Adds per-DDT stats counting lookups and where they were serviced from
(either log or backing zap), number of log entries in memory, and flow
rates.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15895
This commit is contained in:
Rob Norris 2023-09-25 11:02:46 +10:00 committed by Brian Behlendorf
parent a1902f4950
commit 0d2707815d
2 changed files with 159 additions and 6 deletions

View File

@ -296,6 +296,8 @@ typedef struct {
uint64_t ddt_flush_force_txg; /* flush hard before this txg */ uint64_t ddt_flush_force_txg; /* flush hard before this txg */
kstat_t *ddt_ksp; /* kstats context */
enum zio_checksum ddt_checksum; /* checksum algorithm in use */ enum zio_checksum ddt_checksum; /* checksum algorithm in use */
spa_t *ddt_spa; /* pool this ddt is on */ spa_t *ddt_spa; /* pool this ddt is on */
objset_t *ddt_os; /* ddt objset (always MOS) */ objset_t *ddt_os; /* ddt objset (always MOS) */

View File

@ -271,6 +271,78 @@ static const uint64_t ddt_version_flags[] = {
/* Dummy version to signal that configure is still necessary */ /* Dummy version to signal that configure is still necessary */
#define DDT_VERSION_UNCONFIGURED (UINT64_MAX) #define DDT_VERSION_UNCONFIGURED (UINT64_MAX)
#ifdef _KERNEL
/* per-DDT kstats */
typedef struct {
/* total lookups and whether they returned new or existing entries */
kstat_named_t dds_lookup;
kstat_named_t dds_lookup_new;
kstat_named_t dds_lookup_existing;
/* entries found on live tree, and if we had to wait for load */
kstat_named_t dds_lookup_live_hit;
kstat_named_t dds_lookup_live_wait;
kstat_named_t dds_lookup_live_miss;
/* entries found on log trees */
kstat_named_t dds_lookup_log_hit;
kstat_named_t dds_lookup_log_active_hit;
kstat_named_t dds_lookup_log_flushing_hit;
kstat_named_t dds_lookup_log_miss;
/* entries found on store objects */
kstat_named_t dds_lookup_stored_hit;
kstat_named_t dds_lookup_stored_miss;
/* number of entries on log trees */
kstat_named_t dds_log_active_entries;
kstat_named_t dds_log_flushing_entries;
/* avg updated/flushed entries per txg */
kstat_named_t dds_log_ingest_rate;
kstat_named_t dds_log_flush_rate;
kstat_named_t dds_log_flush_time_rate;
} ddt_kstats_t;
static const ddt_kstats_t ddt_kstats_template = {
{ "lookup", KSTAT_DATA_UINT64 },
{ "lookup_new", KSTAT_DATA_UINT64 },
{ "lookup_existing", KSTAT_DATA_UINT64 },
{ "lookup_live_hit", KSTAT_DATA_UINT64 },
{ "lookup_live_wait", KSTAT_DATA_UINT64 },
{ "lookup_live_miss", KSTAT_DATA_UINT64 },
{ "lookup_log_hit", KSTAT_DATA_UINT64 },
{ "lookup_log_active_hit", KSTAT_DATA_UINT64 },
{ "lookup_log_flushing_hit", KSTAT_DATA_UINT64 },
{ "lookup_log_miss", KSTAT_DATA_UINT64 },
{ "lookup_stored_hit", KSTAT_DATA_UINT64 },
{ "lookup_stored_miss", KSTAT_DATA_UINT64 },
{ "log_active_entries", KSTAT_DATA_UINT64 },
{ "log_flushing_entries", KSTAT_DATA_UINT64 },
{ "log_ingest_rate", KSTAT_DATA_UINT32 },
{ "log_flush_rate", KSTAT_DATA_UINT32 },
{ "log_flush_time_rate", KSTAT_DATA_UINT32 },
};
#define _DDT_KSTAT_STAT(ddt, stat) \
&((ddt_kstats_t *)(ddt)->ddt_ksp->ks_data)->stat.value.ui64
#define DDT_KSTAT_BUMP(ddt, stat) \
do { atomic_inc_64(_DDT_KSTAT_STAT(ddt, stat)); } while (0)
#define DDT_KSTAT_ADD(ddt, stat, val) \
do { atomic_add_64(_DDT_KSTAT_STAT(ddt, stat), val); } while (0)
#define DDT_KSTAT_SUB(ddt, stat, val) \
do { atomic_sub_64(_DDT_KSTAT_STAT(ddt, stat), val); } while (0)
#define DDT_KSTAT_SET(ddt, stat, val) \
do { atomic_store_64(_DDT_KSTAT_STAT(ddt, stat), val); } while (0)
#define DDT_KSTAT_ZERO(ddt, stat) DDT_KSTAT_SET(ddt, stat, 0)
#else
#define DDT_KSTAT_BUMP(ddt, stat) do {} while (0)
#define DDT_KSTAT_ADD(ddt, stat, val) do {} while (0)
#define DDT_KSTAT_SUB(ddt, stat, val) do {} while (0)
#define DDT_KSTAT_SET(ddt, stat, val) do {} while (0)
#define DDT_KSTAT_ZERO(ddt, stat) do {} while (0)
#endif /* _KERNEL */
static void static void
ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
dmu_tx_t *tx) dmu_tx_t *tx)
@ -969,6 +1041,8 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED); ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED);
} }
DDT_KSTAT_BUMP(ddt, dds_lookup);
ddt_key_fill(&search, bp); ddt_key_fill(&search, bp);
/* Find an existing live entry */ /* Find an existing live entry */
@ -979,11 +1053,13 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
return (NULL); return (NULL);
/* If it's already loaded, we can just return it. */ /* If it's already loaded, we can just return it. */
DDT_KSTAT_BUMP(ddt, dds_lookup_live_hit);
if (dde->dde_flags & DDE_FLAG_LOADED) if (dde->dde_flags & DDE_FLAG_LOADED)
return (dde); return (dde);
/* Someone else is loading it, wait for it. */ /* Someone else is loading it, wait for it. */
dde->dde_waiters++; dde->dde_waiters++;
DDT_KSTAT_BUMP(ddt, dds_lookup_live_wait);
while (!(dde->dde_flags & DDE_FLAG_LOADED)) while (!(dde->dde_flags & DDE_FLAG_LOADED))
cv_wait(&dde->dde_cv, &ddt->ddt_lock); cv_wait(&dde->dde_cv, &ddt->ddt_lock);
dde->dde_waiters--; dde->dde_waiters--;
@ -997,8 +1073,10 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
return (NULL); return (NULL);
} }
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
return (dde); return (dde);
} } else
DDT_KSTAT_BUMP(ddt, dds_lookup_live_miss);
/* Time to make a new entry. */ /* Time to make a new entry. */
dde = ddt_alloc(ddt, &search); dde = ddt_alloc(ddt, &search);
@ -1012,11 +1090,19 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* If its in the log tree, we can "load" it from there */ /* If its in the log tree, we can "load" it from there */
if (ddt->ddt_flags & DDT_FLAG_LOG) { if (ddt->ddt_flags & DDT_FLAG_LOG) {
ddt_lightweight_entry_t ddlwe; ddt_lightweight_entry_t ddlwe;
boolean_t found = B_FALSE;
if (ddt_log_take_key(ddt, ddt->ddt_log_active, if (ddt_log_take_key(ddt, ddt->ddt_log_active,
&search, &ddlwe) ||
ddt_log_take_key(ddt, ddt->ddt_log_flushing,
&search, &ddlwe)) { &search, &ddlwe)) {
DDT_KSTAT_BUMP(ddt, dds_lookup_log_active_hit);
found = B_TRUE;
} else if (ddt_log_take_key(ddt, ddt->ddt_log_flushing,
&search, &ddlwe)) {
DDT_KSTAT_BUMP(ddt, dds_lookup_log_flushing_hit);
found = B_TRUE;
}
if (found) {
dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED; dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED;
dde->dde_type = ddlwe.ddlwe_type; dde->dde_type = ddlwe.ddlwe_type;
@ -1024,8 +1110,13 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
memcpy(dde->dde_phys, &ddlwe.ddlwe_phys, memcpy(dde->dde_phys, &ddlwe.ddlwe_phys,
DDT_PHYS_SIZE(ddt)); DDT_PHYS_SIZE(ddt));
DDT_KSTAT_BUMP(ddt, dds_lookup_log_hit);
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
return (dde); return (dde);
} }
DDT_KSTAT_BUMP(ddt, dds_lookup_log_miss);
} }
/* /*
@ -1069,6 +1160,9 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* Flag cleanup required */ /* Flag cleanup required */
dde->dde_flags |= DDE_FLAG_OVERQUOTA; dde->dde_flags |= DDE_FLAG_OVERQUOTA;
} else if (error == 0) { } else if (error == 0) {
DDT_KSTAT_BUMP(ddt, dds_lookup_stored_hit);
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
/* /*
* The histograms only track inactive (stored or logged) blocks. * The histograms only track inactive (stored or logged) blocks.
* We've just put an entry onto the live list, so we need to * We've just put an entry onto the live list, so we need to
@ -1085,6 +1179,9 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
ddt_lightweight_entry_t ddlwe; ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe); DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
ddt_histogram_sub_entry(ddt, ddh, &ddlwe); ddt_histogram_sub_entry(ddt, ddh, &ddlwe);
} else {
DDT_KSTAT_BUMP(ddt, dds_lookup_stored_miss);
DDT_KSTAT_BUMP(ddt, dds_lookup_new);
} }
/* Entry loaded, everyone can proceed now */ /* Entry loaded, everyone can proceed now */
@ -1317,6 +1414,30 @@ not_found:
return (0); return (0);
} }
static void
ddt_table_alloc_kstats(ddt_t *ddt)
{
#ifdef _KERNEL
char *mod = kmem_asprintf("zfs/%s", spa_name(ddt->ddt_spa));
char *name = kmem_asprintf("ddt_stats_%s",
zio_checksum_table[ddt->ddt_checksum].ci_name);
ddt->ddt_ksp = kstat_create(mod, 0, name, "misc", KSTAT_TYPE_NAMED,
sizeof (ddt_kstats_t) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
if (ddt->ddt_ksp != NULL) {
ddt_kstats_t *dds = kmem_alloc(sizeof (ddt_kstats_t), KM_SLEEP);
memcpy(dds, &ddt_kstats_template, sizeof (ddt_kstats_t));
ddt->ddt_ksp->ks_data = dds;
kstat_install(ddt->ddt_ksp);
}
kmem_strfree(name);
kmem_strfree(mod);
#else
(void) ddt;
#endif /* _KERNEL */
}
static ddt_t * static ddt_t *
ddt_table_alloc(spa_t *spa, enum zio_checksum c) ddt_table_alloc(spa_t *spa, enum zio_checksum c)
{ {
@ -1336,6 +1457,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
ddt->ddt_version = DDT_VERSION_UNCONFIGURED; ddt->ddt_version = DDT_VERSION_UNCONFIGURED;
ddt_log_alloc(ddt); ddt_log_alloc(ddt);
ddt_table_alloc_kstats(ddt);
return (ddt); return (ddt);
} }
@ -1343,6 +1465,14 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
static void static void
ddt_table_free(ddt_t *ddt) ddt_table_free(ddt_t *ddt)
{ {
#ifdef _KERNEL
if (ddt->ddt_ksp != NULL) {
kmem_free(ddt->ddt_ksp->ks_data, sizeof (ddt_kstats_t));
ddt->ddt_ksp->ks_data = NULL;
kstat_delete(ddt->ddt_ksp);
}
#endif /* _KERNEL */
ddt_log_free(ddt); ddt_log_free(ddt);
ASSERT0(avl_numnodes(&ddt->ddt_tree)); ASSERT0(avl_numnodes(&ddt->ddt_tree));
ASSERT0(avl_numnodes(&ddt->ddt_repair_tree)); ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));
@ -1400,6 +1530,11 @@ ddt_load(spa_t *spa)
if (error != 0 && error != ENOENT) if (error != 0 && error != ENOENT)
return (error); return (error);
DDT_KSTAT_SET(ddt, dds_log_active_entries,
avl_numnodes(&ddt->ddt_log_active->ddl_tree));
DDT_KSTAT_SET(ddt, dds_log_flushing_entries,
avl_numnodes(&ddt->ddt_log_flushing->ddl_tree));
/* /*
* Seed the cached histograms. * Seed the cached histograms.
*/ */
@ -1860,12 +1995,15 @@ ddt_sync_flush_log_incremental(ddt_t *ddt, dmu_tx_t *tx)
if (avl_is_empty(&ddt->ddt_log_flushing->ddl_tree)) { if (avl_is_empty(&ddt->ddt_log_flushing->ddl_tree)) {
/* We emptied it, so truncate on-disk */ /* We emptied it, so truncate on-disk */
DDT_KSTAT_ZERO(ddt, dds_log_flushing_entries);
ddt_log_truncate(ddt, tx); ddt_log_truncate(ddt, tx);
/* No more passes needed this txg */ /* No more passes needed this txg */
ddt->ddt_flush_pass = 0; ddt->ddt_flush_pass = 0;
} else } else {
/* More to do next time, save checkpoint */ /* More to do next time, save checkpoint */
DDT_KSTAT_SUB(ddt, dds_log_flushing_entries, count);
ddt_log_checkpoint(ddt, &ddlwe, tx); ddt_log_checkpoint(ddt, &ddlwe, tx);
}
ddt_sync_update_stats(ddt, tx); ddt_sync_update_stats(ddt, tx);
@ -1928,7 +2066,11 @@ ddt_sync_flush_log(ddt_t *ddt, dmu_tx_t *tx)
* No more to flush, and the active list has stuff, so * No more to flush, and the active list has stuff, so
* try to swap the logs for next time. * try to swap the logs for next time.
*/ */
(void) ddt_log_swap(ddt, tx); if (ddt_log_swap(ddt, tx)) {
DDT_KSTAT_ZERO(ddt, dds_log_active_entries);
DDT_KSTAT_SET(ddt, dds_log_flushing_entries,
avl_numnodes(&ddt->ddt_log_flushing->ddl_tree));
}
} }
/* If force flush is no longer necessary, turn it off. */ /* If force flush is no longer necessary, turn it off. */
@ -1941,6 +2083,7 @@ ddt_sync_flush_log(ddt_t *ddt, dmu_tx_t *tx)
ddt->ddt_log_flush_rate = _ewma( ddt->ddt_log_flush_rate = _ewma(
ddt->ddt_flush_count, ddt->ddt_log_flush_rate, ddt->ddt_flush_count, ddt->ddt_log_flush_rate,
zfs_dedup_log_flush_flow_rate_txgs); zfs_dedup_log_flush_flow_rate_txgs);
DDT_KSTAT_SET(ddt, dds_log_flush_rate, ddt->ddt_log_flush_rate);
/* /*
* Update flush time rate. This is an exponential weighted moving * Update flush time rate. This is an exponential weighted moving
@ -1950,6 +2093,8 @@ ddt_sync_flush_log(ddt_t *ddt, dmu_tx_t *tx)
ddt->ddt_log_flush_time_rate, ddt->ddt_log_flush_time_rate,
((int32_t)(NSEC2MSEC(gethrtime() - ddt->ddt_flush_start))), ((int32_t)(NSEC2MSEC(gethrtime() - ddt->ddt_flush_start))),
zfs_dedup_log_flush_flow_rate_txgs); zfs_dedup_log_flush_flow_rate_txgs);
DDT_KSTAT_SET(ddt, dds_log_flush_time_rate,
ddt->ddt_log_flush_time_rate);
} }
static void static void
@ -1975,6 +2120,9 @@ ddt_sync_table_log(ddt_t *ddt, dmu_tx_t *tx)
ddt_log_commit(ddt, &dlu); ddt_log_commit(ddt, &dlu);
DDT_KSTAT_SET(ddt, dds_log_active_entries,
avl_numnodes(&ddt->ddt_log_active->ddl_tree));
/* /*
* Sync the stats for the store objects. Even though we haven't * Sync the stats for the store objects. Even though we haven't
* modified anything on those objects, they're no longer the * modified anything on those objects, they're no longer the
@ -1996,7 +2144,7 @@ ddt_sync_table_log(ddt_t *ddt, dmu_tx_t *tx)
ddt->ddt_spa->spa_dedup_dsize = ~0ULL; ddt->ddt_spa->spa_dedup_dsize = ~0ULL;
} }
if (spa_sync_pass(ddt->ddt_spa) == 1) if (spa_sync_pass(ddt->ddt_spa) == 1) {
/* /*
* Update ingest rate. This is an exponential weighted moving * Update ingest rate. This is an exponential weighted moving
* average of the number of entries changed over recent txgs. * average of the number of entries changed over recent txgs.
@ -2006,6 +2154,9 @@ ddt_sync_table_log(ddt_t *ddt, dmu_tx_t *tx)
ddt->ddt_log_ingest_rate = _ewma( ddt->ddt_log_ingest_rate = _ewma(
count, ddt->ddt_log_ingest_rate, count, ddt->ddt_log_ingest_rate,
zfs_dedup_log_flush_flow_rate_txgs); zfs_dedup_log_flush_flow_rate_txgs);
DDT_KSTAT_SET(ddt, dds_log_ingest_rate,
ddt->ddt_log_ingest_rate);
}
} }
static void static void