Add DDT prune command

Requires the new 'flat' physical data which has the start
time for a class entry.

The amount to prune can be based on a target percentage of
the unique entries or based on the age (i.e., every entry
older than N days).

Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Closes #16277
This commit is contained in:
Don Brady
2024-06-17 22:35:18 +00:00
committed by Brian Behlendorf
parent 4a4f7b019f
commit d4d79451cb
21 changed files with 905 additions and 85 deletions
+3
View File
@@ -305,6 +305,9 @@ _LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *);
_LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *);
_LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t,
uint64_t);
_LIBZFS_H int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
_LIBZFS_H int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
+3
View File
@@ -161,6 +161,9 @@ _LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);
_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
_LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t,
uint64_t);
#ifdef __cplusplus
}
#endif
+3
View File
@@ -405,6 +405,9 @@ extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb,
extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
extern int ddt_prune_unique_entries(spa_t *spa, zpool_ddt_prune_unit_t unit,
uint64_t amount);
#ifdef __cplusplus
}
#endif
+48 -4
View File
@@ -35,8 +35,11 @@ extern "C" {
#endif
/* DDT version numbers */
#define DDT_VERSION_LEGACY (0)
#define DDT_VERSION_FDT (1)
#define DDT_VERSION_LEGACY (0)
#define DDT_VERSION_FDT (1)
/* Dummy version to signal that configure is still necessary */
#define DDT_VERSION_UNCONFIGURED (UINT64_MAX)
/* Names of interesting objects in the DDT root dir */
#define DDT_DIR_VERSION "version"
@@ -187,8 +190,11 @@ extern void ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu);
extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,
ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_take_key(ddt_t *ddt, ddt_log_t *ddl,
const ddt_key_t *ddk, ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,
const ddt_key_t *ddk);
extern void ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
dmu_tx_t *tx);
@@ -211,6 +217,44 @@ extern void ddt_log_fini(void);
* them up.
*/
/*
* We use a histogram to convert a percentage request into a
* cutoff value where entries older than the cutoff get pruned.
*
* The histogram bins represent hours in power-of-two increments.
* 16 bins covers up to four years.
*/
#define HIST_BINS 16
typedef struct ddt_age_histo {
uint64_t dah_entries;
uint64_t dah_age_histo[HIST_BINS];
} ddt_age_histo_t;
void ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram);
#if defined(_KERNEL) || !defined(ZFS_DEBUG)
#define ddt_dump_age_histogram(histo, cutoff) ((void)0)
#else
static inline void
ddt_dump_age_histogram(ddt_age_histo_t *histogram, uint64_t cutoff)
{
if (histogram->dah_entries == 0)
return;
(void) printf("DDT prune unique class age, %llu hour cutoff\n",
(u_longlong_t)(gethrestime_sec() - cutoff)/3600);
(void) printf("%5s %9s %4s\n", "age", "blocks", "amnt");
(void) printf("%5s %9s %4s\n", "-----", "---------", "----");
for (int i = 0; i < HIST_BINS; i++) {
(void) printf("%5d %9llu %4d%%\n", 1<<i,
(u_longlong_t)histogram->dah_age_histo[i],
(int)((histogram->dah_age_histo[i] * 100) /
histogram->dah_entries));
}
}
#endif
/*
* Enough room to expand DMU_POOL_DDT format for all possible DDT
* checksum/class/type combinations.
+14 -1
View File
@@ -1422,7 +1422,7 @@ typedef enum {
*/
typedef enum zfs_ioc {
/*
* Core features - 88/128 numbers reserved.
* Core features - 89/128 numbers reserved.
*/
#ifdef __FreeBSD__
ZFS_IOC_FIRST = 0,
@@ -1519,6 +1519,7 @@ typedef enum zfs_ioc {
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
ZFS_IOC_DDT_PRUNE, /* 0x5a59 */
/*
* Per-platform (Optional) - 8/128 numbers reserved.
@@ -1655,6 +1656,12 @@ typedef enum {
ZPOOL_PREFETCH_DDT
} zpool_prefetch_type_t;
typedef enum {
ZPOOL_DDT_PRUNE_NONE,
ZPOOL_DDT_PRUNE_AGE, /* in seconds */
ZPOOL_DDT_PRUNE_PERCENTAGE, /* 1 - 100 */
} zpool_ddt_prune_unit_t;
/*
* Bookmark name values.
*/
@@ -1753,6 +1760,12 @@ typedef enum {
*/
#define ZPOOL_PREFETCH_TYPE "prefetch_type"
/*
* The following are names used when invoking ZFS_IOC_DDT_PRUNE.
*/
#define DDT_PRUNE_UNIT "ddt_prune_unit"
#define DDT_PRUNE_AMOUNT "ddt_prune_amount"
/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
+1
View File
@@ -412,6 +412,7 @@ struct spa {
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
boolean_t spa_active_ddt_prune; /* ddt prune process active */
struct brt *spa_brt; /* in-core BRT */
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
kmutex_t spa_proc_lock; /* protects spa_proc* */