mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
Add DDT prune command
Requires the new 'flat' physical data format, which records the start time for a class entry. The amount to prune can be based on a target percentage of the unique entries or on their age (i.e., every entry older than N days).
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Closes #16277
This commit is contained in:
committed by
Brian Behlendorf
parent
4a4f7b019f
commit
d4d79451cb
+45
-10
@@ -2045,7 +2045,7 @@ dump_all_ddts(spa_t *spa)
|
||||
|
||||
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (!ddt)
|
||||
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
|
||||
continue;
|
||||
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
|
||||
for (ddt_class_t class = 0; class < DDT_CLASSES;
|
||||
@@ -2072,6 +2072,32 @@ dump_all_ddts(spa_t *spa)
|
||||
}
|
||||
|
||||
dump_dedup_ratio(&dds_total);
|
||||
|
||||
/*
|
||||
* Dump a histogram of unique class entry age
|
||||
*/
|
||||
if (dump_opt['D'] == 3 && getenv("ZDB_DDT_UNIQUE_AGE_HIST") != NULL) {
|
||||
ddt_age_histo_t histogram;
|
||||
|
||||
(void) printf("DDT walk unique, building age histogram...\n");
|
||||
ddt_prune_walk(spa, 0, &histogram);
|
||||
|
||||
/*
|
||||
* print out histogram for unique entry class birth
|
||||
*/
|
||||
if (histogram.dah_entries > 0) {
|
||||
(void) printf("%5s %9s %4s\n",
|
||||
"age", "blocks", "amnt");
|
||||
(void) printf("%5s %9s %4s\n",
|
||||
"-----", "---------", "----");
|
||||
for (int i = 0; i < HIST_BINS; i++) {
|
||||
(void) printf("%5d %9d %4d%%\n", 1 << i,
|
||||
(int)histogram.dah_age_histo[i],
|
||||
(int)((histogram.dah_age_histo[i] * 100) /
|
||||
histogram.dah_entries));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -5749,12 +5775,17 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
ddt_entry_t *dde = ddt_lookup(ddt, bp);
|
||||
|
||||
/*
|
||||
* ddt_lookup() can only return NULL if this block didn't exist
|
||||
* ddt_lookup() can return NULL if this block didn't exist
|
||||
* in the DDT and creating it would take the DDT over its
|
||||
* quota. Since we got the block from disk, it must exist in
|
||||
* the DDT, so this can't happen.
|
||||
* the DDT, so this can't happen. However, when unique entries
|
||||
* are pruned, the dedup bit can be set with no corresponding
|
||||
* entry in the DDT.
|
||||
*/
|
||||
VERIFY3P(dde, !=, NULL);
|
||||
if (dde == NULL) {
|
||||
ddt_exit(ddt);
|
||||
goto skipped;
|
||||
}
|
||||
|
||||
/* Get the phys for this variant */
|
||||
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
|
||||
@@ -5774,8 +5805,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
(void *)(((uintptr_t)dde->dde_io) | (1 << v));
|
||||
|
||||
/* Consume a reference for this block. */
|
||||
VERIFY3U(ddt_phys_total_refcnt(ddt, dde->dde_phys), >, 0);
|
||||
ddt_phys_decref(dde->dde_phys, v);
|
||||
if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
|
||||
ddt_phys_decref(dde->dde_phys, v);
|
||||
|
||||
/*
|
||||
* If this entry has a single flat phys, it may have been
|
||||
@@ -5864,6 +5895,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
}
|
||||
}
|
||||
|
||||
skipped:
|
||||
for (i = 0; i < 4; i++) {
|
||||
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
|
||||
int t = (i & 1) ? type : ZDB_OT_TOTAL;
|
||||
@@ -8138,7 +8170,7 @@ dump_mos_leaks(spa_t *spa)
|
||||
|
||||
for (uint64_t c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (!ddt)
|
||||
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
|
||||
continue;
|
||||
|
||||
/* DDT store objects */
|
||||
@@ -8150,11 +8182,14 @@ dump_mos_leaks(spa_t *spa)
|
||||
}
|
||||
|
||||
/* FDT container */
|
||||
mos_obj_refd(ddt->ddt_dir_object);
|
||||
if (ddt->ddt_version == DDT_VERSION_FDT)
|
||||
mos_obj_refd(ddt->ddt_dir_object);
|
||||
|
||||
/* FDT log objects */
|
||||
mos_obj_refd(ddt->ddt_log[0].ddl_object);
|
||||
mos_obj_refd(ddt->ddt_log[1].ddl_object);
|
||||
if (ddt->ddt_flags & DDT_FLAG_LOG) {
|
||||
mos_obj_refd(ddt->ddt_log[0].ddl_object);
|
||||
mos_obj_refd(ddt->ddt_log[1].ddl_object);
|
||||
}
|
||||
}
|
||||
|
||||
if (spa->spa_brt != NULL) {
|
||||
|
||||
@@ -130,6 +130,8 @@ static int zpool_do_version(int, char **);
|
||||
|
||||
static int zpool_do_wait(int, char **);
|
||||
|
||||
static int zpool_do_ddt_prune(int, char **);
|
||||
|
||||
static int zpool_do_help(int argc, char **argv);
|
||||
|
||||
static zpool_compat_status_t zpool_do_load_compat(
|
||||
@@ -170,6 +172,7 @@ typedef enum {
|
||||
HELP_CLEAR,
|
||||
HELP_CREATE,
|
||||
HELP_CHECKPOINT,
|
||||
HELP_DDT_PRUNE,
|
||||
HELP_DESTROY,
|
||||
HELP_DETACH,
|
||||
HELP_EXPORT,
|
||||
@@ -426,6 +429,8 @@ static zpool_command_t command_table[] = {
|
||||
{ "sync", zpool_do_sync, HELP_SYNC },
|
||||
{ NULL },
|
||||
{ "wait", zpool_do_wait, HELP_WAIT },
|
||||
{ NULL },
|
||||
{ "ddtprune", zpool_do_ddt_prune, HELP_DDT_PRUNE },
|
||||
};
|
||||
|
||||
#define NCOMMAND (ARRAY_SIZE(command_table))
|
||||
@@ -545,6 +550,8 @@ get_usage(zpool_help_t idx)
|
||||
case HELP_WAIT:
|
||||
return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] "
|
||||
"<pool> [interval]\n"));
|
||||
case HELP_DDT_PRUNE:
|
||||
return (gettext("\tddtprune -d|-p <amount> <pool>\n"));
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@@ -13342,6 +13349,88 @@ found:;
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* zpool ddtprune -d|-p <amount> <pool>
|
||||
*
|
||||
* -d <days> Prune entries <days> old and older
|
||||
* -p <percent> Prune <percent> amount of entries
|
||||
*
|
||||
* Prune single reference entries from DDT to satisfy the amount specified.
|
||||
*/
|
||||
int
|
||||
zpool_do_ddt_prune(int argc, char **argv)
|
||||
{
|
||||
zpool_ddt_prune_unit_t unit = ZPOOL_DDT_PRUNE_NONE;
|
||||
uint64_t amount = 0;
|
||||
zpool_handle_t *zhp;
|
||||
char *endptr;
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "d:p:")) != -1) {
|
||||
switch (c) {
|
||||
case 'd':
|
||||
if (unit == ZPOOL_DDT_PRUNE_PERCENTAGE) {
|
||||
(void) fprintf(stderr, gettext("-d cannot be "
|
||||
"combined with -p option\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
errno = 0;
|
||||
amount = strtoull(optarg, &endptr, 0);
|
||||
if (errno != 0 || *endptr != '\0' || amount == 0) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("invalid days value\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
amount *= 86400; /* convert days to seconds */
|
||||
unit = ZPOOL_DDT_PRUNE_AGE;
|
||||
break;
|
||||
case 'p':
|
||||
if (unit == ZPOOL_DDT_PRUNE_AGE) {
|
||||
(void) fprintf(stderr, gettext("-p cannot be "
|
||||
"combined with -d option\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
errno = 0;
|
||||
amount = strtoull(optarg, &endptr, 0);
|
||||
if (errno != 0 || *endptr != '\0' ||
|
||||
amount == 0 || amount > 100) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("invalid percentage value\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
unit = ZPOOL_DDT_PRUNE_PERCENTAGE;
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
optopt);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
}
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (unit == ZPOOL_DDT_PRUNE_NONE) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("missing amount option (-d|-p <value>)\n"));
|
||||
usage(B_FALSE);
|
||||
} else if (argc < 1) {
|
||||
(void) fprintf(stderr, gettext("missing pool argument\n"));
|
||||
usage(B_FALSE);
|
||||
} else if (argc > 1) {
|
||||
(void) fprintf(stderr, gettext("too many arguments\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
zhp = zpool_open(g_zfs, argv[0]);
|
||||
if (zhp == NULL)
|
||||
return (-1);
|
||||
|
||||
int error = zpool_ddt_prune(zhp, unit, amount);
|
||||
|
||||
zpool_close(zhp);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
find_command_idx(const char *command, int *idx)
|
||||
{
|
||||
|
||||
+28
@@ -276,6 +276,8 @@ extern unsigned long zio_decompress_fail_fraction;
|
||||
extern unsigned long zfs_reconstruct_indirect_damage_fraction;
|
||||
extern uint64_t raidz_expand_max_reflow_bytes;
|
||||
extern uint_t raidz_expand_pause_point;
|
||||
extern boolean_t ddt_prune_artificial_age;
|
||||
extern boolean_t ddt_dump_prune_histogram;
|
||||
|
||||
|
||||
static ztest_shared_opts_t *ztest_shared_opts;
|
||||
@@ -446,6 +448,7 @@ ztest_func_t ztest_fletcher;
|
||||
ztest_func_t ztest_fletcher_incr;
|
||||
ztest_func_t ztest_verify_dnode_bt;
|
||||
ztest_func_t ztest_pool_prefetch_ddt;
|
||||
ztest_func_t ztest_ddt_prune;
|
||||
|
||||
static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
|
||||
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
|
||||
@@ -502,6 +505,7 @@ static ztest_info_t ztest_info[] = {
|
||||
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
|
||||
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
|
||||
};
|
||||
|
||||
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
|
||||
@@ -7288,6 +7292,17 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
|
||||
mutex_exit(&ztest_vdev_lock);
|
||||
}
|
||||
|
||||
void
|
||||
ztest_ddt_prune(ztest_ds_t *zd, uint64_t id)
|
||||
{
|
||||
(void) zd, (void) id;
|
||||
|
||||
spa_t *spa = ztest_spa;
|
||||
uint64_t pct = ztest_random(15) + 1;
|
||||
|
||||
(void) ddt_prune_unique_entries(spa, ZPOOL_DDT_PRUNE_PERCENTAGE, pct);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify pool integrity by running zdb.
|
||||
*/
|
||||
@@ -7469,6 +7484,13 @@ ztest_resume_thread(void *arg)
|
||||
{
|
||||
spa_t *spa = arg;
|
||||
|
||||
/*
|
||||
* Synthesize aged DDT entries for ddt prune testing
|
||||
*/
|
||||
ddt_prune_artificial_age = B_TRUE;
|
||||
if (ztest_opts.zo_verbose >= 3)
|
||||
ddt_dump_prune_histogram = B_TRUE;
|
||||
|
||||
while (!ztest_exiting) {
|
||||
if (spa_suspended(spa))
|
||||
ztest_resume(spa);
|
||||
@@ -8587,6 +8609,12 @@ ztest_init(ztest_shared_t *zs)
|
||||
if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* split 50/50 between legacy and fast dedup
|
||||
*/
|
||||
if (i == SPA_FEATURE_FAST_DEDUP && ztest_random(2) != 0)
|
||||
continue;
|
||||
|
||||
VERIFY3S(-1, !=, asprintf(&buf, "feature@%s",
|
||||
spa_feature_table[i].fi_uname));
|
||||
fnvlist_add_uint64(props, buf, 0);
|
||||
|
||||
Reference in New Issue
Block a user