ddt: add support for prefetching tables into the ARC

This change adds a new `zpool prefetch -t ddt $pool` command which
causes a pool's DDT to be loaded into the ARC. The primary goal is to
remove the need to "warm" a pool's cache before deduplication stops
slowing write performance. It may also provide a way to reload portions
of a DDT if they have been flushed due to inactivity.

Sponsored-by: iXsystems, Inc.
Sponsored-by: Catalogics, Inc.
Sponsored-by: Klara, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Will Andrews <will.andrews@klarasystems.com>
Signed-off-by: Fred Weigel <fred.weigel@klarasystems.com>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Co-authored-by: Will Andrews <will.andrews@klarasystems.com>
Co-authored-by: Don Brady <don.brady@klarasystems.com>
Closes #15890
This commit is contained in:
Allan Jude
2024-07-26 12:16:18 -04:00
committed by GitHub
parent 2ed1aebaf6
commit 62e7d3c89e
37 changed files with 1067 additions and 52 deletions
+104 -8
View File
@@ -32,7 +32,7 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
* Copyright (c) 2021, Klara Inc.
* Copyright (c) 2021, 2023, Klara Inc.
* Copyright [2021] Hewlett Packard Enterprise Development LP
*/
@@ -90,6 +90,7 @@ static int zpool_do_remove(int, char **);
static int zpool_do_labelclear(int, char **);
static int zpool_do_checkpoint(int, char **);
static int zpool_do_prefetch(int, char **);
static int zpool_do_list(int, char **);
static int zpool_do_iostat(int, char **);
@@ -176,6 +177,7 @@ typedef enum {
HELP_LIST,
HELP_OFFLINE,
HELP_ONLINE,
HELP_PREFETCH,
HELP_REPLACE,
HELP_REMOVE,
HELP_INITIALIZE,
@@ -307,6 +309,7 @@ static zpool_command_t command_table[] = {
{ "labelclear", zpool_do_labelclear, HELP_LABELCLEAR },
{ NULL },
{ "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT },
{ "prefetch", zpool_do_prefetch, HELP_PREFETCH },
{ NULL },
{ "list", zpool_do_list, HELP_LIST },
{ "iostat", zpool_do_iostat, HELP_IOSTAT },
@@ -398,6 +401,9 @@ get_usage(zpool_help_t idx)
return (gettext("\tlist [-gHLpPv] [-o property[,...]] "
"[-T d|u] [pool] ... \n"
"\t [interval [count]]\n"));
case HELP_PREFETCH:
return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n"
"\t -t ddt <pool>\n"));
case HELP_OFFLINE:
return (gettext("\toffline [--power]|[[-f][-t]] <pool> "
"<device> ...\n"));
@@ -3827,6 +3833,72 @@ zpool_do_checkpoint(int argc, char **argv)
#define CHECKPOINT_OPT 1024
/*
* zpool prefetch <type> [<type opts>] <pool>
*
* Prefetchs a particular type of data in the specified pool.
*/
int
zpool_do_prefetch(int argc, char **argv)
{
int c;
char *poolname;
char *typestr = NULL;
zpool_prefetch_type_t type;
zpool_handle_t *zhp;
int err = 0;
while ((c = getopt(argc, argv, "t:")) != -1) {
switch (c) {
case 't':
typestr = optarg;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
usage(B_FALSE);
}
}
argc -= optind;
argv += optind;
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
poolname = argv[0];
argc--;
argv++;
if (strcmp(typestr, "ddt") == 0) {
type = ZPOOL_PREFETCH_DDT;
} else {
(void) fprintf(stderr, gettext("unsupported prefetch type\n"));
usage(B_FALSE);
}
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
err = zpool_prefetch(zhp, type);
zpool_close(zhp);
return (err);
}
/*
* zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
@@ -6446,6 +6518,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str,
case ZPOOL_PROP_EXPANDSZ:
case ZPOOL_PROP_CHECKPOINT:
case ZPOOL_PROP_DEDUPRATIO:
case ZPOOL_PROP_DEDUPCACHED:
if (value == 0)
(void) strlcpy(propval, "-", sizeof (propval));
else
@@ -8792,13 +8865,17 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache,
}
static void
print_dedup_stats(nvlist_t *config)
print_dedup_stats(zpool_handle_t *zhp, nvlist_t *config, boolean_t literal)
{
ddt_histogram_t *ddh;
ddt_stat_t *dds;
ddt_object_t *ddo;
uint_t c;
char dspace[6], mspace[6];
/* Extra space provided for literal display */
char dspace[32], mspace[32], cspace[32];
uint64_t cspace_prop;
enum zfs_nicenum_format format;
zprop_source_t src;
/*
* If the pool was faulted then we may not have been able to
@@ -8816,12 +8893,26 @@ print_dedup_stats(nvlist_t *config)
return;
}
zfs_nicebytes(ddo->ddo_dspace, dspace, sizeof (dspace));
zfs_nicebytes(ddo->ddo_mspace, mspace, sizeof (mspace));
(void) printf("DDT entries %llu, size %s on disk, %s in core\n",
/*
* Squash cached size into in-core size to handle race.
* Only include cached size if it is available.
*/
cspace_prop = zpool_get_prop_int(zhp, ZPOOL_PROP_DEDUPCACHED, &src);
cspace_prop = MIN(cspace_prop, ddo->ddo_mspace);
format = literal ? ZFS_NICENUM_RAW : ZFS_NICENUM_1024;
zfs_nicenum_format(cspace_prop, cspace, sizeof (cspace), format);
zfs_nicenum_format(ddo->ddo_dspace, dspace, sizeof (dspace), format);
zfs_nicenum_format(ddo->ddo_mspace, mspace, sizeof (mspace), format);
(void) printf("DDT entries %llu, size %s on disk, %s in core",
(u_longlong_t)ddo->ddo_count,
dspace,
mspace);
if (src != ZPROP_SRC_DEFAULT) {
(void) printf(", %s cached (%.02f%%)",
cspace,
(double)cspace_prop / (double)ddo->ddo_mspace * 100.0);
}
(void) printf("\n");
verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS,
(uint64_t **)&dds, &c) == 0);
@@ -8857,6 +8948,10 @@ status_callback(zpool_handle_t *zhp, void *data)
uint_t c;
vdev_stat_t *vs;
/* If dedup stats were requested, also fetch dedupcached. */
if (cbp->cb_dedup_stats > 1)
zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME);
config = zpool_get_config(zhp, NULL);
reason = zpool_get_status(zhp, &msgid, &errata);
@@ -9338,7 +9433,7 @@ status_callback(zpool_handle_t *zhp, void *data)
}
if (cbp->cb_dedup_stats)
print_dedup_stats(config);
print_dedup_stats(zhp, config, cbp->cb_literal);
} else {
(void) printf(gettext("config: The configuration cannot be "
"determined.\n"));
@@ -9412,7 +9507,8 @@ zpool_do_status(int argc, char **argv)
cmd = optarg;
break;
case 'D':
cb.cb_dedup_stats = B_TRUE;
if (++cb.cb_dedup_stats > 2)
cb.cb_dedup_stats = 2;
break;
case 'e':
cb.cb_print_unhealthy = B_TRUE;