mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
ddt: add support for prefetching tables into the ARC
This change adds a new `zpool prefetch -t ddt $pool` command which causes a pool's DDT to be loaded into the ARC. The primary goal is to remove the need to "warm" a pool's cache before deduplication stops slowing write performance. It may also provide a way to reload portions of a DDT if they have been flushed due to inactivity. Sponsored-by: iXsystems, Inc. Sponsored-by: Catalogics, Inc. Sponsored-by: Klara, Inc. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Will Andrews <will.andrews@klarasystems.com> Signed-off-by: Fred Weigel <fred.weigel@klarasystems.com> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Signed-off-by: Don Brady <don.brady@klarasystems.com> Co-authored-by: Will Andrews <will.andrews@klarasystems.com> Co-authored-by: Don Brady <don.brady@klarasystems.com> Closes #15890
This commit is contained in:
+2
-2
@@ -1985,8 +1985,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
|
||||
(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
|
||||
name,
|
||||
(u_longlong_t)count,
|
||||
(u_longlong_t)(dspace / count),
|
||||
(u_longlong_t)(mspace / count));
|
||||
(u_longlong_t)dspace,
|
||||
(u_longlong_t)mspace);
|
||||
|
||||
if (dump_opt['D'] < 3)
|
||||
return;
|
||||
|
||||
+104
-8
@@ -32,7 +32,7 @@
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
|
||||
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
|
||||
* Copyright (c) 2021, Klara Inc.
|
||||
* Copyright (c) 2021, 2023, Klara Inc.
|
||||
* Copyright [2021] Hewlett Packard Enterprise Development LP
|
||||
*/
|
||||
|
||||
@@ -90,6 +90,7 @@ static int zpool_do_remove(int, char **);
|
||||
static int zpool_do_labelclear(int, char **);
|
||||
|
||||
static int zpool_do_checkpoint(int, char **);
|
||||
static int zpool_do_prefetch(int, char **);
|
||||
|
||||
static int zpool_do_list(int, char **);
|
||||
static int zpool_do_iostat(int, char **);
|
||||
@@ -176,6 +177,7 @@ typedef enum {
|
||||
HELP_LIST,
|
||||
HELP_OFFLINE,
|
||||
HELP_ONLINE,
|
||||
HELP_PREFETCH,
|
||||
HELP_REPLACE,
|
||||
HELP_REMOVE,
|
||||
HELP_INITIALIZE,
|
||||
@@ -307,6 +309,7 @@ static zpool_command_t command_table[] = {
|
||||
{ "labelclear", zpool_do_labelclear, HELP_LABELCLEAR },
|
||||
{ NULL },
|
||||
{ "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT },
|
||||
{ "prefetch", zpool_do_prefetch, HELP_PREFETCH },
|
||||
{ NULL },
|
||||
{ "list", zpool_do_list, HELP_LIST },
|
||||
{ "iostat", zpool_do_iostat, HELP_IOSTAT },
|
||||
@@ -398,6 +401,9 @@ get_usage(zpool_help_t idx)
|
||||
return (gettext("\tlist [-gHLpPv] [-o property[,...]] "
|
||||
"[-T d|u] [pool] ... \n"
|
||||
"\t [interval [count]]\n"));
|
||||
case HELP_PREFETCH:
|
||||
return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n"
|
||||
"\t -t ddt <pool>\n"));
|
||||
case HELP_OFFLINE:
|
||||
return (gettext("\toffline [--power]|[[-f][-t]] <pool> "
|
||||
"<device> ...\n"));
|
||||
@@ -3827,6 +3833,72 @@ zpool_do_checkpoint(int argc, char **argv)
|
||||
|
||||
#define CHECKPOINT_OPT 1024
|
||||
|
||||
/*
|
||||
* zpool prefetch <type> [<type opts>] <pool>
|
||||
*
|
||||
* Prefetchs a particular type of data in the specified pool.
|
||||
*/
|
||||
int
|
||||
zpool_do_prefetch(int argc, char **argv)
|
||||
{
|
||||
int c;
|
||||
char *poolname;
|
||||
char *typestr = NULL;
|
||||
zpool_prefetch_type_t type;
|
||||
zpool_handle_t *zhp;
|
||||
int err = 0;
|
||||
|
||||
while ((c = getopt(argc, argv, "t:")) != -1) {
|
||||
switch (c) {
|
||||
case 't':
|
||||
typestr = optarg;
|
||||
break;
|
||||
case ':':
|
||||
(void) fprintf(stderr, gettext("missing argument for "
|
||||
"'%c' option\n"), optopt);
|
||||
usage(B_FALSE);
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
optopt);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
}
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (argc < 1) {
|
||||
(void) fprintf(stderr, gettext("missing pool name argument\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
if (argc > 1) {
|
||||
(void) fprintf(stderr, gettext("too many arguments\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
poolname = argv[0];
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (strcmp(typestr, "ddt") == 0) {
|
||||
type = ZPOOL_PREFETCH_DDT;
|
||||
} else {
|
||||
(void) fprintf(stderr, gettext("unsupported prefetch type\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
|
||||
return (1);
|
||||
|
||||
err = zpool_prefetch(zhp, type);
|
||||
|
||||
zpool_close(zhp);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* zpool import [-d dir] [-D]
|
||||
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
|
||||
@@ -6446,6 +6518,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str,
|
||||
case ZPOOL_PROP_EXPANDSZ:
|
||||
case ZPOOL_PROP_CHECKPOINT:
|
||||
case ZPOOL_PROP_DEDUPRATIO:
|
||||
case ZPOOL_PROP_DEDUPCACHED:
|
||||
if (value == 0)
|
||||
(void) strlcpy(propval, "-", sizeof (propval));
|
||||
else
|
||||
@@ -8792,13 +8865,17 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache,
|
||||
}
|
||||
|
||||
static void
|
||||
print_dedup_stats(nvlist_t *config)
|
||||
print_dedup_stats(zpool_handle_t *zhp, nvlist_t *config, boolean_t literal)
|
||||
{
|
||||
ddt_histogram_t *ddh;
|
||||
ddt_stat_t *dds;
|
||||
ddt_object_t *ddo;
|
||||
uint_t c;
|
||||
char dspace[6], mspace[6];
|
||||
/* Extra space provided for literal display */
|
||||
char dspace[32], mspace[32], cspace[32];
|
||||
uint64_t cspace_prop;
|
||||
enum zfs_nicenum_format format;
|
||||
zprop_source_t src;
|
||||
|
||||
/*
|
||||
* If the pool was faulted then we may not have been able to
|
||||
@@ -8816,12 +8893,26 @@ print_dedup_stats(nvlist_t *config)
|
||||
return;
|
||||
}
|
||||
|
||||
zfs_nicebytes(ddo->ddo_dspace, dspace, sizeof (dspace));
|
||||
zfs_nicebytes(ddo->ddo_mspace, mspace, sizeof (mspace));
|
||||
(void) printf("DDT entries %llu, size %s on disk, %s in core\n",
|
||||
/*
|
||||
* Squash cached size into in-core size to handle race.
|
||||
* Only include cached size if it is available.
|
||||
*/
|
||||
cspace_prop = zpool_get_prop_int(zhp, ZPOOL_PROP_DEDUPCACHED, &src);
|
||||
cspace_prop = MIN(cspace_prop, ddo->ddo_mspace);
|
||||
format = literal ? ZFS_NICENUM_RAW : ZFS_NICENUM_1024;
|
||||
zfs_nicenum_format(cspace_prop, cspace, sizeof (cspace), format);
|
||||
zfs_nicenum_format(ddo->ddo_dspace, dspace, sizeof (dspace), format);
|
||||
zfs_nicenum_format(ddo->ddo_mspace, mspace, sizeof (mspace), format);
|
||||
(void) printf("DDT entries %llu, size %s on disk, %s in core",
|
||||
(u_longlong_t)ddo->ddo_count,
|
||||
dspace,
|
||||
mspace);
|
||||
if (src != ZPROP_SRC_DEFAULT) {
|
||||
(void) printf(", %s cached (%.02f%%)",
|
||||
cspace,
|
||||
(double)cspace_prop / (double)ddo->ddo_mspace * 100.0);
|
||||
}
|
||||
(void) printf("\n");
|
||||
|
||||
verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS,
|
||||
(uint64_t **)&dds, &c) == 0);
|
||||
@@ -8857,6 +8948,10 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
uint_t c;
|
||||
vdev_stat_t *vs;
|
||||
|
||||
/* If dedup stats were requested, also fetch dedupcached. */
|
||||
if (cbp->cb_dedup_stats > 1)
|
||||
zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME);
|
||||
|
||||
config = zpool_get_config(zhp, NULL);
|
||||
reason = zpool_get_status(zhp, &msgid, &errata);
|
||||
|
||||
@@ -9338,7 +9433,7 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
}
|
||||
|
||||
if (cbp->cb_dedup_stats)
|
||||
print_dedup_stats(config);
|
||||
print_dedup_stats(zhp, config, cbp->cb_literal);
|
||||
} else {
|
||||
(void) printf(gettext("config: The configuration cannot be "
|
||||
"determined.\n"));
|
||||
@@ -9412,7 +9507,8 @@ zpool_do_status(int argc, char **argv)
|
||||
cmd = optarg;
|
||||
break;
|
||||
case 'D':
|
||||
cb.cb_dedup_stats = B_TRUE;
|
||||
if (++cb.cb_dedup_stats > 2)
|
||||
cb.cb_dedup_stats = 2;
|
||||
break;
|
||||
case 'e':
|
||||
cb.cb_print_unhealthy = B_TRUE;
|
||||
|
||||
+18
@@ -26,6 +26,7 @@
|
||||
* Copyright (c) 2014 Integros [integros.com]
|
||||
* Copyright 2017 Joyent, Inc.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2023, Klara, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -444,6 +445,7 @@ ztest_func_t ztest_blake3;
|
||||
ztest_func_t ztest_fletcher;
|
||||
ztest_func_t ztest_fletcher_incr;
|
||||
ztest_func_t ztest_verify_dnode_bt;
|
||||
ztest_func_t ztest_pool_prefetch_ddt;
|
||||
|
||||
static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
|
||||
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
|
||||
@@ -499,6 +501,7 @@ static ztest_info_t ztest_info[] = {
|
||||
ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
|
||||
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
|
||||
};
|
||||
|
||||
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
|
||||
@@ -6993,6 +6996,21 @@ ztest_fletcher_incr(ztest_ds_t *zd, uint64_t id)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ztest_pool_prefetch_ddt(ztest_ds_t *zd, uint64_t id)
|
||||
{
|
||||
(void) zd, (void) id;
|
||||
spa_t *spa;
|
||||
|
||||
(void) pthread_rwlock_rdlock(&ztest_name_lock);
|
||||
VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
|
||||
|
||||
ddt_prefetch_all(spa);
|
||||
|
||||
spa_close(spa, FTAG);
|
||||
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
||||
}
|
||||
|
||||
static int
|
||||
ztest_set_global_vars(void)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user