mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-01-14 01:02:04 +03:00
Add BRT support to zpool prefetch command
Implement BRT (Block Reference Table) prefetch functionality similar to existing DDT prefetch. This allows preloading BRT metadata into ARC to improve performance for block cloning operations and frees of earlier cloned blocks. Make -t parameter optional. When omitted, prefetch all supported metadata types (both DDT and BRT now). Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com> Closes #17890
This commit is contained in:
parent
002bc3da6a
commit
41878d57ea
@ -494,8 +494,7 @@ get_usage(zpool_help_t idx)
|
||||
"[--json-int, --json-pool-key-guid]] ...\n"
|
||||
"\t [-T d|u] [pool] [interval [count]]\n"));
|
||||
case HELP_PREFETCH:
|
||||
return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n"
|
||||
"\t -t ddt <pool>\n"));
|
||||
return (gettext("\tprefetch [-t <type>] <pool>\n"));
|
||||
case HELP_OFFLINE:
|
||||
return (gettext("\toffline [--power]|[[-f][-t]] <pool> "
|
||||
"<device> ...\n"));
|
||||
@ -4200,7 +4199,7 @@ zpool_do_checkpoint(int argc, char **argv)
|
||||
#define CHECKPOINT_OPT 1024
|
||||
|
||||
/*
|
||||
* zpool prefetch <type> [<type opts>] <pool>
|
||||
* zpool prefetch [-t <type>] <pool>
|
||||
*
|
||||
* Prefetches a particular type of data in the specified pool.
|
||||
*/
|
||||
@ -4245,20 +4244,27 @@ zpool_do_prefetch(int argc, char **argv)
|
||||
|
||||
poolname = argv[0];
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (strcmp(typestr, "ddt") == 0) {
|
||||
type = ZPOOL_PREFETCH_DDT;
|
||||
} else {
|
||||
(void) fprintf(stderr, gettext("unsupported prefetch type\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
|
||||
return (1);
|
||||
|
||||
err = zpool_prefetch(zhp, type);
|
||||
if (typestr == NULL) {
|
||||
/* Prefetch all types */
|
||||
err = zpool_prefetch(zhp, ZPOOL_PREFETCH_DDT);
|
||||
if (err == 0)
|
||||
err = zpool_prefetch(zhp, ZPOOL_PREFETCH_BRT);
|
||||
} else {
|
||||
if (strcmp(typestr, "ddt") == 0) {
|
||||
type = ZPOOL_PREFETCH_DDT;
|
||||
} else if (strcmp(typestr, "brt") == 0) {
|
||||
type = ZPOOL_PREFETCH_BRT;
|
||||
} else {
|
||||
(void) fprintf(stderr,
|
||||
gettext("unsupported prefetch type\n"));
|
||||
zpool_close(zhp);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
err = zpool_prefetch(zhp, type);
|
||||
}
|
||||
|
||||
zpool_close(zhp);
|
||||
|
||||
|
||||
@ -56,6 +56,7 @@ extern void brt_create(spa_t *spa);
|
||||
extern int brt_load(spa_t *spa);
|
||||
extern void brt_unload(spa_t *spa);
|
||||
extern void brt_sync(spa_t *spa, uint64_t txg);
|
||||
extern void brt_prefetch_all(spa_t *spa);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@ -1713,7 +1713,8 @@ typedef enum {
|
||||
|
||||
typedef enum {
|
||||
ZPOOL_PREFETCH_NONE = 0,
|
||||
ZPOOL_PREFETCH_DDT
|
||||
ZPOOL_PREFETCH_DDT,
|
||||
ZPOOL_PREFETCH_BRT
|
||||
} zpool_prefetch_type_t;
|
||||
|
||||
typedef enum {
|
||||
|
||||
@ -1745,9 +1745,13 @@ zpool_prefetch(zpool_handle_t *zhp, zpool_prefetch_type_t type)
|
||||
|
||||
error = lzc_pool_prefetch(zhp->zpool_name, type);
|
||||
if (error != 0) {
|
||||
const char *typename = "unknown";
|
||||
if (type == ZPOOL_PREFETCH_DDT)
|
||||
typename = "ddt";
|
||||
else if (type == ZPOOL_PREFETCH_BRT)
|
||||
typename = "brt";
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
"cannot prefetch %s in '%s'"),
|
||||
type == ZPOOL_PREFETCH_DDT ? "ddt" : "", zhp->zpool_name);
|
||||
"cannot prefetch %s in '%s'"), typename, zhp->zpool_name);
|
||||
(void) zpool_standard_error(hdl, error, msg);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
@ -28,20 +28,25 @@
|
||||
.
|
||||
.Sh NAME
|
||||
.Nm zpool-prefetch
|
||||
.Nd Loads specific types of data for the given pool
|
||||
.Nd Prefetches pool metadata into ARC
|
||||
.Sh SYNOPSIS
|
||||
.Nm zpool
|
||||
.Cm prefetch
|
||||
.Fl t Ar type
|
||||
.Op Fl t Ar type
|
||||
.Ar pool
|
||||
.Sh DESCRIPTION
|
||||
.Bl -tag -width Ds
|
||||
.It Xo
|
||||
.Nm zpool
|
||||
.Cm prefetch
|
||||
.Fl t Li ddt
|
||||
.Ar pool
|
||||
.Xc
|
||||
Prefetch data of a specific type for the given pool; specifically the DDT,
|
||||
which will improve write I/O performance when the DDT is resident in the ARC.
|
||||
Massively prefetch metadata of a specific type for the given pool into the ARC
|
||||
to reduce latency of some operations later.
|
||||
If no type is specified, all types are prefetched.
|
||||
.Pp
|
||||
The following types are supported:
|
||||
.Bl -tag -width "brt"
|
||||
.It Sy brt
|
||||
Prefetch the BRT (block reference table).
|
||||
This may improve performance for block cloning operations,
|
||||
and frees of earlier cloned blocks.
|
||||
.It Sy ddt
|
||||
Prefetch the DDT (deduplication table).
|
||||
This may improve performance of writes when deduplication is enabled,
|
||||
and frees of earlier deduplicated blocks.
|
||||
.El
|
||||
|
||||
@ -1510,6 +1510,31 @@ brt_load(spa_t *spa)
|
||||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
brt_prefetch_all(spa_t *spa)
|
||||
{
|
||||
/*
|
||||
* Issue a ZAP prefetch on the MOS entries object of each BRT vdev that
|
||||
* has one (bv_mos_entries != 0). brt_rlock() is held only to read the
|
||||
* vdev array; it is dropped while bv_mos_entries_lock is held as reader.
|
||||
*/
|
||||
brt_rlock(spa);
|
||||
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
|
||||
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
|
||||
brt_unlock(spa);
|
||||
|
||||
rw_enter(&brtvd->bv_mos_entries_lock, RW_READER);
|
||||
if (brtvd->bv_mos_entries != 0) {
|
||||
(void) zap_prefetch_object(spa->spa_meta_objset,
|
||||
brtvd->bv_mos_entries);
|
||||
}
|
||||
rw_exit(&brtvd->bv_mos_entries_lock);
|
||||
|
||||
brt_rlock(spa);
|
||||
}
|
||||
brt_unlock(spa);
|
||||
}
|
||||
|
||||
void
|
||||
brt_unload(spa_t *spa)
|
||||
{
|
||||
|
||||
@ -850,12 +850,15 @@ dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, uint64_t size)
|
||||
return (err);
|
||||
|
||||
/*
|
||||
* Chunk the requests (16 indirects worth) so that we can be interrupted
|
||||
* Chunk the requests (16 indirects worth) so that we can be
|
||||
* interrupted. Prefetch at least SPA_MAXBLOCKSIZE at a time
|
||||
* to better utilize pools with smaller block sizes.
|
||||
*/
|
||||
uint64_t chunksize;
|
||||
if (dn->dn_indblkshift) {
|
||||
uint64_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1);
|
||||
chunksize = (nbps * 16) << dn->dn_datablkshift;
|
||||
chunksize = MAX(chunksize, SPA_MAXBLOCKSIZE);
|
||||
} else {
|
||||
chunksize = dn->dn_datablksz;
|
||||
}
|
||||
|
||||
@ -212,6 +212,8 @@
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_initialize.h>
|
||||
#include <sys/vdev_trim.h>
|
||||
#include <sys/brt.h>
|
||||
#include <sys/ddt.h>
|
||||
|
||||
#include "zfs_namecheck.h"
|
||||
#include "zfs_prop.h"
|
||||
@ -4276,13 +4278,11 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
spa_t *spa;
|
||||
int32_t type;
|
||||
|
||||
/*
|
||||
* Currently, only ZPOOL_PREFETCH_DDT is supported
|
||||
*/
|
||||
if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 ||
|
||||
type != ZPOOL_PREFETCH_DDT) {
|
||||
if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0)
|
||||
return (EINVAL);
|
||||
|
||||
if (type != ZPOOL_PREFETCH_DDT && type != ZPOOL_PREFETCH_BRT)
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
error = spa_open(poolname, &spa, FTAG);
|
||||
if (error != 0)
|
||||
@ -4290,10 +4290,17 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
|
||||
hrtime_t start_time = gethrtime();
|
||||
|
||||
ddt_prefetch_all(spa);
|
||||
|
||||
zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name,
|
||||
(u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
|
||||
if (type == ZPOOL_PREFETCH_DDT) {
|
||||
ddt_prefetch_all(spa);
|
||||
zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms",
|
||||
spa->spa_name,
|
||||
(u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
|
||||
} else {
|
||||
brt_prefetch_all(spa);
|
||||
zfs_dbgmsg("pool '%s': loaded brt into ARC in %llu ms",
|
||||
spa->spa_name,
|
||||
(u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
|
||||
}
|
||||
|
||||
spa_close(spa, FTAG);
|
||||
|
||||
|
||||
@ -215,7 +215,7 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
|
||||
tags = ['functional', 'cli_root', 'zfs_create']
|
||||
|
||||
[tests/functional/cli_root/zpool_prefetch]
|
||||
tests = ['zpool_prefetch_001_pos']
|
||||
tests = ['zpool_prefetch_001_pos', 'zpool_prefetch_002_pos']
|
||||
tags = ['functional', 'cli_root', 'zpool_prefetch']
|
||||
|
||||
[tests/functional/cli_root/zfs_destroy]
|
||||
|
||||
@ -1217,6 +1217,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/cli_root/zpool_prefetch/cleanup.ksh \
|
||||
functional/cli_root/zpool_prefetch/setup.ksh \
|
||||
functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh \
|
||||
functional/cli_root/zpool_prefetch/zpool_prefetch_002_pos.ksh \
|
||||
functional/cli_root/zpool_reguid/cleanup.ksh \
|
||||
functional/cli_root/zpool_reguid/setup.ksh \
|
||||
functional/cli_root/zpool_reguid/zpool_reguid_001_pos.ksh \
|
||||
|
||||
@ -42,6 +42,15 @@ verify_runnable "both"
|
||||
|
||||
log_assert "'zpool prefetch -t ddt <pool>' can successfully load the DDT for a pool."
|
||||
|
||||
DATASET=$TESTPOOL/ddt
|
||||
|
||||
function cleanup
|
||||
{
|
||||
datasetexists $DATASET && destroy_dataset $DATASET -f
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
function getddtstats
|
||||
{
|
||||
typeset -n gds=$1
|
||||
@ -75,9 +84,8 @@ log_must zpool prefetch -t ddt $TESTPOOL
|
||||
# Build up the deduplicated dataset. This consists of creating enough files
|
||||
# to generate a reasonable size DDT for testing purposes.
|
||||
|
||||
DATASET=$TESTPOOL/ddt
|
||||
log_must zfs create -o compression=off -o dedup=on $DATASET
|
||||
MNTPOINT=$(get_prop mountpoint $TESTPOOL/ddt)
|
||||
MNTPOINT=$(get_prop mountpoint $DATASET)
|
||||
|
||||
log_note "Generating dataset ..."
|
||||
typeset -i i=0
|
||||
|
||||
@ -0,0 +1,95 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2025 by iXsystems, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# 'zpool prefetch -t brt <pool>' can successfully load a pool's BRT on demand.
|
||||
# 'zpool prefetch <pool>' without -t prefetches both DDT and BRT.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a dataset with block cloning enabled.
|
||||
# 2. Create files and clone them to populate the BRT.
|
||||
# 3. Export and import the pool to flush caches.
|
||||
# 4. Use zpool prefetch -t brt to load BRT.
|
||||
# 5. Test zpool prefetch without -t to prefetch all types.
|
||||
#
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
if ! command -v clonefile > /dev/null ; then
|
||||
log_unsupported "clonefile program required to test block cloning"
|
||||
fi
|
||||
|
||||
log_assert "'zpool prefetch' can successfully load BRT and prefetch all types"
|
||||
|
||||
DATASET=$TESTPOOL/brt
|
||||
|
||||
function cleanup
|
||||
{
|
||||
datasetexists $DATASET && destroy_dataset $DATASET -f
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
log_must zfs create $DATASET
|
||||
MNTPOINT=$(get_prop mountpoint $DATASET)
|
||||
|
||||
log_note "Generating cloned blocks for BRT ..."
|
||||
|
||||
# Create source file
|
||||
log_must dd if=/dev/urandom of=$MNTPOINT/source bs=1M count=100
|
||||
|
||||
# Create clones using clonefile
|
||||
typeset -i i=0
|
||||
while (( i < 50 )); do
|
||||
log_must clonefile -f $MNTPOINT/source $MNTPOINT/clone.$i
|
||||
((i += 1))
|
||||
done
|
||||
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# Verify BRT has entries (non-zero saved space)
|
||||
brt_saved=$(zpool get -Hp -o value bclone_saved $TESTPOOL)
|
||||
log_note "BRT saved space: $brt_saved"
|
||||
log_must test "$brt_saved" -gt "0"
|
||||
|
||||
# Export/import to flush caches
|
||||
log_must zpool export $TESTPOOL
|
||||
log_must zpool import $TESTPOOL
|
||||
|
||||
# Test BRT prefetch - verify command succeeds
|
||||
# Note: BRT does not expose cache statistics like DDT, so we can only
|
||||
# verify the prefetch command completes successfully
|
||||
log_must zpool prefetch -t brt $TESTPOOL
|
||||
|
||||
# Test prefetch without -t (should prefetch all types including BRT)
|
||||
log_must zpool export $TESTPOOL
|
||||
log_must zpool import $TESTPOOL
|
||||
log_must zpool prefetch $TESTPOOL
|
||||
|
||||
log_pass "'zpool prefetch' successfully loads BRT and all types"
|
||||
Loading…
Reference in New Issue
Block a user