From cbb9ef0a4c8e04358f7d5ddae0eb99d0f703ee21 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Fri, 6 Oct 2023 17:06:34 +1100 Subject: [PATCH] ddt: tuneable to override copies= on dedup metadata objects All objects stored in the MOS get copies=3. For a large dedup table, this requires significant extra IO and disk space, when it's not really necessary - the dedup table itself isn't needed to read or write data, only to keep data usage down. Losing the dedup table does not render the pool unusable, it just messes up the accounting somewhat. This adds a dmu_ddt_copies tuneable. When set to 0, the existing behaviour is used. When set higher, dedup table blocks (ZAP and log) will have this many copies rather than the usual 3, while indirect blocks will have one more again. This is a tuneable for now mostly for testing. Losing a dedup table can cause blocks to be leaked, and we currently have no facilities to repair that. Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Co-authored-by: Allan Jude Signed-off-by: Rob Norris Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Closes #15895 --- module/zfs/dmu.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 3dcf49ceb..b3eda8ea5 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -95,6 +95,12 @@ uint_t dmu_prefetch_max = 8 * 1024 * 1024; uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE; #endif +/* + * Override copies= for dedup state objects. 0 means the traditional behaviour + * (i.e. the default for the containing objset, which is 3 for the MOS). 
+ */ +uint_t dmu_ddt_copies = 0; + const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" }, {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" }, @@ -2272,6 +2278,28 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) case ZFS_REDUNDANT_METADATA_NONE: break; } + + if (dmu_ddt_copies > 0) { + /* + * If this tuneable is set, and this is a write for a + * dedup entry store (zap or log), then we treat it + * something like ZFS_REDUNDANT_METADATA_MOST on a + * regular dataset: this many copies, and one more for + * "higher" indirect blocks. This specific exception is + * necessary because dedup objects are stored in the + * MOS, which always has the highest possible copies. + */ + dmu_object_type_t stype = + dn ? dn->dn_storage_type : DMU_OT_NONE; + if (stype == DMU_OT_NONE) + stype = type; + if (stype == DMU_OT_DDT_ZAP) { + copies = dmu_ddt_copies; + if (level >= + zfs_redundant_metadata_most_ditto_level) + copies++; + } + } } else if (wp & WP_NOFILL) { ASSERT(level == 0); @@ -2824,3 +2852,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW, /* CSTYLED */ ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW, "Limit one prefetch call to this size"); + +/* CSTYLED */ +ZFS_MODULE_PARAM(zfs, , dmu_ddt_copies, UINT, ZMOD_RW, + "Override copies= for dedup objects");