Increase limit of redaction list by using spill block

Currently redaction bookmarks and their associated redaction lists
have a relatively low limit of 36 redaction snapshots. This is imposed
by the number of snapshot GUIDs that fit in the bonus buffer of the
redaction list object. While this is more than enough for most use
cases, there are some limited cases where larger numbers would be
useful to support.

We tweak the redaction list creation code to use a spill block if
the number of redaction snapshots is above the amount that would fit
in the bonus buffer. We also make a small change to allow spill blocks
to be use for types of data besides SA. In order to fully leverage
this logic, we also change the redaction code to use vmem_alloc, to
handle extremely large allocations if needed. Finally, small tweaks
were made to the zfs commands and the test suite.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15018
This commit is contained in:
Paul Dagnelie
2023-08-26 11:34:43 -07:00
committed by GitHub
parent 11326f8eb1
commit bee9cfb813
13 changed files with 128 additions and 34 deletions
+8 -9
View File
@@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
bqueue_enqueue(q, record, sizeof (*record));
return (0);
}
redact_nodes = kmem_zalloc(num_threads *
redact_nodes = vmem_zalloc(num_threads *
sizeof (*redact_nodes), KM_SLEEP);
avl_create(&start_tree, redact_node_compare_start,
@@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
avl_destroy(&start_tree);
avl_destroy(&end_tree);
kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
if (current_record != NULL)
bqueue_enqueue(q, current_record, sizeof (*current_record));
return (err);
@@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
numsnaps = fnvlist_num_pairs(redactnvl);
if (numsnaps > 0)
args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
nvpair_t *pair = NULL;
for (int i = 0; i < numsnaps; i++) {
@@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
kmem_free(newredactbook,
sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
return (SET_ERROR(ENAMETOOLONG));
}
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
@@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
} else {
uint64_t *guids = NULL;
if (numsnaps > 0) {
guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
guids = vmem_zalloc(numsnaps * sizeof (uint64_t),
KM_SLEEP);
}
for (int i = 0; i < numsnaps; i++) {
@@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
dp = NULL;
err = dsl_bookmark_create_redacted(newredactbook, snapname,
numsnaps, guids, FTAG, &new_rl);
kmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0) {
vmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0)
goto out;
}
}
for (int i = 0; i < numsnaps; i++) {
@@ -1188,7 +1187,7 @@ out:
}
if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
if (dp != NULL)
dsl_pool_rele(dp, FTAG);
if (ds != NULL) {
+1
View File
@@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
+52 -15
View File
@@ -34,6 +34,7 @@
#include <sys/dsl_bookmark.h>
#include <zfs_namecheck.h>
#include <sys/dmu_send.h>
#include <sys/dbuf.h>
static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
@@ -459,25 +460,42 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
if (redaction_list != NULL || bookmark_redacted) {
redaction_list_t *local_rl;
boolean_t spill = B_FALSE;
if (bookmark_redacted) {
redact_snaps = dsredactsnaps;
num_redact_snaps = dsnumsnaps;
}
int bonuslen = sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t);
if (bonuslen > dmu_bonus_max())
spill = B_TRUE;
dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t), tx);
DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_BOOKMARKS, tx);
if (spill) {
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
}
VERIFY0(dsl_redaction_list_hold_obj(dp,
dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
dsl_redaction_list_long_hold(dp, local_rl, tag);
ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
sizeof (redaction_list_phys_t) + num_redact_snaps *
sizeof (uint64_t));
dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
if (!spill) {
ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen);
dmu_buf_will_dirty(local_rl->rl_bonus, tx);
} else {
dmu_buf_t *db;
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
DB_RF_MUST_SUCCEED, FTAG, &db));
dmu_buf_will_fill(db, tx);
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
SPA_MINBLOCKSIZE), tx));
local_rl->rl_phys = db->db_data;
local_rl->rl_dbuf = db;
}
memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
@@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
SPA_FEATURE_REDACTION_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
/*
* If the list of redact snaps will not fit in the bonus buffer with
* the furthest reached object and offset, fail.
* If the list of redact snaps will not fit in the bonus buffer (or
* spill block, with the REDACTION_LIST_SPILL feature) with the
* furthest reached object and offset, fail.
*/
if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa,
SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) :
dmu_bonus_max()) -
sizeof (redaction_list_phys_t)) / sizeof (uint64_t);
if (dbcra->dbcra_numsnaps > snaplimit)
return (SET_ERROR(E2BIG));
if (dsl_bookmark_create_nvl_validate_pair(
@@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
}
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
if (rl->dn_have_spill) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
}
dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
@@ -1213,7 +1243,9 @@ redaction_list_evict_sync(void *rlu)
void
dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
{
dmu_buf_rele(rl->rl_dbuf, tag);
if (rl->rl_bonus != rl->rl_dbuf)
dmu_buf_rele(rl->rl_dbuf, tag);
dmu_buf_rele(rl->rl_bonus, tag);
}
int
@@ -1221,7 +1253,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t **rlp)
{
objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf;
dmu_buf_t *dbuf, *spill_dbuf;
redaction_list_t *rl;
int err;
@@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t *winner = NULL;
rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
rl->rl_dbuf = dbuf;
rl->rl_bonus = dbuf;
if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
rl->rl_dbuf = spill_dbuf;
} else {
rl->rl_dbuf = dbuf;
}
rl->rl_object = rlobj;
rl->rl_phys = dbuf->db_data;
rl->rl_phys = rl->rl_dbuf->db_data;
rl->rl_mos = dp->dp_meta_objset;
zfs_refcount_create(&rl->rl_longholds);
dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
&rl->rl_dbuf);
&rl->rl_bonus);
if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
kmem_free(rl, sizeof (*rl));
rl = winner;
+10
View File
@@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
NULL) {
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG,
&rl));
if (rl->dn_have_spill) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_REDACTION_LIST_SPILL,
tx);
}
dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),