mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Increase limit of redaction list by using spill block
Currently redaction bookmarks and their associated redaction lists have a relatively low limit of 36 redaction snapshots. This is imposed by the number of snapshot GUIDs that fit in the bonus buffer of the redaction list object. While this is more than enough for most use cases, there are some limited cases where larger numbers would be useful to support. We tweak the redaction list creation code to use a spill block if the number of redaction snapshots is above the amount that would fit in the bonus buffer. We also make a small change to allow spill blocks to be use for types of data besides SA. In order to fully leverage this logic, we also change the redaction code to use vmem_alloc, to handle extremely large allocations if needed. Finally, small tweaks were made to the zfs commands and the test suite. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Paul Dagnelie <pcd@delphix.com> Closes #15018
This commit is contained in:
@@ -737,6 +737,18 @@ zpool_feature_init(void)
|
||||
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL,
|
||||
sfeatures);
|
||||
|
||||
{
|
||||
static const spa_feature_t redact_list_spill_deps[] = {
|
||||
SPA_FEATURE_REDACTION_BOOKMARKS,
|
||||
SPA_FEATURE_NONE
|
||||
};
|
||||
zfeature_register(SPA_FEATURE_REDACTION_LIST_SPILL,
|
||||
"com.delphix:redaction_list_spill", "redaction_list_spill",
|
||||
"Support for increased number of redaction_snapshot "
|
||||
"arguments in zfs redact.", 0, ZFEATURE_TYPE_BOOLEAN,
|
||||
redact_list_spill_deps, sfeatures);
|
||||
}
|
||||
|
||||
zfs_mod_list_supported_free(sfeatures);
|
||||
}
|
||||
|
||||
|
||||
@@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
|
||||
bqueue_enqueue(q, record, sizeof (*record));
|
||||
return (0);
|
||||
}
|
||||
redact_nodes = kmem_zalloc(num_threads *
|
||||
redact_nodes = vmem_zalloc(num_threads *
|
||||
sizeof (*redact_nodes), KM_SLEEP);
|
||||
|
||||
avl_create(&start_tree, redact_node_compare_start,
|
||||
@@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
|
||||
|
||||
avl_destroy(&start_tree);
|
||||
avl_destroy(&end_tree);
|
||||
kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
|
||||
vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
|
||||
if (current_record != NULL)
|
||||
bqueue_enqueue(q, current_record, sizeof (*current_record));
|
||||
return (err);
|
||||
@@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
|
||||
|
||||
numsnaps = fnvlist_num_pairs(redactnvl);
|
||||
if (numsnaps > 0)
|
||||
args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
|
||||
args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
|
||||
|
||||
nvpair_t *pair = NULL;
|
||||
for (int i = 0; i < numsnaps; i++) {
|
||||
@@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
|
||||
kmem_free(newredactbook,
|
||||
sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
|
||||
if (args != NULL)
|
||||
kmem_free(args, numsnaps * sizeof (*args));
|
||||
vmem_free(args, numsnaps * sizeof (*args));
|
||||
return (SET_ERROR(ENAMETOOLONG));
|
||||
}
|
||||
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
|
||||
@@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
|
||||
} else {
|
||||
uint64_t *guids = NULL;
|
||||
if (numsnaps > 0) {
|
||||
guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
|
||||
guids = vmem_zalloc(numsnaps * sizeof (uint64_t),
|
||||
KM_SLEEP);
|
||||
}
|
||||
for (int i = 0; i < numsnaps; i++) {
|
||||
@@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
|
||||
dp = NULL;
|
||||
err = dsl_bookmark_create_redacted(newredactbook, snapname,
|
||||
numsnaps, guids, FTAG, &new_rl);
|
||||
kmem_free(guids, numsnaps * sizeof (uint64_t));
|
||||
if (err != 0) {
|
||||
vmem_free(guids, numsnaps * sizeof (uint64_t));
|
||||
if (err != 0)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < numsnaps; i++) {
|
||||
@@ -1188,7 +1187,7 @@ out:
|
||||
}
|
||||
|
||||
if (args != NULL)
|
||||
kmem_free(args, numsnaps * sizeof (*args));
|
||||
vmem_free(args, numsnaps * sizeof (*args));
|
||||
if (dp != NULL)
|
||||
dsl_pool_rele(dp, FTAG);
|
||||
if (ds != NULL) {
|
||||
|
||||
@@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
ASSERT(DMU_OT_IS_VALID(ot));
|
||||
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
|
||||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
|
||||
(bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) ||
|
||||
(bonustype != DMU_OT_NONE && bonuslen != 0));
|
||||
ASSERT(DMU_OT_IS_VALID(bonustype));
|
||||
ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
|
||||
|
||||
+52
-15
@@ -34,6 +34,7 @@
|
||||
#include <sys/dsl_bookmark.h>
|
||||
#include <zfs_namecheck.h>
|
||||
#include <sys/dmu_send.h>
|
||||
#include <sys/dbuf.h>
|
||||
|
||||
static int
|
||||
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
|
||||
@@ -459,25 +460,42 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
|
||||
SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
|
||||
if (redaction_list != NULL || bookmark_redacted) {
|
||||
redaction_list_t *local_rl;
|
||||
boolean_t spill = B_FALSE;
|
||||
if (bookmark_redacted) {
|
||||
redact_snaps = dsredactsnaps;
|
||||
num_redact_snaps = dsnumsnaps;
|
||||
}
|
||||
int bonuslen = sizeof (redaction_list_phys_t) +
|
||||
num_redact_snaps * sizeof (uint64_t);
|
||||
if (bonuslen > dmu_bonus_max())
|
||||
spill = B_TRUE;
|
||||
dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
|
||||
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
|
||||
DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
|
||||
num_redact_snaps * sizeof (uint64_t), tx);
|
||||
DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
|
||||
spa_feature_incr(dp->dp_spa,
|
||||
SPA_FEATURE_REDACTION_BOOKMARKS, tx);
|
||||
if (spill) {
|
||||
spa_feature_incr(dp->dp_spa,
|
||||
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
|
||||
}
|
||||
|
||||
VERIFY0(dsl_redaction_list_hold_obj(dp,
|
||||
dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
|
||||
dsl_redaction_list_long_hold(dp, local_rl, tag);
|
||||
|
||||
ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
|
||||
sizeof (redaction_list_phys_t) + num_redact_snaps *
|
||||
sizeof (uint64_t));
|
||||
dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
|
||||
if (!spill) {
|
||||
ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen);
|
||||
dmu_buf_will_dirty(local_rl->rl_bonus, tx);
|
||||
} else {
|
||||
dmu_buf_t *db;
|
||||
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
|
||||
DB_RF_MUST_SUCCEED, FTAG, &db));
|
||||
dmu_buf_will_fill(db, tx);
|
||||
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
|
||||
SPA_MINBLOCKSIZE), tx));
|
||||
local_rl->rl_phys = db->db_data;
|
||||
local_rl->rl_dbuf = db;
|
||||
}
|
||||
memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
|
||||
sizeof (uint64_t) * num_redact_snaps);
|
||||
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
|
||||
@@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
|
||||
SPA_FEATURE_REDACTION_BOOKMARKS))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
/*
|
||||
* If the list of redact snaps will not fit in the bonus buffer with
|
||||
* the furthest reached object and offset, fail.
|
||||
* If the list of redact snaps will not fit in the bonus buffer (or
|
||||
* spill block, with the REDACTION_LIST_SPILL feature) with the
|
||||
* furthest reached object and offset, fail.
|
||||
*/
|
||||
if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
|
||||
sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
|
||||
uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa,
|
||||
SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) :
|
||||
dmu_bonus_max()) -
|
||||
sizeof (redaction_list_phys_t)) / sizeof (uint64_t);
|
||||
if (dbcra->dbcra_numsnaps > snaplimit)
|
||||
return (SET_ERROR(E2BIG));
|
||||
|
||||
if (dsl_bookmark_create_nvl_validate_pair(
|
||||
@@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
|
||||
}
|
||||
|
||||
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
|
||||
dnode_t *rl;
|
||||
VERIFY0(dnode_hold(mos,
|
||||
dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
|
||||
if (rl->dn_have_spill) {
|
||||
spa_feature_decr(dmu_objset_spa(mos),
|
||||
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
|
||||
}
|
||||
dnode_rele(rl, FTAG);
|
||||
VERIFY0(dmu_object_free(mos,
|
||||
dbn->dbn_phys.zbm_redaction_obj, tx));
|
||||
spa_feature_decr(dmu_objset_spa(mos),
|
||||
@@ -1213,7 +1243,9 @@ redaction_list_evict_sync(void *rlu)
|
||||
void
|
||||
dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
|
||||
{
|
||||
dmu_buf_rele(rl->rl_dbuf, tag);
|
||||
if (rl->rl_bonus != rl->rl_dbuf)
|
||||
dmu_buf_rele(rl->rl_dbuf, tag);
|
||||
dmu_buf_rele(rl->rl_bonus, tag);
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1221,7 +1253,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
|
||||
redaction_list_t **rlp)
|
||||
{
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
dmu_buf_t *dbuf;
|
||||
dmu_buf_t *dbuf, *spill_dbuf;
|
||||
redaction_list_t *rl;
|
||||
int err;
|
||||
|
||||
@@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
|
||||
redaction_list_t *winner = NULL;
|
||||
|
||||
rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
|
||||
rl->rl_dbuf = dbuf;
|
||||
rl->rl_bonus = dbuf;
|
||||
if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
|
||||
rl->rl_dbuf = spill_dbuf;
|
||||
} else {
|
||||
rl->rl_dbuf = dbuf;
|
||||
}
|
||||
rl->rl_object = rlobj;
|
||||
rl->rl_phys = dbuf->db_data;
|
||||
rl->rl_phys = rl->rl_dbuf->db_data;
|
||||
rl->rl_mos = dp->dp_meta_objset;
|
||||
zfs_refcount_create(&rl->rl_longholds);
|
||||
dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
|
||||
&rl->rl_dbuf);
|
||||
&rl->rl_bonus);
|
||||
if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
|
||||
kmem_free(rl, sizeof (*rl));
|
||||
rl = winner;
|
||||
|
||||
@@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||
while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
|
||||
NULL) {
|
||||
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
|
||||
dnode_t *rl;
|
||||
VERIFY0(dnode_hold(mos,
|
||||
dbn->dbn_phys.zbm_redaction_obj, FTAG,
|
||||
&rl));
|
||||
if (rl->dn_have_spill) {
|
||||
spa_feature_decr(dmu_objset_spa(mos),
|
||||
SPA_FEATURE_REDACTION_LIST_SPILL,
|
||||
tx);
|
||||
}
|
||||
dnode_rele(rl, FTAG);
|
||||
VERIFY0(dmu_object_free(mos,
|
||||
dbn->dbn_phys.zbm_redaction_obj, tx));
|
||||
spa_feature_decr(dmu_objset_spa(mos),
|
||||
|
||||
Reference in New Issue
Block a user