Implement bookmark copying

This feature allows copying existing bookmarks using

    zfs bookmark fs#target fs#newbookmark

There are some niche use cases for such functionality,
e.g. when using bookmarks as markers for replication progress.

Copying redaction bookmarks produces a normal bookmark that
cannot be used for redacted send (we are not duplicating
the redaction object).

ZCP support for bookmarking (both creation and copying) will be
implemented in a separate patch based on this work.

Overview:

- Terminology:
    - source = existing snapshot or bookmark
    - new/bmark = new bookmark
- Implement bookmark copying in `dsl_bookmark.c`
  - create new bookmark node
  - copy source's `zbn_phys` to new's `zbn_phys`
  - zero-out redaction object id in copy
- Extend existing bookmark ioctl nvlist schema to accept
  bookmarks as sources
  - => `dsl_bookmark_create_nvl_validate` is authoritative
- use `dsl_dataset_is_before` check for both snapshot
  and bookmark sources
- Adjust CLI
  - refactor shortname expansion logic in `zfs_do_bookmark`
- Update man pages
  - warn about redaction bookmark handling
- Add test cases
  - CLI
  - pyyzfs libzfs_core bindings

Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Christian Schwarz <me@cschwarz.com>
Closes #9571
This commit is contained in:
Christian Schwarz
2019-11-10 23:24:14 -08:00
committed by Brian Behlendorf
parent 7b49bbc816
commit a73f361fdb
19 changed files with 672 additions and 142 deletions
+269 -45
View File
@@ -16,6 +16,7 @@
/*
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc.
* Copyright 2019, 2020 by Christian Schwarz. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -55,6 +56,9 @@ dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
}
/*
* When reading BOOKMARK_V1 bookmarks, the BOOKMARK_V2 fields are guaranteed
* to be zeroed.
*
* Returns ESRCH if bookmark is not found.
* Note, we need to use the ZAP rather than the AVL to look up bookmarks
* by name, because only the ZAP honors the casesensitivity setting.
@@ -116,6 +120,91 @@ dsl_bookmark_lookup(dsl_pool_t *dp, const char *fullname,
return (error);
}
/*
* Validates that
* - bmark is a full dataset path of a bookmark (bookmark_namecheck)
* - source is a full path of a snaphot or bookmark
* ({bookmark,snapshot}_namecheck)
*
* Returns 0 if valid, -1 otherwise.
*/
static int
dsl_bookmark_create_nvl_validate_pair(const char *bmark, const char *source)
{
if (bookmark_namecheck(bmark, NULL, NULL) != 0)
return (-1);
int is_bmark, is_snap;
is_bmark = bookmark_namecheck(source, NULL, NULL) == 0;
is_snap = snapshot_namecheck(source, NULL, NULL) == 0;
if (!is_bmark && !is_snap)
return (-1);
return (0);
}
/*
* Check that the given nvlist corresponds to the following schema:
* { newbookmark -> source, ... }
* where
* - each pair passes dsl_bookmark_create_nvl_validate_pair
* - all newbookmarks are in the same pool
* - all newbookmarks have unique names
*
* Note that this function is only validates above schema. Callers must ensure
* that the bookmarks can be created, e.g. that sources exist.
*
* Returns 0 if the nvlist adheres to above schema.
* Returns -1 if it doesn't.
*/
int
dsl_bookmark_create_nvl_validate(nvlist_t *bmarks)
{
char *first;
size_t first_len;
first = NULL;
for (nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) {
char *bmark = nvpair_name(pair);
char *source;
/* list structure: values must be snapshots XOR bookmarks */
if (nvpair_value_string(pair, &source) != 0)
return (-1);
if (dsl_bookmark_create_nvl_validate_pair(bmark, source) != 0)
return (-1);
/* same pool check */
if (first == NULL) {
char *cp = strpbrk(bmark, "/#");
if (cp == NULL)
return (-1);
first = bmark;
first_len = cp - bmark;
}
if (strncmp(first, bmark, first_len) != 0)
return (-1);
switch (*(bmark + first_len)) {
case '/': /* fallthrough */
case '#':
break;
default:
return (-1);
}
/* unique newbookmark names; todo: O(n^2) */
for (nvpair_t *pair2 = nvlist_next_nvpair(bmarks, pair);
pair2 != NULL; pair2 = nvlist_next_nvpair(bmarks, pair2)) {
if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
return (-1);
}
}
return (0);
}
typedef struct dsl_bookmark_create_redacted_arg {
const char *dbcra_bmark;
const char *dbcra_snap;
@@ -130,36 +219,85 @@ typedef struct dsl_bookmark_create_arg {
nvlist_t *dbca_errors;
} dsl_bookmark_create_arg_t;
/*
* expects that newbm and source have been validated using
* dsl_bookmark_create_nvl_validate_pair
*/
static int
dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name,
dmu_tx_t *tx)
dsl_bookmark_create_check_impl(dsl_pool_t *dp,
const char *newbm, const char *source)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *bmark_fs;
char *shortname;
ASSERT0(dsl_bookmark_create_nvl_validate_pair(newbm, source));
/* defer source namecheck until we know it's a snapshot or bookmark */
int error;
zfs_bookmark_phys_t bmark_phys = { 0 };
dsl_dataset_t *newbm_ds;
char *newbm_short;
zfs_bookmark_phys_t bmark_phys;
if (!snapds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
error = dsl_bookmark_hold_ds(dp, bookmark_name,
&bmark_fs, FTAG, &shortname);
error = dsl_bookmark_hold_ds(dp, newbm, &newbm_ds, FTAG, &newbm_short);
if (error != 0)
return (error);
if (!dsl_dataset_is_before(bmark_fs, snapds, 0)) {
dsl_dataset_rele(bmark_fs, FTAG);
return (SET_ERROR(EINVAL));
/* Verify that the new bookmark does not already exist */
error = dsl_bookmark_lookup_impl(newbm_ds, newbm_short, &bmark_phys);
switch (error) {
case ESRCH:
/* happy path: new bmark doesn't exist, proceed after switch */
error = 0;
break;
case 0:
error = SET_ERROR(EEXIST);
goto eholdnewbmds;
default:
/* dsl_bookmark_lookup_impl already did SET_ERRROR */
goto eholdnewbmds;
}
error = dsl_bookmark_lookup_impl(bmark_fs, shortname,
&bmark_phys);
dsl_dataset_rele(bmark_fs, FTAG);
if (error == 0)
return (SET_ERROR(EEXIST));
if (error == ESRCH)
return (0);
/* error is retval of the following if-cascade */
if (strchr(source, '@') != NULL) {
dsl_dataset_t *source_snap_ds;
ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0);
error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds);
if (error == 0) {
VERIFY(source_snap_ds->ds_is_snapshot);
/*
* Verify that source snapshot is an earlier point in
* newbm_ds's timeline (source may be newbm_ds's origin)
*/
if (!dsl_dataset_is_before(newbm_ds, source_snap_ds, 0))
error = SET_ERROR(
ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
dsl_dataset_rele(source_snap_ds, FTAG);
}
} else if (strchr(source, '#') != NULL) {
zfs_bookmark_phys_t source_phys;
ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0);
/*
* Source must exists and be an earlier point in newbm_ds's
* timeline (newbm_ds's origin may be a snap of source's ds)
*/
error = dsl_bookmark_lookup(dp, source, newbm_ds, &source_phys);
switch (error) {
case 0:
break; /* happy path */
case EXDEV:
error = SET_ERROR(ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
break;
default:
/* dsl_bookmark_lookup already did SET_ERRROR */
break;
}
} else {
/*
* dsl_bookmark_create_nvl_validate validates that source is
* either snapshot or bookmark
*/
panic("unreachable code: %s", source);
}
eholdnewbmds:
dsl_dataset_rele(newbm_ds, FTAG);
return (error);
}
@@ -167,33 +305,37 @@ static int
dsl_bookmark_create_check(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_arg_t *dbca = arg;
int rv = 0;
int schema_err = 0;
ASSERT3P(dbca, !=, NULL);
ASSERT3P(dbca->dbca_bmarks, !=, NULL);
/* dbca->dbca_errors is allowed to be NULL */
dsl_pool_t *dp = dmu_tx_pool(tx);
int rv = 0;
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
if (dsl_bookmark_create_nvl_validate(dbca->dbca_bmarks) != 0)
rv = schema_err = SET_ERROR(EINVAL);
for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
dsl_dataset_t *snapds;
int error;
char *new = nvpair_name(pair);
/* note: validity of nvlist checked by ioctl layer */
error = dsl_dataset_hold(dp, fnvpair_value_string(pair),
FTAG, &snapds);
int error = schema_err;
if (error == 0) {
error = dsl_bookmark_create_check_impl(snapds,
nvpair_name(pair), tx);
dsl_dataset_rele(snapds, FTAG);
char *source = fnvpair_value_string(pair);
error = dsl_bookmark_create_check_impl(dp, new, source);
if (error != 0)
error = SET_ERROR(error);
}
if (error != 0) {
rv = error;
if (dbca->dbca_errors != NULL)
fnvlist_add_int32(dbca->dbca_errors,
nvpair_name(pair), error);
new, error);
}
}
@@ -259,6 +401,10 @@ dsl_bookmark_set_phys(zfs_bookmark_phys_t *zbm, dsl_dataset_t *snap)
}
}
/*
* Add dsl_bookmark_node_t `dbn` to the given dataset and increment appropriate
* SPA feature counters.
*/
void
dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn,
dmu_tx_t *tx)
@@ -308,7 +454,7 @@ dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn,
* list, and store the object number of the redaction list in redact_obj.
*/
static void
dsl_bookmark_create_sync_impl(const char *bookmark, const char *snapshot,
dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
dmu_tx_t *tx, uint64_t num_redact_snaps, uint64_t *redact_snaps, void *tag,
redaction_list_t **redaction_list)
{
@@ -381,7 +527,76 @@ dsl_bookmark_create_sync_impl(const char *bookmark, const char *snapshot,
dsl_dataset_rele(snapds, FTAG);
}
static void
dsl_bookmark_create_sync_impl_book(
const char *new_name, const char *source_name, dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *bmark_fs_source, *bmark_fs_new;
char *source_shortname, *new_shortname;
zfs_bookmark_phys_t source_phys;
VERIFY0(dsl_bookmark_hold_ds(dp, source_name, &bmark_fs_source, FTAG,
&source_shortname));
VERIFY0(dsl_bookmark_hold_ds(dp, new_name, &bmark_fs_new, FTAG,
&new_shortname));
/*
* create a copy of the source bookmark by copying most of its members
*
* Caveat: bookmarking a redaction bookmark yields a normal bookmark
* -----------------------------------------------------------------
* Reasoning:
* - The zbm_redaction_obj would be referred to by both source and new
* bookmark, but would be destroyed once either source or new is
* destroyed, resulting in use-after-free of the referrred object.
* - User expectation when issuing the `zfs bookmark` command is that
* a normal bookmark of the source is created
*
* Design Alternatives For Full Redaction Bookmark Copying:
* - reference-count the redaction object => would require on-disk
* format change for existing redaction objects
* - Copy the redaction object => cannot be done in syncing context
* because the redaction object might be too large
*/
VERIFY0(dsl_bookmark_lookup_impl(bmark_fs_source, source_shortname,
&source_phys));
dsl_bookmark_node_t *new_dbn = dsl_bookmark_node_alloc(new_shortname);
memcpy(&new_dbn->dbn_phys, &source_phys, sizeof (source_phys));
new_dbn->dbn_phys.zbm_redaction_obj = 0;
/* update feature counters */
if (new_dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_BOOKMARK_WRITTEN, tx);
}
/* no need for redaction bookmark counter; nulled zbm_redaction_obj */
/* dsl_bookmark_node_add bumps bookmarks and v2-bookmarks counter */
/*
* write new bookmark
*
* Note that dsl_bookmark_lookup_impl guarantees that, if source is a
* v1 bookmark, the v2-only fields are zeroed.
* And dsl_bookmark_node_add writes back a v1-sized bookmark if
* v2 bookmarks are disabled and/or v2-only fields are zeroed.
* => bookmark copying works on pre-bookmark-v2 pools
*/
dsl_bookmark_node_add(bmark_fs_new, new_dbn, tx);
spa_history_log_internal_ds(bmark_fs_source, "bookmark", tx,
"name=%s creation_txg=%llu source_guid=%llu",
new_shortname, (longlong_t)new_dbn->dbn_phys.zbm_creation_txg,
(longlong_t)source_phys.zbm_guid);
dsl_dataset_rele(bmark_fs_source, FTAG);
dsl_dataset_rele(bmark_fs_new, FTAG);
}
void
dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_arg_t *dbca = arg;
@@ -391,8 +606,19 @@ dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
dsl_bookmark_create_sync_impl(nvpair_name(pair),
fnvpair_value_string(pair), tx, 0, NULL, NULL, NULL);
char *new = nvpair_name(pair);
char *source = fnvpair_value_string(pair);
if (strchr(source, '@') != NULL) {
dsl_bookmark_create_sync_impl_snap(new, source, tx,
0, NULL, NULL, NULL);
} else if (strchr(source, '#') != NULL) {
dsl_bookmark_create_sync_impl_book(new, source, tx);
} else {
panic("unreachable code");
}
}
}
@@ -422,7 +648,6 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_redacted_arg_t *dbcra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *snapds;
int rv = 0;
if (!spa_feature_is_enabled(dp->dp_spa,
@@ -436,13 +661,12 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
return (SET_ERROR(E2BIG));
rv = dsl_dataset_hold(dp, dbcra->dbcra_snap,
FTAG, &snapds);
if (rv == 0) {
rv = dsl_bookmark_create_check_impl(snapds, dbcra->dbcra_bmark,
tx);
dsl_dataset_rele(snapds, FTAG);
}
if (dsl_bookmark_create_nvl_validate_pair(
dbcra->dbcra_bmark, dbcra->dbcra_snap) != 0)
return (SET_ERROR(EINVAL));
rv = dsl_bookmark_create_check_impl(dp,
dbcra->dbcra_bmark, dbcra->dbcra_snap);
return (rv);
}
@@ -450,9 +674,9 @@ static void
dsl_bookmark_create_redacted_sync(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_redacted_arg_t *dbcra = arg;
dsl_bookmark_create_sync_impl(dbcra->dbcra_bmark, dbcra->dbcra_snap, tx,
dbcra->dbcra_numsnaps, dbcra->dbcra_snaps, dbcra->dbcra_tag,
dbcra->dbcra_rl);
dsl_bookmark_create_sync_impl_snap(dbcra->dbcra_bmark,
dbcra->dbcra_snap, tx, dbcra->dbcra_numsnaps, dbcra->dbcra_snaps,
dbcra->dbcra_tag, dbcra->dbcra_rl);
}
int
+7 -23
View File
@@ -37,6 +37,7 @@
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
*/
/*
@@ -3614,11 +3615,13 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
}
/*
* Create bookmarks. Bookmark names are of the form <fs>#<bmark>.
* All bookmarks must be in the same pool.
* Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
* All bookmarks and snapshots must be in the same pool.
* dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
*
* innvl: {
* bookmark1 -> snapshot1, bookmark2 -> snapshot2
* new_bookmark1 -> existing_snapshot,
* new_bookmark2 -> existing_bookmark,
* }
*
* outnvl: bookmark -> error code (int32)
@@ -3632,25 +3635,6 @@ static const zfs_ioc_key_t zfs_keys_bookmark[] = {
static int
zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
char *snap_name;
/*
* Verify the snapshot argument.
*/
if (nvpair_value_string(pair, &snap_name) != 0)
return (SET_ERROR(EINVAL));
/* Verify that the keys (bookmarks) are unique */
for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
return (SET_ERROR(EINVAL));
}
}
return (dsl_bookmark_create(innvl, outnvl));
}
@@ -4164,7 +4148,7 @@ recursive_unmount(const char *fsname, void *arg)
* snapname is the snapshot to redact.
* innvl: {
* "bookname" -> (string)
* name of the redaction bookmark to generate
* shortname of the redaction bookmark to generate
* "snapnv" -> (nvlist, values ignored)
* snapshots to redact snapname with respect to
* }