mirror_zfs/module/zfs/dsl_bookmark.c
Tom Caputi f00ab3f22c Detect and prevent mixed raw and non-raw sends
Currently, there is an issue in the raw receive code where
raw receives are allowed to happen on top of previously
non-raw received datasets. This is a problem because the
source-side dataset doesn't know about how the blocks on
the destination were encrypted. As a result, any MAC in
the objset's checksum-of-MACs tree that is a parent of both
blocks encrypted on the source and blocks encrypted by the
destination will be incorrect. This will result in
authentication errors when we decrypt the dataset.

This patch fixes this issue by adding a new check to the
raw receive code. The code now maintains an "IVset guid",
which acts as an identifier for the set of IVs used to
encrypt a given snapshot. When a snapshot is raw received,
the destination snapshot will take this value from the
DRR_BEGIN payload. Non-raw receives and normal "zfs snap"
operations will cause ZFS to generate a new IVset guid.
When a raw incremental stream is received, ZFS will check
that the "from" IVset guid in the stream matches that of
the "from" destination snapshot. If they do not match, the
code will error out the receive, preventing the problem.

This patch requires an on-disk format change to add the
IVset guids to snapshots and bookmarks. As a result, this
patch has errata handling and a tunable to help affected
users resolve the issue with as little interruption as
possible.

Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #8308
2019-03-13 11:00:43 -07:00

506 lines
13 KiB
C

/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc.
*/
#include <sys/zfs_context.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/spa.h>
#include <sys/dsl_bookmark.h>
#include <zfs_namecheck.h>
static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
dsl_dataset_t **dsp, void *tag, char **shortnamep)
{
char buf[ZFS_MAX_DATASET_NAME_LEN];
char *hashp;
if (strlen(fullname) >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
hashp = strchr(fullname, '#');
if (hashp == NULL)
return (SET_ERROR(EINVAL));
*shortnamep = hashp + 1;
if (zfs_component_namecheck(*shortnamep, NULL, NULL))
return (SET_ERROR(EINVAL));
(void) strlcpy(buf, fullname, hashp - fullname + 1);
return (dsl_dataset_hold(dp, buf, tag, dsp));
}
/*
* Returns ESRCH if bookmark is not found.
*/
static int
dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname,
zfs_bookmark_phys_t *bmark_phys)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t bmark_zapobj = ds->ds_bookmarks;
matchtype_t mt = 0;
int err;
if (bmark_zapobj == 0)
return (SET_ERROR(ESRCH));
if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
mt = MT_NORMALIZE;
/*
* Zero out the bookmark in case the one stored on disk
* is in an older, shorter format.
*/
bzero(bmark_phys, sizeof (*bmark_phys));
err = zap_lookup_norm(mos, bmark_zapobj, shortname, sizeof (uint64_t),
sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt,
NULL, 0, NULL);
return (err == ENOENT ? ESRCH : err);
}
/*
* If later_ds is non-NULL, this will return EXDEV if the the specified bookmark
* does not represents an earlier point in later_ds's timeline.
*
* Returns ENOENT if the dataset containing the bookmark does not exist.
* Returns ESRCH if the dataset exists but the bookmark was not found in it.
*/
int
dsl_bookmark_lookup(dsl_pool_t *dp, const char *fullname,
dsl_dataset_t *later_ds, zfs_bookmark_phys_t *bmp)
{
char *shortname;
dsl_dataset_t *ds;
int error;
error = dsl_bookmark_hold_ds(dp, fullname, &ds, FTAG, &shortname);
if (error != 0)
return (error);
error = dsl_dataset_bmark_lookup(ds, shortname, bmp);
if (error == 0 && later_ds != NULL) {
if (!dsl_dataset_is_before(later_ds, ds, bmp->zbm_creation_txg))
error = SET_ERROR(EXDEV);
}
dsl_dataset_rele(ds, FTAG);
return (error);
}
typedef struct dsl_bookmark_create_arg {
nvlist_t *dbca_bmarks;
nvlist_t *dbca_errors;
} dsl_bookmark_create_arg_t;
static int
dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name,
dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *bmark_fs;
char *shortname;
int error;
zfs_bookmark_phys_t bmark_phys;
if (!snapds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
error = dsl_bookmark_hold_ds(dp, bookmark_name,
&bmark_fs, FTAG, &shortname);
if (error != 0)
return (error);
if (!dsl_dataset_is_before(bmark_fs, snapds, 0)) {
dsl_dataset_rele(bmark_fs, FTAG);
return (SET_ERROR(EINVAL));
}
error = dsl_dataset_bmark_lookup(bmark_fs, shortname,
&bmark_phys);
dsl_dataset_rele(bmark_fs, FTAG);
if (error == 0)
return (SET_ERROR(EEXIST));
if (error == ESRCH)
return (0);
return (error);
}
static int
dsl_bookmark_create_check(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_arg_t *dbca = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
int rv = 0;
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
dsl_dataset_t *snapds;
int error;
/* note: validity of nvlist checked by ioctl layer */
error = dsl_dataset_hold(dp, fnvpair_value_string(pair),
FTAG, &snapds);
if (error == 0) {
error = dsl_bookmark_create_check_impl(snapds,
nvpair_name(pair), tx);
dsl_dataset_rele(snapds, FTAG);
}
if (error != 0) {
fnvlist_add_int32(dbca->dbca_errors,
nvpair_name(pair), error);
rv = error;
}
}
return (rv);
}
static void
dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_arg_t *dbca = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
ASSERT(spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS));
for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
dsl_dataset_t *snapds, *bmark_fs;
zfs_bookmark_phys_t bmark_phys = { 0 };
char *shortname;
uint32_t bmark_len = BOOKMARK_PHYS_SIZE_V1;
VERIFY0(dsl_dataset_hold(dp, fnvpair_value_string(pair),
FTAG, &snapds));
VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair),
&bmark_fs, FTAG, &shortname));
if (bmark_fs->ds_bookmarks == 0) {
bmark_fs->ds_bookmarks =
zap_create_norm(mos, U8_TEXTPREP_TOUPPER,
DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
spa_feature_incr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
dsl_dataset_zapify(bmark_fs, tx);
VERIFY0(zap_add(mos, bmark_fs->ds_object,
DS_FIELD_BOOKMARK_NAMES,
sizeof (bmark_fs->ds_bookmarks), 1,
&bmark_fs->ds_bookmarks, tx));
}
bmark_phys.zbm_guid = dsl_dataset_phys(snapds)->ds_guid;
bmark_phys.zbm_creation_txg =
dsl_dataset_phys(snapds)->ds_creation_txg;
bmark_phys.zbm_creation_time =
dsl_dataset_phys(snapds)->ds_creation_time;
/*
* If the dataset is encrypted create a larger bookmark to
* accommodate the IVset guid. The IVset guid was added
* after the encryption feature to prevent a problem with
* raw sends. If we encounter an encrypted dataset without
* an IVset guid we fall back to a normal bookmark.
*/
if (snapds->ds_dir->dd_crypto_obj != 0 &&
spa_feature_is_enabled(dp->dp_spa,
SPA_FEATURE_BOOKMARK_V2)) {
int err = zap_lookup(mos, snapds->ds_object,
DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
&bmark_phys.zbm_ivset_guid);
if (err == 0) {
bmark_len = BOOKMARK_PHYS_SIZE_V2;
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_BOOKMARK_V2, tx);
}
}
VERIFY0(zap_add(mos, bmark_fs->ds_bookmarks,
shortname, sizeof (uint64_t),
bmark_len / sizeof (uint64_t), &bmark_phys, tx));
spa_history_log_internal_ds(bmark_fs, "bookmark", tx,
"name=%s creation_txg=%llu target_snap=%llu",
shortname,
(longlong_t)bmark_phys.zbm_creation_txg,
(longlong_t)snapds->ds_object);
dsl_dataset_rele(bmark_fs, FTAG);
dsl_dataset_rele(snapds, FTAG);
}
}
/*
* The bookmarks must all be in the same pool.
*/
int
dsl_bookmark_create(nvlist_t *bmarks, nvlist_t *errors)
{
nvpair_t *pair;
dsl_bookmark_create_arg_t dbca;
pair = nvlist_next_nvpair(bmarks, NULL);
if (pair == NULL)
return (0);
dbca.dbca_bmarks = bmarks;
dbca.dbca_errors = errors;
return (dsl_sync_task(nvpair_name(pair), dsl_bookmark_create_check,
dsl_bookmark_create_sync, &dbca,
fnvlist_num_pairs(bmarks), ZFS_SPACE_CHECK_NORMAL));
}
int
dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl)
{
int err = 0;
zap_cursor_t zc;
zap_attribute_t attr;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
uint64_t bmark_zapobj = ds->ds_bookmarks;
if (bmark_zapobj == 0)
return (0);
for (zap_cursor_init(&zc, dp->dp_meta_objset, bmark_zapobj);
zap_cursor_retrieve(&zc, &attr) == 0;
zap_cursor_advance(&zc)) {
char *bmark_name = attr.za_name;
zfs_bookmark_phys_t bmark_phys = { 0 };
err = dsl_dataset_bmark_lookup(ds, bmark_name, &bmark_phys);
ASSERT3U(err, !=, ENOENT);
if (err != 0)
break;
nvlist_t *out_props = fnvlist_alloc();
if (nvlist_exists(props,
zfs_prop_to_name(ZFS_PROP_GUID))) {
dsl_prop_nvlist_add_uint64(out_props,
ZFS_PROP_GUID, bmark_phys.zbm_guid);
}
if (nvlist_exists(props,
zfs_prop_to_name(ZFS_PROP_CREATETXG))) {
dsl_prop_nvlist_add_uint64(out_props,
ZFS_PROP_CREATETXG, bmark_phys.zbm_creation_txg);
}
if (nvlist_exists(props,
zfs_prop_to_name(ZFS_PROP_CREATION))) {
dsl_prop_nvlist_add_uint64(out_props,
ZFS_PROP_CREATION, bmark_phys.zbm_creation_time);
}
if (nvlist_exists(props,
zfs_prop_to_name(ZFS_PROP_IVSET_GUID))) {
dsl_prop_nvlist_add_uint64(out_props,
ZFS_PROP_IVSET_GUID, bmark_phys.zbm_ivset_guid);
}
fnvlist_add_nvlist(outnvl, bmark_name, out_props);
fnvlist_free(out_props);
}
zap_cursor_fini(&zc);
return (err);
}
/*
* Retrieve the bookmarks that exist in the specified dataset, and the
* requested properties of each bookmark.
*
* The "props" nvlist specifies which properties are requested.
* See lzc_get_bookmarks() for the list of valid properties.
*/
int
dsl_get_bookmarks(const char *dsname, nvlist_t *props, nvlist_t *outnvl)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
err = dsl_pool_hold(dsname, FTAG, &dp);
if (err != 0)
return (err);
err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
}
err = dsl_get_bookmarks_impl(ds, props, outnvl);
dsl_dataset_rele(ds, FTAG);
dsl_pool_rele(dp, FTAG);
return (err);
}
typedef struct dsl_bookmark_destroy_arg {
nvlist_t *dbda_bmarks;
nvlist_t *dbda_success;
nvlist_t *dbda_errors;
} dsl_bookmark_destroy_arg_t;
static int
dsl_dataset_bookmark_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
{
int err;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t bmark_zapobj = ds->ds_bookmarks;
matchtype_t mt = 0;
uint64_t int_size, num_ints;
if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
mt = MT_NORMALIZE;
err = zap_length(mos, bmark_zapobj, name, &int_size, &num_ints);
if (err != 0)
return (err);
ASSERT3U(int_size, ==, sizeof (uint64_t));
if (num_ints * int_size > BOOKMARK_PHYS_SIZE_V1) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_BOOKMARK_V2, tx);
}
return (zap_remove_norm(mos, bmark_zapobj, name, mt, tx));
}
static int
dsl_bookmark_destroy_check(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_destroy_arg_t *dbda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
int rv = 0;
ASSERT(nvlist_empty(dbda->dbda_success));
ASSERT(nvlist_empty(dbda->dbda_errors));
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
return (0);
for (nvpair_t *pair = nvlist_next_nvpair(dbda->dbda_bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_bmarks, pair)) {
const char *fullname = nvpair_name(pair);
dsl_dataset_t *ds;
zfs_bookmark_phys_t bm;
int error;
char *shortname;
error = dsl_bookmark_hold_ds(dp, fullname, &ds,
FTAG, &shortname);
if (error == ENOENT) {
/* ignore it; the bookmark is "already destroyed" */
continue;
}
if (error == 0) {
error = dsl_dataset_bmark_lookup(ds, shortname, &bm);
dsl_dataset_rele(ds, FTAG);
if (error == ESRCH) {
/*
* ignore it; the bookmark is
* "already destroyed"
*/
continue;
}
}
if (error == 0) {
if (dmu_tx_is_syncing(tx)) {
fnvlist_add_boolean(dbda->dbda_success,
fullname);
}
} else {
fnvlist_add_int32(dbda->dbda_errors, fullname, error);
rv = error;
}
}
return (rv);
}
static void
dsl_bookmark_destroy_sync(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_destroy_arg_t *dbda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
for (nvpair_t *pair = nvlist_next_nvpair(dbda->dbda_success, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_success, pair)) {
dsl_dataset_t *ds;
char *shortname;
uint64_t zap_cnt;
VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair),
&ds, FTAG, &shortname));
VERIFY0(dsl_dataset_bookmark_remove(ds, shortname, tx));
/*
* If all of this dataset's bookmarks have been destroyed,
* free the zap object and decrement the feature's use count.
*/
VERIFY0(zap_count(mos, ds->ds_bookmarks,
&zap_cnt));
if (zap_cnt == 0) {
dmu_buf_will_dirty(ds->ds_dbuf, tx);
VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
ds->ds_bookmarks = 0;
spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
VERIFY0(zap_remove(mos, ds->ds_object,
DS_FIELD_BOOKMARK_NAMES, tx));
}
spa_history_log_internal_ds(ds, "remove bookmark", tx,
"name=%s", shortname);
dsl_dataset_rele(ds, FTAG);
}
}
/*
* The bookmarks must all be in the same pool.
*/
int
dsl_bookmark_destroy(nvlist_t *bmarks, nvlist_t *errors)
{
int rv;
dsl_bookmark_destroy_arg_t dbda;
nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL);
if (pair == NULL)
return (0);
dbda.dbda_bmarks = bmarks;
dbda.dbda_errors = errors;
dbda.dbda_success = fnvlist_alloc();
rv = dsl_sync_task(nvpair_name(pair), dsl_bookmark_destroy_check,
dsl_bookmark_destroy_sync, &dbda, fnvlist_num_pairs(bmarks),
ZFS_SPACE_CHECK_RESERVED);
fnvlist_free(dbda.dbda_success);
return (rv);
}