Native Encryption for ZFS on Linux

This change incorporates three major pieces:

The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.

The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.

The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494 
Closes #5769
This commit is contained in:
Tom Caputi 2017-08-14 13:36:48 -04:00 committed by Brian Behlendorf
parent 376994828f
commit b525630342
163 changed files with 16091 additions and 1204 deletions

View File

@ -64,6 +64,7 @@
#include <sys/zfeature.h> #include <sys/zfeature.h>
#include <sys/abd.h> #include <sys/abd.h>
#include <sys/blkptr.h> #include <sys/blkptr.h>
#include <sys/dsl_crypt.h>
#include <zfs_comutil.h> #include <zfs_comutil.h>
#include <libzfs.h> #include <libzfs.h>
@ -1631,14 +1632,14 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
uint64_t version = 0; uint64_t version = 0;
VERIFY3P(sa_os, ==, NULL); VERIFY3P(sa_os, ==, NULL);
err = dmu_objset_own(path, type, B_TRUE, tag, osp); err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp);
if (err != 0) { if (err != 0) {
(void) fprintf(stderr, "failed to own dataset '%s': %s\n", path, (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
strerror(err)); strerror(err));
return (err); return (err);
} }
if (dmu_objset_type(*osp) == DMU_OST_ZFS) { if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) {
(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
8, 1, &version); 8, 1, &version);
if (version >= ZPL_VERSION_SA) { if (version >= ZPL_VERSION_SA) {
@ -1650,7 +1651,7 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
if (err != 0) { if (err != 0) {
(void) fprintf(stderr, "sa_setup failed: %s\n", (void) fprintf(stderr, "sa_setup failed: %s\n",
strerror(err)); strerror(err));
dmu_objset_disown(*osp, tag); dmu_objset_disown(*osp, B_FALSE, tag);
*osp = NULL; *osp = NULL;
} }
} }
@ -1665,7 +1666,7 @@ close_objset(objset_t *os, void *tag)
VERIFY3P(os, ==, sa_os); VERIFY3P(os, ==, sa_os);
if (os->os_sa != NULL) if (os->os_sa != NULL)
sa_tear_down(os); sa_tear_down(os);
dmu_objset_disown(os, tag); dmu_objset_disown(os, B_FALSE, tag);
sa_attr_table = NULL; sa_attr_table = NULL;
sa_os = NULL; sa_os = NULL;
} }
@ -1938,6 +1939,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
dmu_buf_t *db = NULL; dmu_buf_t *db = NULL;
dmu_object_info_t doi; dmu_object_info_t doi;
dnode_t *dn; dnode_t *dn;
boolean_t dnode_held = B_FALSE;
void *bonus = NULL; void *bonus = NULL;
size_t bsize = 0; size_t bsize = 0;
char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32]; char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
@ -1954,16 +1956,33 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
if (object == 0) { if (object == 0) {
dn = DMU_META_DNODE(os); dn = DMU_META_DNODE(os);
dmu_object_info_from_dnode(dn, &doi);
} else { } else {
error = dmu_bonus_hold(os, object, FTAG, &db); /*
* Encrypted datasets will have sensitive bonus buffers
* encrypted. Therefore we cannot hold the bonus buffer and
* must hold the dnode itself instead.
*/
error = dmu_object_info(os, object, &doi);
if (error) if (error)
fatal("dmu_bonus_hold(%llu) failed, errno %u", fatal("dmu_object_info() failed, errno %u", error);
object, error);
bonus = db->db_data; if (os->os_encrypted &&
bsize = db->db_size; DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) {
dn = DB_DNODE((dmu_buf_impl_t *)db); error = dnode_hold(os, object, FTAG, &dn);
if (error)
fatal("dnode_hold() failed, errno %u", error);
dnode_held = B_TRUE;
} else {
error = dmu_bonus_hold(os, object, FTAG, &db);
if (error)
fatal("dmu_bonus_hold(%llu) failed, errno %u",
object, error);
bonus = db->db_data;
bsize = db->db_size;
dn = DB_DNODE((dmu_buf_impl_t *)db);
}
} }
dmu_object_info_from_dnode(dn, &doi);
zdb_nicenum(doi.doi_metadata_block_size, iblk); zdb_nicenum(doi.doi_metadata_block_size, iblk);
zdb_nicenum(doi.doi_data_block_size, dblk); zdb_nicenum(doi.doi_data_block_size, dblk);
@ -2010,9 +2029,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
(void) printf("\tdnode maxblkid: %llu\n", (void) printf("\tdnode maxblkid: %llu\n",
(longlong_t)dn->dn_phys->dn_maxblkid); (longlong_t)dn->dn_phys->dn_maxblkid);
object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object, if (!dnode_held) {
bonus, bsize); object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os,
object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0); object, bonus, bsize);
} else {
(void) printf("\t\t(bonus encrypted)\n");
}
if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) {
object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object,
NULL, 0);
} else {
(void) printf("\t\t(object encrypted)\n");
}
*print_header = 1; *print_header = 1;
} }
@ -2054,6 +2084,8 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
if (db != NULL) if (db != NULL)
dmu_buf_rele(db, FTAG); dmu_buf_rele(db, FTAG);
if (dnode_held)
dnode_rele(dn, FTAG);
} }
static char *objset_types[DMU_OST_NUMTYPES] = { static char *objset_types[DMU_OST_NUMTYPES] = {
@ -2639,7 +2671,7 @@ dump_path(char *ds, char *path)
if (err != 0) { if (err != 0) {
(void) fprintf(stderr, "can't lookup root znode: %s\n", (void) fprintf(stderr, "can't lookup root znode: %s\n",
strerror(err)); strerror(err));
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_FALSE, FTAG);
return (EINVAL); return (EINVAL);
} }
@ -3289,7 +3321,8 @@ dump_block_stats(spa_t *spa)
zdb_cb_t zcb; zdb_cb_t zcb;
zdb_blkstats_t *zb, *tzb; zdb_blkstats_t *zb, *tzb;
uint64_t norm_alloc, norm_space, total_alloc, total_found; uint64_t norm_alloc, norm_space, total_alloc, total_found;
int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
TRAVERSE_NO_DECRYPT | TRAVERSE_HARD;
boolean_t leaks = B_FALSE; boolean_t leaks = B_FALSE;
int e, c; int e, c;
bp_embedded_type_t i; bp_embedded_type_t i;
@ -3594,8 +3627,8 @@ dump_simulated_ddt(spa_t *spa)
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
zdb_ddt_add_cb, &t); TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t);
spa_config_exit(spa, SCL_CONFIG, FTAG); spa_config_exit(spa, SCL_CONFIG, FTAG);

View File

@ -311,8 +311,13 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
(u_longlong_t)lr->lrc_txg, (u_longlong_t)lr->lrc_txg,
(u_longlong_t)lr->lrc_seq); (u_longlong_t)lr->lrc_seq);
if (txtype && verbose >= 3) if (txtype && verbose >= 3) {
zil_rec_info[txtype].zri_print(zilog, txtype, lr); if (!zilog->zl_os->os_encrypted) {
zil_rec_info[txtype].zri_print(zilog, txtype, lr);
} else {
(void) printf("%s(encrypted)\n", prefix);
}
}
zil_rec_info[txtype].zri_count++; zil_rec_info[txtype].zri_count++;
zil_rec_info[0].zri_count++; zil_rec_info[0].zri_count++;
@ -399,7 +404,7 @@ dump_intent_log(zilog_t *zilog)
if (verbose >= 2) { if (verbose >= 2) {
(void) printf("\n"); (void) printf("\n");
(void) zil_parse(zilog, print_log_block, print_log_record, NULL, (void) zil_parse(zilog, print_log_block, print_log_record, NULL,
zh->zh_claim_txg); zh->zh_claim_txg, B_FALSE);
print_log_stats(verbose); print_log_stats(verbose);
} }
} }

View File

@ -106,6 +106,9 @@ static int zfs_do_holds(int argc, char **argv);
static int zfs_do_release(int argc, char **argv); static int zfs_do_release(int argc, char **argv);
static int zfs_do_diff(int argc, char **argv); static int zfs_do_diff(int argc, char **argv);
static int zfs_do_bookmark(int argc, char **argv); static int zfs_do_bookmark(int argc, char **argv);
static int zfs_do_load_key(int argc, char **argv);
static int zfs_do_unload_key(int argc, char **argv);
static int zfs_do_change_key(int argc, char **argv);
/* /*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds. * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
@ -153,6 +156,9 @@ typedef enum {
HELP_RELEASE, HELP_RELEASE,
HELP_DIFF, HELP_DIFF,
HELP_BOOKMARK, HELP_BOOKMARK,
HELP_LOAD_KEY,
HELP_UNLOAD_KEY,
HELP_CHANGE_KEY,
} zfs_help_t; } zfs_help_t;
typedef struct zfs_command { typedef struct zfs_command {
@ -206,6 +212,9 @@ static zfs_command_t command_table[] = {
{ "holds", zfs_do_holds, HELP_HOLDS }, { "holds", zfs_do_holds, HELP_HOLDS },
{ "release", zfs_do_release, HELP_RELEASE }, { "release", zfs_do_release, HELP_RELEASE },
{ "diff", zfs_do_diff, HELP_DIFF }, { "diff", zfs_do_diff, HELP_DIFF },
{ "load-key", zfs_do_load_key, HELP_LOAD_KEY },
{ "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY },
{ "change-key", zfs_do_change_key, HELP_CHANGE_KEY },
}; };
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
@ -247,7 +256,7 @@ get_usage(zfs_help_t idx)
"[filesystem|volume|snapshot] ...\n")); "[filesystem|volume|snapshot] ...\n"));
case HELP_MOUNT: case HELP_MOUNT:
return (gettext("\tmount\n" return (gettext("\tmount\n"
"\tmount [-vO] [-o opts] <-a | filesystem>\n")); "\tmount [-lvO] [-o opts] <-a | filesystem>\n"));
case HELP_PROMOTE: case HELP_PROMOTE:
return (gettext("\tpromote <clone-filesystem>\n")); return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE: case HELP_RECEIVE:
@ -266,16 +275,16 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK: case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n")); return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND: case HELP_SEND:
return (gettext("\tsend [-DnPpRvLec] [-[i|I] snapshot] " return (gettext("\tsend [-DnPpRvLecr] [-[i|I] snapshot] "
"<snapshot>\n" "<snapshot>\n"
"\tsend [-Lec] [-i snapshot|bookmark] " "\tsend [-Lecr] [-i snapshot|bookmark] "
"<filesystem|volume|snapshot>\n" "<filesystem|volume|snapshot>\n"
"\tsend [-nvPe] -t <receive_resume_token>\n")); "\tsend [-nvPe] -t <receive_resume_token>\n"));
case HELP_SET: case HELP_SET:
return (gettext("\tset <property=value> ... " return (gettext("\tset <property=value> ... "
"<filesystem|volume|snapshot> ...\n")); "<filesystem|volume|snapshot> ...\n"));
case HELP_SHARE: case HELP_SHARE:
return (gettext("\tshare <-a [nfs|smb] | filesystem>\n")); return (gettext("\tshare [-l] <-a [nfs|smb] | filesystem>\n"));
case HELP_SNAPSHOT: case HELP_SNAPSHOT:
return (gettext("\tsnapshot|snap [-r] [-o property=value] ... " return (gettext("\tsnapshot|snap [-r] [-o property=value] ... "
"<filesystem|volume>@<snap> ...\n")); "<filesystem|volume>@<snap> ...\n"));
@ -326,6 +335,17 @@ get_usage(zfs_help_t idx)
"[snapshot|filesystem]\n")); "[snapshot|filesystem]\n"));
case HELP_BOOKMARK: case HELP_BOOKMARK:
return (gettext("\tbookmark <snapshot> <bookmark>\n")); return (gettext("\tbookmark <snapshot> <bookmark>\n"));
case HELP_LOAD_KEY:
return (gettext("\tload-key [-rn] [-L <keylocation>] "
"<-a | filesystem|volume>\n"));
case HELP_UNLOAD_KEY:
return (gettext("\tunload-key [-r] "
"<-a | filesystem|volume>\n"));
case HELP_CHANGE_KEY:
return (gettext("\tchange-key [-l] [-o keyformat=<value>]"
"\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]"
"\t <filesystem|volume>\n"
"\tchange-key -i [-l] <filesystem|volume>\n"));
} }
abort(); abort();
@ -901,7 +921,7 @@ zfs_do_create(int argc, char **argv)
(void) snprintf(msg, sizeof (msg), (void) snprintf(msg, sizeof (msg),
gettext("cannot create '%s'"), argv[0]); gettext("cannot create '%s'"), argv[0]);
if (props && (real_props = zfs_valid_proplist(g_zfs, type, if (props && (real_props = zfs_valid_proplist(g_zfs, type,
props, 0, NULL, zpool_handle, msg)) == NULL) { props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) {
zpool_close(zpool_handle); zpool_close(zpool_handle);
goto error; goto error;
} }
@ -3830,11 +3850,12 @@ zfs_do_send(int argc, char **argv)
{"embed", no_argument, NULL, 'e'}, {"embed", no_argument, NULL, 'e'},
{"resume", required_argument, NULL, 't'}, {"resume", required_argument, NULL, 't'},
{"compressed", no_argument, NULL, 'c'}, {"compressed", no_argument, NULL, 'c'},
{"raw", no_argument, NULL, 'w'},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
/* check options */ /* check options */
while ((c = getopt_long(argc, argv, ":i:I:RDpvnPLet:c", long_options, while ((c = getopt_long(argc, argv, ":i:I:RDpvnPLet:cw", long_options,
NULL)) != -1) { NULL)) != -1) {
switch (c) { switch (c) {
case 'i': case 'i':
@ -3882,6 +3903,12 @@ zfs_do_send(int argc, char **argv)
case 'c': case 'c':
flags.compress = B_TRUE; flags.compress = B_TRUE;
break; break;
case 'w':
flags.raw = B_TRUE;
flags.compress = B_TRUE;
flags.embed_data = B_TRUE;
flags.largeblock = B_TRUE;
break;
case ':': case ':':
/* /*
* If a parameter was not passed, optopt contains the * If a parameter was not passed, optopt contains the
@ -3989,6 +4016,8 @@ zfs_do_send(int argc, char **argv)
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
if (flags.compress) if (flags.compress)
lzc_flags |= LZC_SEND_FLAG_COMPRESS; lzc_flags |= LZC_SEND_FLAG_COMPRESS;
if (flags.raw)
lzc_flags |= LZC_SEND_FLAG_RAW;
if (fromname != NULL && if (fromname != NULL &&
(fromname[0] == '#' || fromname[0] == '@')) { (fromname[0] == '#' || fromname[0] == '@')) {
@ -4236,6 +4265,8 @@ zfs_do_receive(int argc, char **argv)
#define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_DIFF "diff"
#define ZFS_DELEG_PERM_BOOKMARK "bookmark" #define ZFS_DELEG_PERM_BOOKMARK "bookmark"
#define ZFS_DELEG_PERM_LOAD_KEY "load-key"
#define ZFS_DELEG_PERM_CHANGE_KEY "change-key"
#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE
@ -4256,6 +4287,8 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
{ ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
{ ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
{ ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK },
{ ZFS_DELEG_PERM_LOAD_KEY, ZFS_DELEG_NOTE_LOAD_KEY },
{ ZFS_DELEG_PERM_CHANGE_KEY, ZFS_DELEG_NOTE_CHANGE_KEY },
{ ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
{ ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
@ -4831,6 +4864,12 @@ deleg_perm_comment(zfs_deleg_note_t note)
case ZFS_DELEG_NOTE_SNAPSHOT: case ZFS_DELEG_NOTE_SNAPSHOT:
str = gettext(""); str = gettext("");
break; break;
case ZFS_DELEG_NOTE_LOAD_KEY:
str = gettext("Allows loading or unloading an encryption key");
break;
case ZFS_DELEG_NOTE_CHANGE_KEY:
str = gettext("Allows changing or adding an encryption key");
break;
/* /*
* case ZFS_DELEG_NOTE_VSCAN: * case ZFS_DELEG_NOTE_VSCAN:
* str = gettext(""); * str = gettext("");
@ -6107,7 +6146,7 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
} }
if (!zfs_is_mounted(zhp, NULL) && if (!zfs_is_mounted(zhp, NULL) &&
zfs_mount(zhp, NULL, 0) != 0) zfs_mount(zhp, NULL, flags) != 0)
return (1); return (1);
if (protocol == NULL) { if (protocol == NULL) {
@ -6214,7 +6253,7 @@ share_mount(int op, int argc, char **argv)
int flags = 0; int flags = 0;
/* check options */ /* check options */
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a")) while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:O" : "al"))
!= -1) { != -1) {
switch (c) { switch (c) {
case 'a': case 'a':
@ -6223,6 +6262,9 @@ share_mount(int op, int argc, char **argv)
case 'v': case 'v':
verbose = B_TRUE; verbose = B_TRUE;
break; break;
case 'l':
flags |= MS_CRYPT;
break;
case 'o': case 'o':
if (*optarg == '\0') { if (*optarg == '\0') {
(void) fprintf(stderr, gettext("empty mount " (void) fprintf(stderr, gettext("empty mount "
@ -7036,6 +7078,230 @@ usage:
return (-1); return (-1);
} }
/*
 * State shared between load_unload_keys() and load_key_callback() while
 * iterating over datasets for "zfs load-key" / "zfs unload-key".
 */
typedef struct loadkey_cbdata {
/* B_TRUE: callback loads keys; B_FALSE: callback unloads keys */
boolean_t cb_loadkey;
/* set by -a or -r; callback then skips non-encryption-roots and no-ops */
boolean_t cb_recursive;
/* set by -n (load-key only); forwarded to zfs_crypto_load_key() */
boolean_t cb_noop;
/* -L argument (load-key only); NULL when the option was not given */
char *cb_keylocation;
/* number of load/unload calls that returned non-zero */
uint64_t cb_numfailed;
/* number of load/unload calls made by the callback */
uint64_t cb_numattempted;
} loadkey_cbdata_t;
/*
 * Per-dataset callback used by load_unload_keys().  'data' is the
 * loadkey_cbdata_t describing the requested operation.  Loads or unloads
 * the key of 'zhp' and updates the attempted / failed counters.
 *
 * Returns 0 on success or when the dataset is skipped, otherwise the
 * non-zero value returned by the failing libzfs call.
 */
static int
load_key_callback(zfs_handle_t *zhp, void *data)
{
	loadkey_cbdata_t *cbd = data;
	uint64_t cur_status = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
	boolean_t is_encroot;
	int err;

	/*
	 * In recursive mode only encryption roots are touched, and only
	 * when their key is not already in the state we are asked to
	 * bring it to.
	 */
	if (cbd->cb_recursive) {
		uint64_t goal_status = cbd->cb_loadkey ?
		    ZFS_KEYSTATUS_AVAILABLE : ZFS_KEYSTATUS_UNAVAILABLE;

		err = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
		if (err != 0)
			return (err);

		if (!is_encroot || cur_status == goal_status)
			return (0);
	}

	cbd->cb_numattempted++;

	err = cbd->cb_loadkey ?
	    zfs_crypto_load_key(zhp, cbd->cb_noop, cbd->cb_keylocation) :
	    zfs_crypto_unload_key(zhp);

	if (err != 0)
		cbd->cb_numfailed++;

	return (err);
}
/*
 * Common implementation of "zfs load-key" (loadkey == B_TRUE) and
 * "zfs unload-key" (loadkey == B_FALSE).
 *
 * Options:
 *	-a	no dataset arguments are allowed; implies recursive handling
 *	-r	adds ZFS_ITER_RECURSE to the iteration flags
 *	-n	load-key only: sets cb_noop, forwarded to zfs_crypto_load_key()
 *	-L loc	load-key only: alternate keylocation; with -a or -r it may
 *		only be "prompt"
 *
 * Returns -1 if any individual key operation failed, otherwise the value
 * returned by zfs_for_each().
 */
static int
load_unload_keys(int argc, char **argv, boolean_t loadkey)
{
	int c, ret = 0, flags = 0;
	boolean_t do_all = B_FALSE;
	loadkey_cbdata_t cb = { 0 };

	cb.cb_loadkey = loadkey;

	while ((c = getopt(argc, argv, "anrL:")) != -1) {
		/* noop and alternate keylocations only apply to zfs load-key */
		if (loadkey) {
			switch (c) {
			case 'n':
				cb.cb_noop = B_TRUE;
				continue;
			case 'L':
				cb.cb_keylocation = optarg;
				continue;
			default:
				break;
			}
		}

		switch (c) {
		case 'a':
			do_all = B_TRUE;
			cb.cb_recursive = B_TRUE;
			break;
		case 'r':
			flags |= ZFS_ITER_RECURSE;
			cb.cb_recursive = B_TRUE;
			break;
		default:
			/*
			 * getopt() only stores the offending character in
			 * optopt when it rejected the option itself and
			 * returned '?'.  -n and -L passed to unload-key are
			 * accepted by getopt() and arrive here in 'c', so
			 * report 'c' in that case.
			 */
			(void) fprintf(stderr,
			    gettext("invalid option '%c'\n"),
			    c == '?' ? optopt : c);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	if (!do_all && argc == 0) {
		(void) fprintf(stderr,
		    gettext("Missing dataset argument or -a option\n"));
		usage(B_FALSE);
	}

	if (do_all && argc != 0) {
		(void) fprintf(stderr,
		    gettext("Cannot specify dataset with -a option\n"));
		usage(B_FALSE);
	}

	/* a single alternate location cannot serve several datasets */
	if (cb.cb_recursive && cb.cb_keylocation != NULL &&
	    strcmp(cb.cb_keylocation, "prompt") != 0) {
		(void) fprintf(stderr, gettext("alternate keylocation may only "
		    "be 'prompt' with -r or -a\n"));
		usage(B_FALSE);
	}

	ret = zfs_for_each(argc, argv, flags,
	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL, 0,
	    load_key_callback, &cb);

	/* summarize when verifying, or when a recursive run did any work */
	if (cb.cb_noop || (cb.cb_recursive && cb.cb_numattempted != 0)) {
		(void) printf(gettext("%llu / %llu key(s) successfully %s\n"),
		    (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed),
		    (u_longlong_t)cb.cb_numattempted,
		    loadkey ? (cb.cb_noop ? "verified" : "loaded") :
		    "unloaded");
	}

	if (cb.cb_numfailed != 0)
		ret = -1;

	return (ret);
}
/*
 * Handler registered for the "load-key" subcommand: delegates to
 * load_unload_keys() with loadkey set to B_TRUE and returns its result.
 */
static int
zfs_do_load_key(int argc, char **argv)
{
return (load_unload_keys(argc, argv, B_TRUE));
}
/*
 * Handler registered for the "unload-key" subcommand: delegates to
 * load_unload_keys() with loadkey set to B_FALSE and returns its result.
 */
static int
zfs_do_unload_key(int argc, char **argv)
{
return (load_unload_keys(argc, argv, B_FALSE));
}
/*
 * Handler for the "change-key" subcommand.
 *
 * Options:
 *	-l	load the dataset's key first if it is not already available
 *	-i	passed to zfs_crypto_rewrap() as the inherit flag; may not be
 *		combined with -o
 *	-o	property=value, collected into the nvlist handed to
 *		zfs_crypto_rewrap()
 *
 * Exactly one dataset argument is expected.  Returns 0 on success, 1 if
 * a -o argument could not be parsed, and -1 if loading or rewrapping the
 * key failed.
 */
static int
zfs_do_change_key(int argc, char **argv)
{
	int opt;
	int rc = -1;
	boolean_t want_load = B_FALSE;
	boolean_t want_inherit = B_FALSE;
	zfs_handle_t *zhp = NULL;
	nvlist_t *props = fnvlist_alloc();

	while ((opt = getopt(argc, argv, "lio:")) != -1) {
		if (opt == 'l') {
			want_load = B_TRUE;
		} else if (opt == 'i') {
			want_inherit = B_TRUE;
		} else if (opt == 'o') {
			if (!parseprop(props, optarg)) {
				nvlist_free(props);
				return (1);
			}
		} else {
			(void) fprintf(stderr,
			    gettext("invalid option '%c'\n"), optopt);
			usage(B_FALSE);
		}
	}

	if (want_inherit && !nvlist_empty(props)) {
		(void) fprintf(stderr,
		    gettext("Properties not allowed for inheriting\n"));
		usage(B_FALSE);
	}

	argc -= optind;
	argv += optind;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("Missing dataset argument\n"));
		usage(B_FALSE);
	}

	if (argc > 1) {
		(void) fprintf(stderr, gettext("Too many arguments\n"));
		usage(B_FALSE);
	}

	zhp = zfs_open(g_zfs, argv[argc - 1],
	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
	if (zhp == NULL)
		usage(B_FALSE);

	if (want_load) {
		if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) !=
		    ZFS_KEYSTATUS_AVAILABLE &&
		    zfs_crypto_load_key(zhp, B_FALSE, NULL) != 0)
			goto out;

		/* refresh the properties so the new keystatus is visible */
		zfs_refresh_properties(zhp);
	}

	if (zfs_crypto_rewrap(zhp, props, want_inherit) == 0)
		rc = 0;

out:
	/* shared cleanup for both the success and the failure path */
	nvlist_free(props);
	if (zhp != NULL)
		zfs_close(zhp);

	return (rc);
}
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {

View File

@ -179,7 +179,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
*/ */
sync(); sync();
err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os);
if (err != 0) { if (err != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n", (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err)); dataset, strerror(err));
@ -189,7 +189,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
record->zi_objset = dmu_objset_id(os); record->zi_objset = dmu_objset_id(os);
record->zi_object = statbuf->st_ino; record->zi_object = statbuf->st_ino;
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_FALSE, FTAG);
return (0); return (0);
} }
@ -267,7 +267,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
* size. * size.
*/ */
if ((err = dmu_objset_own(dataset, DMU_OST_ANY, if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
B_TRUE, FTAG, &os)) != 0) { B_TRUE, B_FALSE, FTAG, &os)) != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n", (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err)); dataset, strerror(err));
goto out; goto out;
@ -329,7 +329,7 @@ out:
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
} }
if (os) if (os)
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_FALSE, FTAG);
return (ret); return (ret);
} }

View File

@ -55,7 +55,7 @@
#include <sys/fm/util.h> #include <sys/fm/util.h>
#include <sys/fm/protocol.h> #include <sys/fm/protocol.h>
#include <sys/zfs_ioctl.h> #include <sys/zfs_ioctl.h>
#include <sys/mount.h>
#include <math.h> #include <math.h>
#include <libzfs.h> #include <libzfs.h>
@ -313,12 +313,13 @@ get_usage(zpool_help_t idx)
return (gettext("\thistory [-il] [<pool>] ...\n")); return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT: case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n" return (gettext("\timport [-d dir] [-D]\n"
"\timport [-d dir | -c cachefile] [-F [-n]] <pool | id>\n" "\timport [-d dir | -c cachefile] [-F [-n]] [-l] "
"<pool | id>\n"
"\timport [-o mntopts] [-o property=value] ... \n" "\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]] -a\n" "[-R root] [-F [-n]] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n" "\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]]\n" "[-R root] [-F [-n]]\n"
"\t <pool | id> [newpool]\n")); "\t <pool | id> [newpool]\n"));
case HELP_IOSTAT: case HELP_IOSTAT:
@ -359,7 +360,7 @@ get_usage(zpool_help_t idx)
case HELP_SET: case HELP_SET:
return (gettext("\tset <property=value> <pool> \n")); return (gettext("\tset <property=value> <pool> \n"));
case HELP_SPLIT: case HELP_SPLIT:
return (gettext("\tsplit [-gLnP] [-R altroot] [-o mntopts]\n" return (gettext("\tsplit [-gLnPl] [-R altroot] [-o mntopts]\n"
"\t [-o property=value] <pool> <newpool> " "\t [-o property=value] <pool> <newpool> "
"[<device> ...]\n")); "[<device> ...]\n"));
case HELP_REGUID: case HELP_REGUID:
@ -2261,6 +2262,7 @@ static int
do_import(nvlist_t *config, const char *newname, const char *mntopts, do_import(nvlist_t *config, const char *newname, const char *mntopts,
nvlist_t *props, int flags) nvlist_t *props, int flags)
{ {
int ret = 0;
zpool_handle_t *zhp; zpool_handle_t *zhp;
char *name; char *name;
uint64_t state; uint64_t state;
@ -2343,6 +2345,16 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL)
return (1); return (1);
/*
* Loading keys is best effort. We don't want to return immediately
* if it fails but we do want to give the error to the caller.
*/
if (flags & ZFS_IMPORT_LOAD_KEYS) {
ret = zfs_crypto_attempt_load_keys(g_zfs, name);
if (ret != 0)
ret = 1;
}
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
!(flags & ZFS_IMPORT_ONLY) && !(flags & ZFS_IMPORT_ONLY) &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_enable_datasets(zhp, mntopts, 0) != 0) {
@ -2351,14 +2363,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
} }
zpool_close(zhp); zpool_close(zhp);
return (0); return (ret);
} }
/* /*
* zpool import [-d dir] [-D] * zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
* [-d dir | -c cachefile] [-f] -a * [-d dir | -c cachefile] [-f] -a
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
* [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool] * [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool]
* *
* -c Read pool information from a cachefile instead of searching * -c Read pool information from a cachefile instead of searching
@ -2393,6 +2405,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
* *
* -a Import all pools found. * -a Import all pools found.
* *
* -l Load encryption keys while importing.
*
* -o Set property=value and/or temporary mount options (without '='). * -o Set property=value and/or temporary mount options (without '=').
* *
* -s Scan using the default search path, the libblkid cache will * -s Scan using the default search path, the libblkid cache will
@ -2434,7 +2448,7 @@ zpool_do_import(int argc, char **argv)
char *endptr; char *endptr;
/* check options */ /* check options */
while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:R:stT:VX")) != -1) { while ((c = getopt(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX")) != -1) {
switch (c) { switch (c) {
case 'a': case 'a':
do_all = B_TRUE; do_all = B_TRUE;
@ -2464,6 +2478,9 @@ zpool_do_import(int argc, char **argv)
case 'F': case 'F':
do_rewind = B_TRUE; do_rewind = B_TRUE;
break; break;
case 'l':
flags |= ZFS_IMPORT_LOAD_KEYS;
break;
case 'm': case 'm':
flags |= ZFS_IMPORT_MISSING_LOG; flags |= ZFS_IMPORT_MISSING_LOG;
break; break;
@ -2538,6 +2555,17 @@ zpool_do_import(int argc, char **argv)
usage(B_FALSE); usage(B_FALSE);
} }
if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) {
(void) fprintf(stderr, gettext("-l is incompatible with -N\n"));
usage(B_FALSE);
}
if ((flags & ZFS_IMPORT_LOAD_KEYS) && !do_all && argc == 0) {
(void) fprintf(stderr, gettext("-l is only meaningful during "
"an import\n"));
usage(B_FALSE);
}
if ((dryrun || xtreme_rewind) && !do_rewind) { if ((dryrun || xtreme_rewind) && !do_rewind) {
(void) fprintf(stderr, (void) fprintf(stderr,
gettext("-n or -X only meaningful with -F\n")); gettext("-n or -X only meaningful with -F\n"));
@ -5370,6 +5398,7 @@ zpool_do_detach(int argc, char **argv)
* -o Set property=value, or set mount options. * -o Set property=value, or set mount options.
* -P Display full path for vdev name. * -P Display full path for vdev name.
* -R Mount the split-off pool under an alternate root. * -R Mount the split-off pool under an alternate root.
* -l Load encryption keys while importing.
* *
* Splits the named pool and gives it the new pool name. Devices to be split * Splits the named pool and gives it the new pool name. Devices to be split
* off may be listed, provided that no more than one device is specified * off may be listed, provided that no more than one device is specified
@ -5387,6 +5416,7 @@ zpool_do_split(int argc, char **argv)
char *mntopts = NULL; char *mntopts = NULL;
splitflags_t flags; splitflags_t flags;
int c, ret = 0; int c, ret = 0;
boolean_t loadkeys = B_FALSE;
zpool_handle_t *zhp; zpool_handle_t *zhp;
nvlist_t *config, *props = NULL; nvlist_t *config, *props = NULL;
@ -5395,7 +5425,7 @@ zpool_do_split(int argc, char **argv)
flags.name_flags = 0; flags.name_flags = 0;
/* check options */ /* check options */
while ((c = getopt(argc, argv, ":gLR:no:P")) != -1) { while ((c = getopt(argc, argv, ":gLR:lno:P")) != -1) {
switch (c) { switch (c) {
case 'g': case 'g':
flags.name_flags |= VDEV_NAME_GUID; flags.name_flags |= VDEV_NAME_GUID;
@ -5412,6 +5442,9 @@ zpool_do_split(int argc, char **argv)
usage(B_FALSE); usage(B_FALSE);
} }
break; break;
case 'l':
loadkeys = B_TRUE;
break;
case 'n': case 'n':
flags.dryrun = B_TRUE; flags.dryrun = B_TRUE;
break; break;
@ -5450,6 +5483,12 @@ zpool_do_split(int argc, char **argv)
usage(B_FALSE); usage(B_FALSE);
} }
if (!flags.import && loadkeys) {
(void) fprintf(stderr, gettext("loading keys is only "
"valid when importing the pool\n"));
usage(B_FALSE);
}
argc -= optind; argc -= optind;
argv += optind; argv += optind;
@ -5502,6 +5541,13 @@ zpool_do_split(int argc, char **argv)
nvlist_free(props); nvlist_free(props);
return (1); return (1);
} }
if (loadkeys) {
ret = zfs_crypto_attempt_load_keys(g_zfs, newpool);
if (ret != 0)
ret = 1;
}
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_enable_datasets(zhp, mntopts, 0) != 0) {
ret = 1; ret = 1;

View File

@ -197,12 +197,33 @@ print_block(char *buf, int length)
} }
} }
/*
 * Format an array of bytes as lowercase hexadecimal characters into str
 * (nothing is written to stdout).  Each input byte yields exactly two
 * characters and the result is NUL-terminated, so str must have
 * buf_len * 2 + 1 bytes of space.  With buf_len == 0 only the terminator
 * is written.
 */
static void
sprintf_bytes(char *str, const uint8_t *buf, uint_t buf_len)
{
	static const char hex_digits[] = "0123456789abcdef";
	uint_t i;

	for (i = 0; i < buf_len; i++) {
		/* high nibble first, matching the "%02x" conversion */
		*str++ = hex_digits[buf[i] >> 4];
		*str++ = hex_digits[buf[i] & 0x0f];
	}

	*str = '\0';
}
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
char *buf = safe_malloc(SPA_MAXBLOCKSIZE); char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
char salt[ZIO_DATA_SALT_LEN * 2 + 1];
char iv[ZIO_DATA_IV_LEN * 2 + 1];
char mac[ZIO_DATA_MAC_LEN * 2 + 1];
uint64_t total_records = 0; uint64_t total_records = 0;
uint64_t payload_size;
dmu_replay_record_t thedrr; dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr; dmu_replay_record_t *drr = &thedrr;
struct drr_begin *drrb = &thedrr.drr_u.drr_begin; struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
@ -214,6 +235,7 @@ main(int argc, char *argv[])
struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill; struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
struct drr_object_range *drror = &thedrr.drr_u.drr_object_range;
struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
char c; char c;
boolean_t verbose = B_FALSE; boolean_t verbose = B_FALSE;
@ -418,26 +440,35 @@ main(int argc, char *argv[])
drro->drr_blksz = BSWAP_32(drro->drr_blksz); drro->drr_blksz = BSWAP_32(drro->drr_blksz);
drro->drr_bonuslen = drro->drr_bonuslen =
BSWAP_32(drro->drr_bonuslen); BSWAP_32(drro->drr_bonuslen);
drro->drr_raw_bonuslen =
BSWAP_32(drro->drr_raw_bonuslen);
drro->drr_toguid = BSWAP_64(drro->drr_toguid); drro->drr_toguid = BSWAP_64(drro->drr_toguid);
} }
payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
if (verbose) { if (verbose) {
(void) printf("OBJECT object = %llu type = %u " (void) printf("OBJECT object = %llu type = %u "
"bonustype = %u blksz = %u bonuslen = %u " "bonustype = %u blksz = %u bonuslen = %u "
"dn_slots = %u\n", "dn_slots = %u raw_bonuslen = %u "
"flags = %u indblkshift = %u nlevels = %u "
"nblkptr = %u\n",
(u_longlong_t)drro->drr_object, (u_longlong_t)drro->drr_object,
drro->drr_type, drro->drr_type,
drro->drr_bonustype, drro->drr_bonustype,
drro->drr_blksz, drro->drr_blksz,
drro->drr_bonuslen, drro->drr_bonuslen,
drro->drr_dn_slots); drro->drr_dn_slots,
drro->drr_raw_bonuslen,
drro->drr_flags,
drro->drr_indblkshift,
drro->drr_nlevels,
drro->drr_nblkptr);
} }
if (drro->drr_bonuslen > 0) { if (drro->drr_bonuslen > 0) {
(void) ssread(buf, (void) ssread(buf, payload_size, &zc);
P2ROUNDUP(drro->drr_bonuslen, 8), &zc); if (dump)
if (dump) { print_block(buf, payload_size);
print_block(buf,
P2ROUNDUP(drro->drr_bonuslen, 8));
}
} }
break; break;
@ -471,28 +502,40 @@ main(int argc, char *argv[])
BSWAP_64(drrw->drr_compressed_size); BSWAP_64(drrw->drr_compressed_size);
} }
uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
/* /*
* If this is verbose and/or dump output, * If this is verbose and/or dump output,
* print info on the modified block * print info on the modified block
*/ */
if (verbose) { if (verbose) {
sprintf_bytes(salt, drrw->drr_salt,
ZIO_DATA_SALT_LEN);
sprintf_bytes(iv, drrw->drr_iv,
ZIO_DATA_IV_LEN);
sprintf_bytes(mac, drrw->drr_mac,
ZIO_DATA_MAC_LEN);
(void) printf("WRITE object = %llu type = %u " (void) printf("WRITE object = %llu type = %u "
"checksum type = %u compression type = %u\n" "checksum type = %u compression type = %u\n"
" offset = %llu logical_size = %llu " " flags = %u offset = %llu "
"logical_size = %llu "
"compressed_size = %llu " "compressed_size = %llu "
"payload_size = %llu " "payload_size = %llu props = %llx "
"props = %llx\n", "salt = %s iv = %s mac = %s\n",
(u_longlong_t)drrw->drr_object, (u_longlong_t)drrw->drr_object,
drrw->drr_type, drrw->drr_type,
drrw->drr_checksumtype, drrw->drr_checksumtype,
drrw->drr_compressiontype, drrw->drr_compressiontype,
drrw->drr_flags,
(u_longlong_t)drrw->drr_offset, (u_longlong_t)drrw->drr_offset,
(u_longlong_t)drrw->drr_logical_size, (u_longlong_t)drrw->drr_logical_size,
(u_longlong_t)drrw->drr_compressed_size, (u_longlong_t)drrw->drr_compressed_size,
(u_longlong_t)payload_size, (u_longlong_t)payload_size,
(u_longlong_t)drrw->drr_key.ddk_prop); (u_longlong_t)drrw->drr_key.ddk_prop,
salt,
iv,
mac);
} }
/* /*
@ -563,12 +606,31 @@ main(int argc, char *argv[])
if (do_byteswap) { if (do_byteswap) {
drrs->drr_object = BSWAP_64(drrs->drr_object); drrs->drr_object = BSWAP_64(drrs->drr_object);
drrs->drr_length = BSWAP_64(drrs->drr_length); drrs->drr_length = BSWAP_64(drrs->drr_length);
drrs->drr_compressed_size =
BSWAP_64(drrs->drr_compressed_size);
drrs->drr_type = BSWAP_32(drrs->drr_type);
} }
if (verbose) { if (verbose) {
sprintf_bytes(salt, drrs->drr_salt,
ZIO_DATA_SALT_LEN);
sprintf_bytes(iv, drrs->drr_iv,
ZIO_DATA_IV_LEN);
sprintf_bytes(mac, drrs->drr_mac,
ZIO_DATA_MAC_LEN);
(void) printf("SPILL block for object = %llu " (void) printf("SPILL block for object = %llu "
"length = %llu\n", "length = %llu flags = %u "
(long long unsigned int)drrs->drr_object, "compression type = %u "
(long long unsigned int)drrs->drr_length); "compressed_size = %llu "
"salt = %s iv = %s mac = %s\n",
(u_longlong_t)drrs->drr_object,
(u_longlong_t)drrs->drr_length,
drrs->drr_flags,
drrs->drr_compressiontype,
(u_longlong_t)drrs->drr_compressed_size,
salt,
iv,
mac);
} }
(void) ssread(buf, drrs->drr_length, &zc); (void) ssread(buf, drrs->drr_length, &zc);
if (dump) { if (dump) {
@ -607,6 +669,33 @@ main(int argc, char *argv[])
(void) ssread(buf, (void) ssread(buf,
P2ROUNDUP(drrwe->drr_psize, 8), &zc); P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break; break;
case DRR_OBJECT_RANGE:
if (do_byteswap) {
drror->drr_firstobj =
BSWAP_64(drror->drr_firstobj);
drror->drr_numslots =
BSWAP_64(drror->drr_numslots);
drror->drr_toguid = BSWAP_64(drror->drr_toguid);
}
if (verbose) {
sprintf_bytes(salt, drror->drr_salt,
ZIO_DATA_SALT_LEN);
sprintf_bytes(iv, drror->drr_iv,
ZIO_DATA_IV_LEN);
sprintf_bytes(mac, drror->drr_mac,
ZIO_DATA_MAC_LEN);
(void) printf("OBJECT_RANGE firstobj = %llu "
"numslots = %llu flags = %u "
"salt = %s iv = %s mac = %s\n",
(u_longlong_t)drror->drr_firstobj,
(u_longlong_t)drror->drr_numslots,
drror->drr_flags,
salt,
iv,
mac);
}
break;
case DRR_NUMTYPES: case DRR_NUMTYPES:
/* should never be reached */ /* should never be reached */
exit(1); exit(1);

View File

@ -2636,7 +2636,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/ */
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(ENOENT, ==, VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_file", nvroot, NULL, NULL)); spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot); nvlist_free(nvroot);
/* /*
@ -2644,7 +2644,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/ */
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
VERIFY3U(ENOENT, ==, VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot); nvlist_free(nvroot);
/* /*
@ -2653,7 +2653,8 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/ */
(void) rw_rdlock(&ztest_name_lock); (void) rw_rdlock(&ztest_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); VERIFY3U(EEXIST, ==,
spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
nvlist_free(nvroot); nvlist_free(nvroot);
VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
@ -2755,7 +2756,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
props = fnvlist_alloc(); props = fnvlist_alloc();
fnvlist_add_uint64(props, fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), version); zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
VERIFY3S(spa_create(name, nvroot, props, NULL), ==, 0); VERIFY3S(spa_create(name, nvroot, props, NULL, NULL), ==, 0);
fnvlist_free(nvroot); fnvlist_free(nvroot);
fnvlist_free(props); fnvlist_free(props);
@ -3530,7 +3531,7 @@ static int
ztest_dataset_create(char *dsname) ztest_dataset_create(char *dsname)
{ {
uint64_t zilset = ztest_random(100); uint64_t zilset = ztest_random(100);
int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, NULL,
ztest_objset_create_cb, NULL); ztest_objset_create_cb, NULL);
if (err || zilset < 80) if (err || zilset < 80)
@ -3553,7 +3554,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
/* /*
* Verify that the dataset contains a directory object. * Verify that the dataset contains a directory object.
*/ */
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, FTAG, &os));
error = dmu_object_info(os, ZTEST_DIROBJ, &doi); error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) { if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */ /* We could have crashed in the middle of destroying it */
@ -3561,7 +3562,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
ASSERT3S(doi.doi_physical_blocks_512, >=, 0); ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
} }
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
/* /*
* Destroy the dataset. * Destroy the dataset.
@ -3637,11 +3638,12 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* (invoked from ztest_objset_destroy_cb()) should just throw it away. * (invoked from ztest_objset_destroy_cb()) should just throw it away.
*/ */
if (ztest_random(2) == 0 && if (ztest_random(2) == 0 &&
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { dmu_objset_own(name, DMU_OST_OTHER, B_FALSE,
B_TRUE, FTAG, &os) == 0) {
ztest_zd_init(zdtmp, NULL, os); ztest_zd_init(zdtmp, NULL, os);
zil_replay(os, zdtmp, ztest_replay_vector); zil_replay(os, zdtmp, ztest_replay_vector);
ztest_zd_fini(zdtmp); ztest_zd_fini(zdtmp);
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
} }
/* /*
@ -3655,7 +3657,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
/* /*
* Verify that the destroyed dataset is no longer in the namespace. * Verify that the destroyed dataset is no longer in the namespace.
*/ */
VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE,
FTAG, &os)); FTAG, &os));
/* /*
@ -3670,7 +3672,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", name, error); fatal(0, "dmu_objset_create(%s) = %d", name, error);
} }
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE,
FTAG, &os));
ztest_zd_init(zdtmp, NULL, os); ztest_zd_init(zdtmp, NULL, os);
@ -3694,7 +3697,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* Verify that we cannot create an existing dataset. * Verify that we cannot create an existing dataset.
*/ */
VERIFY3U(EEXIST, ==, VERIFY3U(EEXIST, ==,
dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL));
/* /*
* Verify that we can hold an objset that is also owned. * Verify that we can hold an objset that is also owned.
@ -3706,10 +3709,10 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* Verify that we cannot own an objset that is already owned. * Verify that we cannot own an objset that is already owned.
*/ */
VERIFY3U(EBUSY, ==, VERIFY3U(EBUSY, ==,
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, FTAG, &os2));
zil_close(zilog); zil_close(zilog);
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
ztest_zd_fini(zdtmp); ztest_zd_fini(zdtmp);
out: out:
(void) rw_unlock(&ztest_name_lock); (void) rw_unlock(&ztest_name_lock);
@ -3863,19 +3866,20 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
} }
error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE,
FTAG, &os);
if (error) if (error)
fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
error = dsl_dataset_promote(clone2name, NULL); error = dsl_dataset_promote(clone2name, NULL);
if (error == ENOSPC) { if (error == ENOSPC) {
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
ztest_record_enospc(FTAG); ztest_record_enospc(FTAG);
goto out; goto out;
} }
if (error != EBUSY) if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error); error);
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
out: out:
ztest_dsl_dataset_cleanup(osname, id); ztest_dsl_dataset_cleanup(osname, id);
@ -6253,7 +6257,7 @@ ztest_dataset_open(int d)
} }
ASSERT(error == 0 || error == EEXIST); ASSERT(error == 0 || error == EEXIST);
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, zd, &os));
(void) rw_unlock(&ztest_name_lock); (void) rw_unlock(&ztest_name_lock);
ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
@ -6294,7 +6298,7 @@ ztest_dataset_close(int d)
ztest_ds_t *zd = &ztest_ds[d]; ztest_ds_t *zd = &ztest_ds[d];
zil_close(zd->zd_zilog); zil_close(zd->zd_zilog);
dmu_objset_disown(zd->zd_os, zd); dmu_objset_disown(zd->zd_os, B_TRUE, zd);
ztest_zd_fini(zd); ztest_zd_fini(zd);
} }
@ -6347,12 +6351,12 @@ ztest_run(ztest_shared_t *zs)
dmu_objset_stats_t dds; dmu_objset_stats_t dds;
VERIFY0(dmu_objset_own(ztest_opts.zo_pool, VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
DMU_OST_ANY, B_TRUE, FTAG, &os)); DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os));
dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds); dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG); dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
zs->zs_guid = dds.dds_guid; zs->zs_guid = dds.dds_guid;
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
@ -6705,7 +6709,8 @@ ztest_init(ztest_shared_t *zs)
VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
free(buf); free(buf);
} }
VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); VERIFY3U(0, ==,
spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL));
nvlist_free(nvroot); nvlist_free(nvroot);
nvlist_free(props); nvlist_free(props);

View File

@ -186,12 +186,14 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/clean_mirror/Makefile tests/zfs-tests/tests/functional/clean_mirror/Makefile
tests/zfs-tests/tests/functional/cli_root/Makefile tests/zfs-tests/tests/functional/cli_root/Makefile
tests/zfs-tests/tests/functional/cli_root/zdb/Makefile tests/zfs-tests/tests/functional/cli_root/zdb/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_copies/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_copies/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_get/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_get/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_inherit/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_inherit/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs/Makefile tests/zfs-tests/tests/functional/cli_root/zfs/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile
@ -204,6 +206,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile

View File

@ -242,7 +242,7 @@ void uu_list_pool_destroy(uu_list_pool_t *);
* usage: * usage:
* *
* foo_t *a; * foo_t *a;
* a = malloc(sizeof(*a)); * a = malloc(sizeof (*a));
* uu_list_node_init(a, &a->foo_list, pool); * uu_list_node_init(a, &a->foo_list, pool);
* ... * ...
* uu_list_node_fini(a, &a->foo_list, pool); * uu_list_node_fini(a, &a->foo_list, pool);
@ -345,7 +345,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *);
* usage: * usage:
* *
* foo_t *a; * foo_t *a;
* a = malloc(sizeof(*a)); * a = malloc(sizeof (*a));
* uu_avl_node_init(a, &a->foo_avl, pool); * uu_avl_node_init(a, &a->foo_avl, pool);
* ... * ...
* uu_avl_node_fini(a, &a->foo_avl, pool); * uu_avl_node_fini(a, &a->foo_avl, pool);

View File

@ -149,6 +149,7 @@ typedef enum zfs_error {
EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */ EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */ EZFS_ACTIVE_POOL, /* pool is imported on a different system */
EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_UNKNOWN EZFS_UNKNOWN
} zfs_error_t; } zfs_error_t;
@ -474,8 +475,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
extern const char *zfs_prop_column_name(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t);
extern boolean_t zfs_prop_align_right(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t);
extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *,
nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *);
extern const char *zfs_prop_to_name(zfs_prop_t); extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
@ -505,6 +506,19 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
/*
* zfs encryption management
*/
extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *);
extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *,
uint8_t **, uint_t *);
extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *,
nvlist_t *);
extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *);
extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *);
extern int zfs_crypto_unload_key(zfs_handle_t *);
extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t);
typedef struct zprop_list { typedef struct zprop_list {
int pl_prop; int pl_prop;
char *pl_user_prop; char *pl_user_prop;
@ -654,6 +668,9 @@ typedef struct sendflags {
/* compressed WRITE records are permitted */ /* compressed WRITE records are permitted */
boolean_t compress; boolean_t compress;
/* raw encrypted records are permitted */
boolean_t raw;
} sendflags_t; } sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
@ -737,6 +754,7 @@ extern const char *zfs_type_to_name(zfs_type_t);
extern void zfs_refresh_properties(zfs_handle_t *); extern void zfs_refresh_properties(zfs_handle_t *);
extern int zfs_name_valid(const char *, zfs_type_t); extern int zfs_name_valid(const char *, zfs_type_t);
extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
extern int zfs_parent_name(zfs_handle_t *, char *, size_t);
extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
zfs_type_t); zfs_type_t);
extern int zfs_spa_version(zfs_handle_t *, int *); extern int zfs_spa_version(zfs_handle_t *, int *);

View File

@ -49,13 +49,17 @@ enum lzc_dataset_type {
}; };
int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **); int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *); int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *,
uint_t);
int lzc_clone(const char *, const char *, nvlist_t *); int lzc_clone(const char *, const char *, nvlist_t *);
int lzc_promote(const char *, char *, int); int lzc_promote(const char *, char *, int);
int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **); int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **); int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **); int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **); int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
int lzc_unload_key(const char *);
int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t);
int lzc_snaprange_space(const char *, const char *, uint64_t *); int lzc_snaprange_space(const char *, const char *, uint64_t *);
@ -66,7 +70,8 @@ int lzc_get_holds(const char *, nvlist_t **);
enum lzc_send_flags { enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0, LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1, LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
LZC_SEND_FLAG_COMPRESS = 1 << 2 LZC_SEND_FLAG_COMPRESS = 1 << 2,
LZC_SEND_FLAG_RAW = 1 << 3,
}; };
int lzc_send(const char *, const char *, int, enum lzc_send_flags); int lzc_send(const char *, const char *, int, enum lzc_send_flags);
@ -76,17 +81,19 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
struct dmu_replay_record; struct dmu_replay_record;
int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
int lzc_receive_resumable(const char *, nvlist_t *, const char *, int);
int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int); boolean_t, int);
int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t, int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int, const struct dmu_replay_record *); boolean_t, boolean_t, int, const struct dmu_replay_record *);
int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t, int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int, const struct dmu_replay_record *, int, uint64_t *, boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
uint64_t *, uint64_t *, nvlist_t **); uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *, int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
const char *, boolean_t, boolean_t, int, const struct dmu_replay_record *, const char *, boolean_t, boolean_t, boolean_t, int,
int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **); const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint64_t *, nvlist_t **);
boolean_t lzc_exists(const char *); boolean_t lzc_exists(const char *);

View File

@ -27,6 +27,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/dsl_deleg.h \ $(top_srcdir)/include/sys/dsl_deleg.h \
$(top_srcdir)/include/sys/dsl_destroy.h \ $(top_srcdir)/include/sys/dsl_destroy.h \
$(top_srcdir)/include/sys/dsl_dir.h \ $(top_srcdir)/include/sys/dsl_dir.h \
$(top_srcdir)/include/sys/dsl_crypt.h \
$(top_srcdir)/include/sys/dsl_pool.h \ $(top_srcdir)/include/sys/dsl_pool.h \
$(top_srcdir)/include/sys/dsl_prop.h \ $(top_srcdir)/include/sys/dsl_prop.h \
$(top_srcdir)/include/sys/dsl_scan.h \ $(top_srcdir)/include/sys/dsl_scan.h \
@ -109,6 +110,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zil_impl.h \ $(top_srcdir)/include/sys/zil_impl.h \
$(top_srcdir)/include/sys/zio_checksum.h \ $(top_srcdir)/include/sys/zio_checksum.h \
$(top_srcdir)/include/sys/zio_compress.h \ $(top_srcdir)/include/sys/zio_compress.h \
$(top_srcdir)/include/sys/zio_crypt.h \
$(top_srcdir)/include/sys/zio.h \ $(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \ $(top_srcdir)/include/sys/zio_impl.h \
$(top_srcdir)/include/sys/zio_priority.h \ $(top_srcdir)/include/sys/zio_priority.h \

View File

@ -60,15 +60,26 @@ _NOTE(CONSTCOND) } while (0)
typedef struct arc_buf_hdr arc_buf_hdr_t; typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t; typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t; typedef struct arc_prune arc_prune_t;
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
/*
* Because the ARC can store encrypted data, errors (not due to bugs) may arise
* while transforming data into its desired format - specifically, when
* decrypting, the key may not be present, or the HMAC may not be correct
* which signifies deliberate tampering with the on-disk state
* (assuming that the checksum was correct). The "error" parameter will be
* nonzero in this case, even if there is no associated zio.
*/
typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf,
void *private);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
typedef void arc_prune_func_t(int64_t bytes, void *private); typedef void arc_prune_func_t(int64_t bytes, void *private);
/* Shared module parameters */ /* Shared module parameters */
extern int zfs_arc_average_blocksize; extern int zfs_arc_average_blocksize;
/* generic arc_done_func_t's which you can use */ /* generic arc_done_func_t's which you can use */
arc_done_func_t arc_bcopy_func; arc_read_done_func_t arc_bcopy_func;
arc_done_func_t arc_getbuf_func; arc_read_done_func_t arc_getbuf_func;
/* generic arc_prune_func_t wrapper for callbacks */ /* generic arc_prune_func_t wrapper for callbacks */
struct arc_prune { struct arc_prune {
@ -110,20 +121,29 @@ typedef enum arc_flags
ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */ ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */ ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */ ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
/*
* Encrypted or authenticated on disk (may be plaintext in memory).
* This header has b_crypt_hdr allocated. Does not include indirect
* blocks with checksums of MACs which will also have their X
* (encrypted) bit set in the bp.
*/
ARC_FLAG_PROTECTED = 1 << 14,
/* data has not been authenticated yet */
ARC_FLAG_NOAUTH = 1 << 15,
/* indicates that the buffer contains metadata (otherwise, data) */ /* indicates that the buffer contains metadata (otherwise, data) */
ARC_FLAG_BUFC_METADATA = 1 << 14, ARC_FLAG_BUFC_METADATA = 1 << 16,
/* Flags specifying whether optional hdr struct fields are defined */ /* Flags specifying whether optional hdr struct fields are defined */
ARC_FLAG_HAS_L1HDR = 1 << 15, ARC_FLAG_HAS_L1HDR = 1 << 17,
ARC_FLAG_HAS_L2HDR = 1 << 16, ARC_FLAG_HAS_L2HDR = 1 << 18,
/* /*
* Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
* This allows the l2arc to use the blkptr's checksum to verify * This allows the l2arc to use the blkptr's checksum to verify
* the data without having to store the checksum in the hdr. * the data without having to store the checksum in the hdr.
*/ */
ARC_FLAG_COMPRESSED_ARC = 1 << 17, ARC_FLAG_COMPRESSED_ARC = 1 << 19,
ARC_FLAG_SHARED_DATA = 1 << 18, ARC_FLAG_SHARED_DATA = 1 << 20,
/* /*
* The arc buffer's compression mode is stored in the top 7 bits of the * The arc buffer's compression mode is stored in the top 7 bits of the
@ -142,7 +162,12 @@ typedef enum arc_flags
typedef enum arc_buf_flags { typedef enum arc_buf_flags {
ARC_BUF_FLAG_SHARED = 1 << 0, ARC_BUF_FLAG_SHARED = 1 << 0,
ARC_BUF_FLAG_COMPRESSED = 1 << 1 ARC_BUF_FLAG_COMPRESSED = 1 << 1,
/*
* indicates whether this arc_buf_t is encrypted, regardless of
* state on-disk
*/
ARC_BUF_FLAG_ENCRYPTED = 1 << 2
} arc_buf_flags_t; } arc_buf_flags_t;
struct arc_buf { struct arc_buf {
@ -206,15 +231,31 @@ typedef struct arc_buf_info {
void arc_space_consume(uint64_t space, arc_space_type_t type); void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type); void arc_space_return(uint64_t space, arc_space_type_t type);
boolean_t arc_is_metadata(arc_buf_t *buf); boolean_t arc_is_metadata(arc_buf_t *buf);
boolean_t arc_is_encrypted(arc_buf_t *buf);
boolean_t arc_is_unauthenticated(arc_buf_t *buf);
enum zio_compress arc_get_compression(arc_buf_t *buf); enum zio_compress arc_get_compression(arc_buf_t *buf);
int arc_decompress(arc_buf_t *buf); void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
uint8_t *iv, uint8_t *mac);
int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj,
boolean_t in_place);
void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
const uint8_t *mac);
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size); int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag, arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type); uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type); enum zio_compress compression_type);
arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
void arc_return_buf(arc_buf_t *buf, void *tag); void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag); void arc_buf_destroy(arc_buf_t *buf, void *tag);
@ -231,12 +272,12 @@ int arc_referenced(arc_buf_t *buf);
#endif #endif
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_done_func_t *done, void *private, zio_priority_t priority, int flags, arc_read_done_func_t *done, void *private, zio_priority_t priority,
arc_flags_t *arc_flags, const zbookmark_phys_t *zb); int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
arc_done_func_t *ready, arc_done_func_t *child_ready, arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
arc_done_func_t *physdone, arc_done_func_t *done, arc_write_done_func_t *physdone, arc_write_done_func_t *done,
void *private, zio_priority_t priority, int zio_flags, void *private, zio_priority_t priority, int zio_flags,
const zbookmark_phys_t *zb); const zbookmark_phys_t *zb);

View File

@ -29,6 +29,7 @@
#define _SYS_ARC_IMPL_H #define _SYS_ARC_IMPL_H
#include <sys/arc.h> #include <sys/arc.h>
#include <sys/zio_crypt.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -90,9 +91,11 @@ typedef struct arc_callback arc_callback_t;
struct arc_callback { struct arc_callback {
void *acb_private; void *acb_private;
arc_done_func_t *acb_done; arc_read_done_func_t *acb_done;
arc_buf_t *acb_buf; arc_buf_t *acb_buf;
boolean_t acb_encrypted;
boolean_t acb_compressed; boolean_t acb_compressed;
boolean_t acb_noauth;
zio_t *acb_zio_dummy; zio_t *acb_zio_dummy;
arc_callback_t *acb_next; arc_callback_t *acb_next;
}; };
@ -100,12 +103,12 @@ struct arc_callback {
typedef struct arc_write_callback arc_write_callback_t; typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback { struct arc_write_callback {
void *awcb_private; void *awcb_private;
arc_done_func_t *awcb_ready; arc_write_done_func_t *awcb_ready;
arc_done_func_t *awcb_children_ready; arc_write_done_func_t *awcb_children_ready;
arc_done_func_t *awcb_physdone; arc_write_done_func_t *awcb_physdone;
arc_done_func_t *awcb_done; arc_write_done_func_t *awcb_done;
arc_buf_t *awcb_buf; arc_buf_t *awcb_buf;
}; };
/* /*
@ -169,6 +172,36 @@ typedef struct l1arc_buf_hdr {
abd_t *b_pabd; abd_t *b_pabd;
} l1arc_buf_hdr_t; } l1arc_buf_hdr_t;
/*
 * Per-header encryption state for the ARC.
 *
 * Encrypted blocks will need to be stored encrypted on the L2ARC
 * disk as they appear in the main pool. In order for this to work we
 * need to pass around the encryption parameters so they can be used
 * to write data to the L2ARC. This struct is only defined in the
 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
 * flag set; its contents must not be read otherwise.
 */
typedef struct arc_buf_hdr_crypt {
abd_t *b_rabd; /* raw (still encrypted) copy of the block's data */
dmu_object_type_t b_ot; /* DMU object type of the block */
uint32_t b_ebufcnt; /* count of encrypted buffers */
/* dsobj for looking up encryption key for l2arc encryption */
uint64_t b_dsobj;
/*
 * Encryption parameters: the salt and IV, sized by ZIO_DATA_SALT_LEN
 * and ZIO_DATA_IV_LEN respectively.
 */
uint8_t b_salt[ZIO_DATA_SALT_LEN];
uint8_t b_iv[ZIO_DATA_IV_LEN];
/*
 * MAC of the block (ZIO_DATA_MAC_LEN bytes).
 *
 * Technically this could be removed since we will always be able to
 * get the mac from the bp when we need it. However, it is inconvenient
 * for callers of arc code to have to pass a bp in all the time. This
 * also allows us to assert that L2ARC data is properly encrypted to
 * match the data in the main storage pool.
 */
uint8_t b_mac[ZIO_DATA_MAC_LEN];
} arc_buf_hdr_crypt_t;
typedef struct l2arc_dev { typedef struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */ vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */ spa_t *l2ad_spa; /* spa */
@ -237,6 +270,11 @@ struct arc_buf_hdr {
l2arc_buf_hdr_t b_l2hdr; l2arc_buf_hdr_t b_l2hdr;
/* L1ARC fields. Undefined when in l2arc_only state */ /* L1ARC fields. Undefined when in l2arc_only state */
l1arc_buf_hdr_t b_l1hdr; l1arc_buf_hdr_t b_l1hdr;
/*
* Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
* is set and the L1 header exists.
*/
arc_buf_hdr_crypt_t b_crypt_hdr;
}; };
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -54,6 +54,7 @@ extern "C" {
#define DB_RF_NOPREFETCH (1 << 3) #define DB_RF_NOPREFETCH (1 << 3)
#define DB_RF_NEVERWAIT (1 << 4) #define DB_RF_NEVERWAIT (1 << 4)
#define DB_RF_CACHED (1 << 5) #define DB_RF_CACHED (1 << 5)
#define DB_RF_NO_DECRYPT (1 << 6)
/* /*
* The simplified state transition diagram for dbufs looks like: * The simplified state transition diagram for dbufs looks like:
@ -146,6 +147,7 @@ typedef struct dbuf_dirty_record {
override_states_t dr_override_state; override_states_t dr_override_state;
uint8_t dr_copies; uint8_t dr_copies;
boolean_t dr_nopwrite; boolean_t dr_nopwrite;
boolean_t dr_raw;
} dl; } dl;
} dt; } dt;
} dbuf_dirty_record_t; } dbuf_dirty_record_t;

View File

@ -67,9 +67,10 @@ enum ddt_class {
typedef struct ddt_key { typedef struct ddt_key {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */ zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/* /*
* Encoded with logical & physical size, and compression, as follows: * Encoded with logical & physical size, encryption, and compression,
* as follows:
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* | 0 | 0 | 0 | comp | PSIZE | LSIZE | * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
*/ */
uint64_t ddk_prop; uint64_t ddk_prop;
@ -85,11 +86,17 @@ typedef struct ddt_key {
#define DDK_SET_PSIZE(ddk, x) \ #define DDK_SET_PSIZE(ddk, x) \
BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8) #define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7)
#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x) #define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x)
/*
 * Encryption flag of a DDT key: the single bit at position 39 of ddk_prop
 * (the "X" in the ddk_prop layout diagram), sitting directly above the
 * 7-bit compression field that starts at bit 32.
 */
#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
/*
 * Number of DVA slots in a DDT entry that hold real block addresses.
 *
 * An encrypted block pointer gives up its last DVA slot to store the
 * salt and IV, so an entry whose key has the crypt bit set may only use
 * SPA_DVAS_PER_BP - 1 DVAs; an unencrypted entry may use all
 * SPA_DVAS_PER_BP of them.
 *
 * dde: pointer expression yielding a ddt entry (anything with a dde_key
 * member usable with DDK_GET_CRYPT). It is parenthesized so that
 * arguments such as "base + i" expand correctly.
 */
#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&(dde)->dde_key) \
	? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP)
typedef struct ddt_phys { typedef struct ddt_phys {
dva_t ddp_dva[SPA_DVAS_PER_BP]; dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt; uint64_t ddp_refcnt;

View File

@ -71,6 +71,7 @@ struct nvlist;
struct arc_buf; struct arc_buf;
struct zio_prop; struct zio_prop;
struct sa_handle; struct sa_handle;
struct dsl_crypto_params;
typedef struct objset objset_t; typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t; typedef struct dmu_tx dmu_tx_t;
@ -100,16 +101,18 @@ typedef enum dmu_object_byteswap {
#define DMU_OT_NEWTYPE 0x80 #define DMU_OT_NEWTYPE 0x80
#define DMU_OT_METADATA 0x40 #define DMU_OT_METADATA 0x40
#define DMU_OT_BYTESWAP_MASK 0x3f #define DMU_OT_ENCRYPTED 0x20
#define DMU_OT_BYTESWAP_MASK 0x1f
/* /*
* Defines a uint8_t object type. Object types specify if the data * Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data * in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t). * (dmu_object_byteswap_t).
*/ */
#define DMU_OT(byteswap, metadata) \ #define DMU_OT(byteswap, metadata, encrypted) \
(DMU_OT_NEWTYPE | \ (DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \ ((metadata) ? DMU_OT_METADATA : 0) | \
((encrypted) ? DMU_OT_ENCRYPTED : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK)) ((byteswap) & DMU_OT_BYTESWAP_MASK))
#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ #define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
@ -120,6 +123,10 @@ typedef enum dmu_object_byteswap {
((ot) & DMU_OT_METADATA) : \ ((ot) & DMU_OT_METADATA) : \
dmu_ot[(int)(ot)].ot_metadata) dmu_ot[(int)(ot)].ot_metadata)
/*
 * Nonzero if objects of type "ot" are encrypted.
 *
 * For types built with DMU_OT() (DMU_OT_NEWTYPE bit set) the answer is the
 * DMU_OT_ENCRYPTED bit of the type value itself; note that the result is
 * the masked bit, not a normalized B_TRUE. For all other types "ot" is used
 * as an index into the dmu_ot[] table and the ot_encrypt field is returned.
 */
#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
((ot) & DMU_OT_ENCRYPTED) : \
dmu_ot[(int)(ot)].ot_encrypt)
/* /*
* These object types use bp_fill != 1 for their L0 bp's. Therefore they can't * These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
* have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill * have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
@ -215,16 +222,27 @@ typedef enum dmu_object_type {
/* /*
* Names for valid types declared with DMU_OT(). * Names for valid types declared with DMU_OT().
*/ */
DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE),
DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE),
DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE),
DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE),
DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE),
DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE),
DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE),
DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE),
DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE),
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE),
DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE),
DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE),
DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE),
DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE),
DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE),
DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE),
DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE),
DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE),
DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE),
DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE),
} dmu_object_type_t; } dmu_object_type_t;
typedef enum txg_how { typedef enum txg_how {
@ -267,19 +285,24 @@ void zfs_znode_byteswap(void *buf, size_t size);
*/ */
#define DMU_BONUS_BLKID (-1ULL) #define DMU_BONUS_BLKID (-1ULL)
#define DMU_SPILL_BLKID (-2ULL) #define DMU_SPILL_BLKID (-2ULL)
/* /*
* Public routines to create, destroy, open, and close objsets. * Public routines to create, destroy, open, and close objsets.
*/ */
typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg,
cred_t *cr, dmu_tx_t *tx);
int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type, int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp); boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_evict_dbufs(objset_t *os); void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func,
void *arg);
int dmu_objset_clone(const char *name, const char *origin); int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist); struct nvlist *errlist);
@ -390,6 +413,13 @@ int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
int dmu_object_next(objset_t *os, uint64_t *objectp, int dmu_object_next(objset_t *os, uint64_t *objectp,
boolean_t hole, uint64_t txg); boolean_t hole, uint64_t txg);
/*
* Set the number of levels on a dnode. nlevels must be greater than the
* current number of levels or an EINVAL will be returned.
*/
int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
dmu_tx_t *tx);
/* /*
* Set the data blocksize for an object. * Set the data blocksize for an object.
* *
@ -432,6 +462,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp); struct zio_prop *zp);
/* /*
* The bonus data is accessed more or less like a regular buffer. * The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a * You must dmu_bonus_hold() to get the buffer, which will give you a
@ -444,6 +475,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
* *
* Returns ENOENT, EIO, or 0. * Returns ENOENT, EIO, or 0.
*/ */
int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag,
uint32_t flags, dmu_buf_t **dbp);
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void); int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
@ -655,6 +688,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)). * (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*/ */
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx);
/* /*
* You must create a transaction, then hold the objects which you will * You must create a transaction, then hold the objects which you will
@ -737,6 +771,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
*/ */
#define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_PREFETCH 0 /* prefetch */
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf, uint32_t flags); void *buf, uint32_t flags);
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf, int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
@ -763,6 +798,12 @@ struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf); void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx); dmu_tx_t *tx);
void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
dmu_tx_t *tx);
void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dmu_buf_t *handle, dmu_tx_t *tx);
#ifdef HAVE_UIO_ZEROCOPY #ifdef HAVE_UIO_ZEROCOPY
int dmu_xuio_init(struct xuio *uio, int niov); int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio); void dmu_xuio_fini(struct xuio *uio);
@ -807,6 +848,7 @@ typedef void (*const arc_byteswap_func_t)(void *buf, size_t size);
typedef struct dmu_object_type_info { typedef struct dmu_object_type_info {
dmu_object_byteswap_t ot_byteswap; dmu_object_byteswap_t ot_byteswap;
boolean_t ot_metadata; boolean_t ot_metadata;
boolean_t ot_encrypt;
char *ot_name; char *ot_name;
} dmu_object_type_info_t; } dmu_object_type_info_t;

View File

@ -58,13 +58,19 @@ struct dmu_tx;
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0) #define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
#define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1) #define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1)
/* all flags are currently non-portable */
#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)
typedef struct objset_phys { typedef struct objset_phys {
dnode_phys_t os_meta_dnode; dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header; zil_header_t os_zil_header;
uint64_t os_type; uint64_t os_type;
uint64_t os_flags; uint64_t os_flags;
uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN];
uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN];
char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
sizeof (zil_header_t) - sizeof (uint64_t)*2]; sizeof (zil_header_t) - sizeof (uint64_t)*2 -
2*ZIO_OBJSET_MAC_LEN];
dnode_phys_t os_userused_dnode; dnode_phys_t os_userused_dnode;
dnode_phys_t os_groupused_dnode; dnode_phys_t os_groupused_dnode;
} objset_phys_t; } objset_phys_t;
@ -77,6 +83,8 @@ struct objset {
spa_t *os_spa; spa_t *os_spa;
arc_buf_t *os_phys_buf; arc_buf_t *os_phys_buf;
objset_phys_t *os_phys; objset_phys_t *os_phys;
boolean_t os_encrypted;
/* /*
* The following "special" dnodes have no parent, are exempt * The following "special" dnodes have no parent, are exempt
* from dnode_move(), and are not recorded in os_dnodes, but they * from dnode_move(), and are not recorded in os_dnodes, but they
@ -118,6 +126,9 @@ struct objset {
uint64_t os_freed_dnodes; uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes; boolean_t os_rescan_dnodes;
/* os_phys_buf should be written raw next txg */
boolean_t os_next_write_raw;
/* Protected by os_obj_lock */ /* Protected by os_obj_lock */
kmutex_t os_obj_lock; kmutex_t os_obj_lock;
uint64_t os_obj_next_chunk; uint64_t os_obj_next_chunk;
@ -161,13 +172,18 @@ struct objset {
/* called from zpl */ /* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type, int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp); boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp);
int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj, int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj,
dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt,
void dmu_objset_refresh_ownership(objset_t *os, void *tag); void *tag, objset_t **osp);
void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed,
void *tag);
void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag); void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag);
void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_stats(objset_t *os, nvlist_t *nv); void dmu_objset_stats(objset_t *os, nvlist_t *nv);
@ -184,6 +200,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */ /* called from dsl */
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg); boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs,
dmu_tx_t *tx);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,

View File

@ -41,7 +41,7 @@ struct dmu_replay_record;
extern const char *recv_clone_name; extern const char *recv_clone_name;
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
boolean_t large_block_ok, boolean_t compressok, int outfd, boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off); uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
boolean_t stream_compressed, uint64_t *sizep); boolean_t stream_compressed, uint64_t *sizep);
@ -49,7 +49,7 @@ int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
boolean_t stream_compressed, uint64_t *sizep); boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
int outfd, struct vnode *vp, offset_t *off); boolean_t rawok, int outfd, struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie { typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds; struct dsl_dataset *drc_ds;
@ -61,6 +61,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap; boolean_t drc_byteswap;
boolean_t drc_force; boolean_t drc_force;
boolean_t drc_resumable; boolean_t drc_resumable;
boolean_t drc_raw;
struct avl_tree *drc_guid_to_ds_map; struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum; zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj; uint64_t drc_newsnapobj;

View File

@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA) #define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
#define TRAVERSE_HARD (1<<4) #define TRAVERSE_HARD (1<<4)
/*
* Encrypted dnode blocks have encrypted bonus buffers while the rest
* of the dnode is left unencrypted. Callers can specify the
* TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that
* they wish to receive the raw encrypted dnodes instead of attempting
* to read the logical data.
*/
#define TRAVERSE_NO_DECRYPT (1<<5)
/* Special traverse error return value to indicate skipping of children */ /* Special traverse error return value to indicate skipping of children */
#define TRAVERSE_VISIT_NO_CHILDREN -1 #define TRAVERSE_VISIT_NO_CHILDREN -1

View File

@ -74,9 +74,7 @@ extern "C" {
/* /*
* dnode id flags * dnode id flags
* *
* Note: a file will never ever have its * Note: a file will never ever have its ids moved from bonus->spill
* ids moved from bonus->spill
* and only in a crypto environment would it be on spill
*/ */
#define DN_ID_CHKED_BONUS 0x1 #define DN_ID_CHKED_BONUS 0x1
#define DN_ID_CHKED_SPILL 0x2 #define DN_ID_CHKED_SPILL 0x2
@ -115,6 +113,10 @@ extern "C" {
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
/*
 * Maximum number of bytes available to the bonus buffer of an on-disk dnode.
 *
 * The bonus area starts at DN_BONUS(dnp). If the dnode carries a spill
 * block pointer (DNODE_FLAG_SPILL_BLKPTR set in dn_flags) the area ends
 * where that pointer begins; otherwise it runs to the end of the dnode,
 * i.e. to the end of the (dn_extra_slots + 1) dnode-sized slots it occupies.
 *
 * dnp: pointer to the dnode_phys_t. It is evaluated several times, so it
 * must be free of side effects; every use is parenthesized so that
 * expressions such as "base + i" may be passed.
 */
#define DN_MAX_BONUS_LEN(dnp) \
	(((dnp)->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
	(uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \
	(uint8_t *)((dnp) + ((dnp)->dn_extra_slots + 1)) - \
	(uint8_t *)DN_BONUS(dnp))
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ #define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
@ -141,6 +143,8 @@ enum dnode_dirtycontext {
/* User/Group dnode accounting */ /* User/Group dnode accounting */
#define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3) #define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3)
#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR)
typedef struct dnode_phys { typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */ uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */ uint8_t dn_indblkshift; /* ln2(indirect block size) */
@ -342,6 +346,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp); void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size); void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn); void dnode_verify(dnode_t *dn);
int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx); int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx); void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space); void dnode_diduse_space(dnode_t *dn, int64_t space);

218
include/sys/dsl_crypt.h Normal file
View File

@ -0,0 +1,218 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2017, Datto, Inc. All rights reserved.
*/
#ifndef _SYS_DSL_CRYPT_H
#define _SYS_DSL_CRYPT_H
#include <sys/dmu_tx.h>
#include <sys/dmu.h>
#include <sys/zio_crypt.h>
#include <sys/spa.h>
#include <sys/dsl_dataset.h>
/*
* ZAP entry keys for DSL Crypto Keys stored on disk. In addition,
* ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are
* also maintained here using their respective property names.
*/
#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE"
#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID"
#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV"
#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC"
#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1"
#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1"
#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ"
#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT"
/*
 * In-memory representation of a wrapping key. One of these structs will exist
 * for each encryption root with its key loaded. They are linked into the
 * sk_wkeys AVL tree of spa_keystore_t, which is indexed by wk_ddobj.
 */
typedef struct dsl_wrapping_key {
/* link on spa_keystore_t:sk_wkeys */
avl_node_t wk_avl_link;
/* keyformat property enum */
zfs_keyformat_t wk_keyformat;
/* the pbkdf2 salt; only meaningful if the keyformat is passphrase */
uint64_t wk_salt;
/* the pbkdf2 iteration count; only meaningful if keyformat is passphrase */
uint64_t wk_iters;
/* actual wrapping key */
crypto_key_t wk_key;
/* number of dsl_crypto_key_t's holding a reference to this struct */
refcount_t wk_refcnt;
/* dsl directory object that owns this wrapping key (sk_wkeys index) */
uint64_t wk_ddobj;
} dsl_wrapping_key_t;
/*
 * Enum of commands indicating special actions that should be run. A command
 * is carried in dsl_crypto_params_t:cp_cmd. The values fall into two groups:
 * commands used when a key is being created and commands used when an
 * existing key is being changed. DCP_CMD_MAX is not a command; it is the
 * end-of-enum sentinel.
 */
typedef enum dcp_cmd {
/* key creation commands */
DCP_CMD_NONE = 0, /* no specific command */
DCP_CMD_RAW_RECV, /* raw receive */
/* key changing commands */
DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */
DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */
DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */
DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */
DCP_CMD_MAX
} dcp_cmd_t;
/*
 * This struct is a simple wrapper around all the parameters that are usually
 * required to setup encryption. It exists so that all of the params can be
 * passed around the kernel together for convenience.
 *
 * dsl_crypto_params_create_nvlist() hands one back through its dcp_out
 * argument and dsl_crypto_params_free() takes one to release.
 */
typedef struct dsl_crypto_params {
/* command indicating intended action (see dcp_cmd_t) */
dcp_cmd_t cp_cmd;
/* the encryption algorithm */
enum zio_encrypt cp_crypt;
/* keylocation property string */
char *cp_keylocation;
/* the wrapping key */
dsl_wrapping_key_t *cp_wkey;
} dsl_crypto_params_t;
/*
 * In-memory representation of a DSL Crypto Key object. One of these structs
 * (and corresponding on-disk ZAP object) will exist for each encrypted
 * clone family that is mounted or otherwise reading protected data.
 * Instances are linked into the sk_dsl_keys AVL tree of spa_keystore_t.
 */
typedef struct dsl_crypto_key {
/* link on spa_keystore_t:sk_dsl_keys */
avl_node_t dck_avl_link;
/* number of dsl_key_mapping_t's holding a reference to this key */
refcount_t dck_holds;
/* master key used to derive encryption keys */
zio_crypt_key_t dck_key;
/* wrapping key for syncing this structure to disk */
dsl_wrapping_key_t *dck_wkey;
/* object id of the corresponding on-disk object */
uint64_t dck_obj;
} dsl_crypto_key_t;
/*
 * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used
 * to look up the corresponding dsl_crypto_key_t from the zio layer for
 * performing data encryption and decryption. Mappings are linked into the
 * sk_key_mappings AVL tree of spa_keystore_t, which is indexed by km_dsobj.
 */
typedef struct dsl_key_mapping {
/* link on spa_keystore_t:sk_key_mappings */
avl_node_t km_avl_link;
/* refcount of how many users are depending on this mapping */
refcount_t km_refcnt;
/* dataset this crypto key belongs to (index of the mapping) */
uint64_t km_dsobj;
/* crypto key (value) of this record */
dsl_crypto_key_t *km_key;
} dsl_key_mapping_t;
/*
 * In-memory structure for holding all wrapping and dsl keys. It consists of
 * three AVL trees, each paired with the reader/writer lock that protects it:
 * the DSL Crypto Keys, the dataset-to-key mappings, and the wrapping keys.
 */
typedef struct spa_keystore {
/* lock for protecting sk_dsl_keys */
krwlock_t sk_dk_lock;
/* tree of all dsl_crypto_key_t's */
avl_tree_t sk_dsl_keys;
/* lock for protecting sk_key_mappings */
krwlock_t sk_km_lock;
/* tree of all dsl_key_mapping_t's, indexed by dsobj */
avl_tree_t sk_key_mappings;
/* lock for protecting the wrapping keys tree (sk_wkeys) */
krwlock_t sk_wkeys_lock;
/* tree of all dsl_wrapping_key_t's, indexed by ddobj */
avl_tree_t sk_wkeys;
} spa_keystore_t;
/*
 * Public interface of the DSL encryption layer.
 *
 * NOTE(review): the grouping comments below are derived from the function
 * names and signatures only; the implementations are not visible from this
 * header. The int-returning functions presumably return 0 or an errno value
 * -- confirm against dsl_crypt.c.
 */

/* dsl_crypto_params_t construction (result returned via dcp_out) and release */
int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out);
void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);

/* spa_keystore_t setup and teardown */
void spa_keystore_init(spa_keystore_t *sk);
void spa_keystore_fini(spa_keystore_t *sk);

/*
 * Keystore operations on wrapping keys, dataset-to-key mappings and DSL
 * Crypto Keys. The "tag" arguments presumably identify the holder of a
 * reference -- TODO confirm.
 */
void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag);
int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey);
int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
boolean_t noop);
int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
int spa_keystore_unload_wkey(const char *dsname);
int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd,
void *tag);
int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag);
int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
dsl_crypto_key_t **dck_out);

/* key export to / import from an nvlist, and key change */
int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out);
int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj,
dmu_objset_type_t ostype, nvlist_t *nvl);
int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);

/*
 * Encryption hooks for dataset rename, promote, create, clone and destroy.
 * Functions ending in _check return int; those ending in _sync take a
 * dmu_tx_t.
 */
int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
dmu_tx_t *tx);
int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
dsl_crypto_params_t *dcp);
void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
dmu_tx_t *tx);
int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd);
uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx);
void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx);

/*
 * Data-path helpers that operate on ABDs for the dataset identified by
 * dsobj. The leading boolean_t (generate / encrypt) presumably selects the
 * direction of the operation -- TODO confirm.
 */
int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt);
int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
abd_t *abd, uint_t datalen, uint8_t *mac);
int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
abd_t *abd, uint_t datalen, boolean_t byteswap);
int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj,
const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd,
abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt);
#endif

View File

@ -39,6 +39,7 @@
#include <sys/dsl_deadlist.h> #include <sys/dsl_deadlist.h>
#include <sys/refcount.h> #include <sys/refcount.h>
#include <sys/rrwlock.h> #include <sys/rrwlock.h>
#include <sys/dsl_crypt.h>
#include <zfeature_common.h> #include <zfeature_common.h>
#ifdef __cplusplus #ifdef __cplusplus
@ -48,6 +49,7 @@ extern "C" {
struct dsl_dataset; struct dsl_dataset;
struct dsl_dir; struct dsl_dir;
struct dsl_pool; struct dsl_pool;
struct dsl_crypto_params;
#define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \ #define DS_IS_INCONSISTENT(ds) \
@ -105,6 +107,7 @@ struct dsl_pool;
#define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok" #define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok" #define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
#define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok" #define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok"
/* /*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
@ -245,26 +248,38 @@ dsl_dataset_phys(dsl_dataset_t *ds)
#define DS_UNIQUE_IS_ACCURATE(ds) \ #define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
/* flags that modify how a dataset hold is taken */
typedef enum ds_hold_flags {
	/* the holder needs access to encrypted data */
	DS_HOLD_FLAG_DECRYPT = (1 << 0)
} ds_hold_flags_t;
int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp); dsl_dataset_t **dsp);
int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds, boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
void *tag); void *tag);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **); dsl_dataset_t **);
int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name, int dsl_dataset_own(struct dsl_pool *dp, const char *name,
void *tag, dsl_dataset_t **dsp); ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **dsp); ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name); void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
int dsl_dataset_namelen(dsl_dataset_t *ds); int dsl_dataset_namelen(dsl_dataset_t *ds);
boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds); boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); dsl_dataset_t *origin, uint64_t flags, cred_t *,
struct dsl_crypto_params *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx); struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors); int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_rename_snapshot(const char *fsname, int dsl_dataset_rename_snapshot(const char *fsname,
@ -343,6 +358,8 @@ boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner, int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
nvlist_t *result); nvlist_t *result);
void dsl_dataset_activate_feature(uint64_t dsobj,
spa_feature_t f, dmu_tx_t *tx);
void dsl_dataset_deactivate_feature(uint64_t dsobj, void dsl_dataset_deactivate_feature(uint64_t dsobj,
spa_feature_t f, dmu_tx_t *tx); spa_feature_t f, dmu_tx_t *tx);

View File

@ -61,6 +61,8 @@ extern "C" {
#define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_DIFF "diff"
#define ZFS_DELEG_PERM_BOOKMARK "bookmark" #define ZFS_DELEG_PERM_BOOKMARK "bookmark"
#define ZFS_DELEG_PERM_LOAD_KEY "load-key"
#define ZFS_DELEG_PERM_CHANGE_KEY "change-key"
/* /*
* Note: the names of properties that are marked delegatable are also * Note: the names of properties that are marked delegatable are also

View File

@ -33,6 +33,7 @@
#include <sys/dsl_synctask.h> #include <sys/dsl_synctask.h>
#include <sys/refcount.h> #include <sys/refcount.h>
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
#include <sys/dsl_crypt.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -47,6 +48,7 @@ struct dsl_dataset;
#define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count" #define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count"
#define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count" #define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count"
#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj"
typedef enum dd_used { typedef enum dd_used {
DD_USED_HEAD, DD_USED_HEAD,
@ -89,6 +91,7 @@ struct dsl_dir {
/* These are immutable; no lock needed: */ /* These are immutable; no lock needed: */
uint64_t dd_object; uint64_t dd_object;
uint64_t dd_crypto_obj;
dsl_pool_t *dd_pool; dsl_pool_t *dd_pool;
/* Stable until user eviction; no lock needed: */ /* Stable until user eviction; no lock needed: */

View File

@ -52,6 +52,7 @@ struct dsl_dataset;
struct dsl_pool; struct dsl_pool;
struct dmu_tx; struct dmu_tx;
struct dsl_scan; struct dsl_scan;
struct dsl_crypto_params;
extern unsigned long zfs_dirty_data_max; extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max; extern unsigned long zfs_dirty_data_max_max;
@ -142,7 +143,8 @@ typedef struct dsl_pool {
int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
int dsl_pool_open(dsl_pool_t *dp); int dsl_pool_open(dsl_pool_t *dp);
void dsl_pool_close(dsl_pool_t *dp); void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops,
struct dsl_crypto_params *dcp, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg); void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg); void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
int dsl_pool_sync_context(dsl_pool_t *dp); int dsl_pool_sync_context(dsl_pool_t *dp);

View File

@ -33,6 +33,7 @@ extern "C" {
#define ZFS_ERROR_CLASS "fs.zfs" #define ZFS_ERROR_CLASS "fs.zfs"
#define FM_EREPORT_ZFS_CHECKSUM "checksum" #define FM_EREPORT_ZFS_CHECKSUM "checksum"
#define FM_EREPORT_ZFS_AUTHENTICATION "authentication"
#define FM_EREPORT_ZFS_IO "io" #define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay" #define FM_EREPORT_ZFS_DELAY "delay"

View File

@ -171,6 +171,14 @@ typedef enum {
ZFS_PROP_OVERLAY, ZFS_PROP_OVERLAY,
ZFS_PROP_PREV_SNAP, ZFS_PROP_PREV_SNAP,
ZFS_PROP_RECEIVE_RESUME_TOKEN, ZFS_PROP_RECEIVE_RESUME_TOKEN,
ZFS_PROP_ENCRYPTION,
ZFS_PROP_KEYLOCATION,
ZFS_PROP_KEYFORMAT,
ZFS_PROP_PBKDF2_SALT,
ZFS_PROP_PBKDF2_ITERS,
ZFS_PROP_ENCRYPTION_ROOT,
ZFS_PROP_KEY_GUID,
ZFS_PROP_KEYSTATUS,
ZFS_NUM_PROPS ZFS_NUM_PROPS
} zfs_prop_t; } zfs_prop_t;
@ -281,6 +289,8 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
boolean_t zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_readonly(zfs_prop_t);
boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_inheritable(zfs_prop_t);
boolean_t zfs_prop_setonce(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t);
boolean_t zfs_prop_encryption_key_param(zfs_prop_t);
boolean_t zfs_prop_valid_keylocation(const char *, boolean_t);
const char *zfs_prop_to_name(zfs_prop_t); const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *); zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *); boolean_t zfs_prop_user(const char *);
@ -404,6 +414,30 @@ typedef enum {
ZFS_VOLMODE_NONE = 3 ZFS_VOLMODE_NONE = 3
} zfs_volmode_t; } zfs_volmode_t;
/* key status values (cf. ZFS_PROP_KEYSTATUS) */
typedef enum zfs_keystatus {
	ZFS_KEYSTATUS_NONE = 0,
	ZFS_KEYSTATUS_UNAVAILABLE = 1,
	ZFS_KEYSTATUS_AVAILABLE = 2
} zfs_keystatus_t;
/*
 * Key format values (cf. ZFS_PROP_KEYFORMAT). ZFS_KEYFORMAT_FORMATS is
 * deliberately left implicit so that it always equals the number of
 * enumerators that precede it.
 */
typedef enum zfs_keyformat {
	ZFS_KEYFORMAT_NONE = 0,
	ZFS_KEYFORMAT_RAW = 1,
	ZFS_KEYFORMAT_HEX = 2,
	ZFS_KEYFORMAT_PASSPHRASE = 3,
	ZFS_KEYFORMAT_FORMATS
} zfs_keyformat_t;
/*
 * Key location values (cf. ZFS_PROP_KEYLOCATION). ZFS_KEYLOCATION_LOCATIONS
 * is deliberately left implicit so that it always equals the number of
 * enumerators that precede it.
 */
typedef enum zfs_key_location {
	ZFS_KEYLOCATION_NONE = 0,
	ZFS_KEYLOCATION_PROMPT = 1,
	ZFS_KEYLOCATION_URI = 2,
	ZFS_KEYLOCATION_LOCATIONS
} zfs_keylocation_t;
/*
 * PBKDF2 iteration counts: the default and the minimum, as the names
 * indicate. NOTE(review): presumably these apply to ZFS_PROP_PBKDF2_ITERS --
 * confirm against the property validation code.
 */
#define DEFAULT_PBKDF2_ITERATIONS 350000
#define MIN_PBKDF2_ITERATIONS 100000
/* /*
* On-disk version number. * On-disk version number.
*/ */
@ -1061,6 +1095,9 @@ typedef enum zfs_ioc {
ZFS_IOC_DESTROY_BOOKMARKS, ZFS_IOC_DESTROY_BOOKMARKS,
ZFS_IOC_RECV_NEW, ZFS_IOC_RECV_NEW,
ZFS_IOC_POOL_SYNC, ZFS_IOC_POOL_SYNC,
ZFS_IOC_LOAD_KEY,
ZFS_IOC_UNLOAD_KEY,
ZFS_IOC_CHANGE_KEY,
/* /*
* Linux - 3/64 numbers reserved. * Linux - 3/64 numbers reserved.
@ -1125,6 +1162,12 @@ typedef enum {
#define ZPOOL_HIST_DSNAME "dsname" #define ZPOOL_HIST_DSNAME "dsname"
#define ZPOOL_HIST_DSID "dsid" #define ZPOOL_HIST_DSID "dsid"
/*
* Special nvlist name that will not have its args recorded in the pool's
* history log.
*/
#define ZPOOL_HIDDEN_ARGS "hidden_args"
/* /*
* Flags for ZFS_IOC_VDEV_SET_STATE * Flags for ZFS_IOC_VDEV_SET_STATE
*/ */
@ -1144,6 +1187,7 @@ typedef enum {
#define ZFS_IMPORT_ONLY 0x8 #define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10 #define ZFS_IMPORT_TEMP_NAME 0x10
#define ZFS_IMPORT_SKIP_MMP 0x20 #define ZFS_IMPORT_SKIP_MMP 0x20
#define ZFS_IMPORT_LOAD_KEYS 0x40
/* /*
* Sysevent payload members. ZFS will generate the following sysevents with the * Sysevent payload members. ZFS will generate the following sysevents with the

View File

@ -63,6 +63,7 @@ typedef struct zbookmark_phys zbookmark_phys_t;
struct dsl_pool; struct dsl_pool;
struct dsl_dataset; struct dsl_dataset;
struct dsl_crypto_params;
/* /*
* General-purpose 32-bit and 64-bit bitfield encodings. * General-purpose 32-bit and 64-bit bitfield encodings.
@ -222,7 +223,7 @@ typedef struct zio_cksum_salt {
* G gang block indicator * G gang block indicator
* B byteorder (endianness) * B byteorder (endianness)
* D dedup * D dedup
* X encryption (on version 30, which is not supported) * X encryption
* E blkptr_t contains embedded data (see below) * E blkptr_t contains embedded data (see below)
* lvl level of indirection * lvl level of indirection
* type DMU object type * type DMU object type
@ -232,6 +233,83 @@ typedef struct zio_cksum_salt {
* checksum[4] 256-bit checksum of the data this bp describes * checksum[4] 256-bit checksum of the data this bp describes
*/ */
/*
* The blkptr_t's of encrypted blocks also need to store the encryption
* parameters so that the block can be decrypted. The layout is as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | salt |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 | IV1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | IV2 | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* c | checksum[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* d | checksum[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* e | MAC[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* f | MAC[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* salt Salt for generating encryption keys
* IV1 First 64 bits of encryption IV
* X Block requires encryption handling (set to 1)
* E blkptr_t contains embedded data (set to 0, see below)
* fill count number of non-zero blocks under this bp (truncated to 32 bits)
* IV2 Last 32 bits of encryption IV
* checksum[2] 128-bit checksum of the data this bp describes
* MAC[2] 128-bit message authentication code for this data
*
* The X bit being set indicates that this block is one of 3 types. If this is
* a level 0 block with an encrypted object type, the block is encrypted
* (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted
* object type, this block is authenticated with an HMAC (see
* BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC
* words to store a checksum-of-MACs from the level below (see
* BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED()
* refers to both encrypted and authenticated blocks and BP_USES_CRYPT()
* refers to any of these 3 kinds of blocks.
*
* The additional encryption parameters are the salt, IV, and MAC which are
* explained in greater detail in the block comment at the top of zio_crypt.c.
* The MAC occupies half of the checksum space since it serves a very similar
* purpose: to prevent data corruption on disk. The only functional difference
* is that the checksum is used to detect on-disk corruption whether or not the
* encryption key is loaded and the MAC provides additional protection against
* malicious disk tampering. We use the 3rd DVA to store the salt and first
* 64 bits of the IV. As a result encrypted blocks can only have 2 copies
* maximum instead of the normal 3. The last 32 bits of the IV are stored in
* the upper bits of what is usually the fill count. Note that only blocks at
* level 0 or -2 are ever encrypted, which allows us to guarantee that these
* 32 bits are not trampled over by other code (see zio_crypt.c for details).
* The salt and IV are not used for authenticated bps or bps with an indirect
* MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits
* for the fill count.
*/
/* /*
* "Embedded" blkptr_t's don't actually point to a block, instead they * "Embedded" blkptr_t's don't actually point to a block, instead they
* have a data payload embedded in the blkptr_t itself. See the comment * have a data payload embedded in the blkptr_t itself. See the comment
@ -268,7 +346,7 @@ typedef struct zio_cksum_salt {
* payload contains the embedded data * payload contains the embedded data
* B (byteorder) byteorder (endianness) * B (byteorder) byteorder (endianness)
* D (dedup) padding (set to zero) * D (dedup) padding (set to zero)
* X encryption (set to zero; see above) * X encryption (set to zero)
* E (embedded) set to one * E (embedded) set to one
* lvl indirection level * lvl indirection level
* type DMU object type * type DMU object type
@ -287,7 +365,9 @@ typedef struct zio_cksum_salt {
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
* other macros, as they assert that they are only used on BP's of the correct * other macros, as they assert that they are only used on BP's of the correct
* "embedded-ness". * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use
* the payload space for encryption parameters (see the comment above on
* how encryption parameters are stored).
*/ */
#define BPE_GET_ETYPE(bp) \ #define BPE_GET_ETYPE(bp) \
@ -411,6 +491,26 @@ _NOTE(CONSTCOND) } while (0)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
/*
 * Encrypted, authenticated, and MAC cksum bps all use the same bit: bit 61
 * of blk_prop (the "X" bit in the blkptr_t layout diagrams above).
 * BP_USES_CRYPT() reads that bit and BP_SET_CRYPT() writes it.
 */
#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1)
#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
/*
 * True when the crypt bit is set, the level is <= 0, and the bp's object
 * type satisfies DMU_OT_IS_ENCRYPTED(). 'bp' is evaluated more than once.
 */
#define BP_IS_ENCRYPTED(bp) \
(BP_USES_CRYPT(bp) && \
BP_GET_LEVEL(bp) <= 0 && \
DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
/*
 * True when the crypt bit is set, the level is <= 0, and the bp's object
 * type does NOT satisfy DMU_OT_IS_ENCRYPTED(). 'bp' is evaluated more than
 * once.
 */
#define BP_IS_AUTHENTICATED(bp) \
(BP_USES_CRYPT(bp) && \
BP_GET_LEVEL(bp) <= 0 && \
!DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
/* True when the crypt bit is set on a bp whose level is above 0. */
#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \
(BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0)
/* True for bps that are either encrypted or authenticated. */
#define BP_IS_PROTECTED(bp) \
(BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp))
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
@ -428,7 +528,26 @@ _NOTE(CONSTCOND) } while (0)
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
} }
#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill) #define BP_GET_FILL(bp) \
((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
/*
 * Store the fill count of a bp. For encrypted bps only the low 32 bits of
 * blk_fill are written, leaving the upper 32 bits (which hold IV2, see
 * BP_SET_IV2()) untouched; every other bp uses the full 64-bit word.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used as the body of an unbraced if/else.
 * 'bp' is evaluated more than once.
 */
#define BP_SET_FILL(bp, fill) \
do { \
	if (BP_IS_ENCRYPTED(bp)) \
		BF64_SET((bp)->blk_fill, 0, 32, (fill)); \
	else \
		(bp)->blk_fill = (fill); \
_NOTE(CONSTCOND) } while (0)
/*
 * Read IV2 (the last 32 bits of the encryption IV) from the upper 32 bits
 * of blk_fill. Asserts that the bp is encrypted; the comma expression lets
 * the macro still be used as a value.
 */
#define BP_GET_IV2(bp) \
(ASSERT(BP_IS_ENCRYPTED(bp)), \
BF64_GET((bp)->blk_fill, 32, 32))
/*
 * Store IV2 (the last 32 bits of the encryption IV) in the upper 32 bits of
 * blk_fill, leaving the 32-bit fill count in the lower half untouched.
 * Asserts that the bp is encrypted.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used as the body of an unbraced if/else.
 * 'bp' is evaluated more than once.
 */
#define BP_SET_IV2(bp, iv2) \
do { \
	ASSERT(BP_IS_ENCRYPTED(bp)); \
	BF64_SET((bp)->blk_fill, 32, 32, (iv2)); \
_NOTE(CONSTCOND) } while (0)
#define BP_IS_METADATA(bp) \ #define BP_IS_METADATA(bp) \
(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
@ -437,7 +556,7 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \ (BP_IS_EMBEDDED(bp) ? 0 : \
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[2])) (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_GET_UCSIZE(bp) \ #define BP_GET_UCSIZE(bp) \
(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
@ -446,13 +565,13 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \ (BP_IS_EMBEDDED(bp) ? 0 : \
!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[2])) (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_COUNT_GANG(bp) \ #define BP_COUNT_GANG(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \ (BP_IS_EMBEDDED(bp) ? 0 : \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \ (DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \ DVA_GET_GANG(&(bp)->blk_dva[1]) + \
DVA_GET_GANG(&(bp)->blk_dva[2]))) (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))))
#define DVA_EQUAL(dva1, dva2) \ #define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
@ -505,14 +624,15 @@ _NOTE(CONSTCOND) } while (0)
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) #define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
#define BP_SPRINTF_LEN 320 #define BP_SPRINTF_LEN 400
/* /*
* This macro allows code sharing between zfs, libzpool, and mdb. * This macro allows code sharing between zfs, libzpool, and mdb.
* 'func' is either snprintf() or mdb_snprintf(). * 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line. * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/ */
#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \ #define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, crypt_type, \
compress) \
{ \ { \
static const char *copyname[] = \ static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \ { "zero", "single", "double", "triple" }; \
@ -553,18 +673,27 @@ _NOTE(CONSTCOND) } while (0)
(u_longlong_t)DVA_GET_ASIZE(dva), \ (u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \ ws); \
} \ } \
if (BP_IS_ENCRYPTED(bp)) { \
len += func(buf + len, size - len, \
"salt=%llx iv=%llx:%llx%c", \
(u_longlong_t)bp->blk_dva[2].dva_word[0], \
(u_longlong_t)bp->blk_dva[2].dva_word[1], \
(u_longlong_t)BP_GET_IV2(bp), \
ws); \
} \
if (BP_IS_GANG(bp) && \ if (BP_IS_GANG(bp) && \
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \ DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
copies--; \ copies--; \
len += func(buf + len, size - len, \ len += func(buf + len, size - len, \
"[L%llu %s] %s %s %s %s %s %s%c" \ "[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \ "cksum=%llx:%llx:%llx:%llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \ (u_longlong_t)BP_GET_LEVEL(bp), \
type, \ type, \
checksum, \ checksum, \
compress, \ compress, \
crypt_type, \
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
BP_IS_GANG(bp) ? "gang" : "contiguous", \ BP_IS_GANG(bp) ? "gang" : "contiguous", \
BP_GET_DEDUP(bp) ? "dedup" : "unique", \ BP_GET_DEDUP(bp) ? "dedup" : "unique", \
@ -598,8 +727,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config); nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen); size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t *zplprops); nvlist_t *zplprops, struct dsl_crypto_params *dcp);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props, extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags); uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
@ -886,9 +1015,9 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
/* error handling */ /* error handling */
struct zbookmark_phys; struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, zio_t *zio); extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
zio_t *zio, uint64_t stateoroffset, uint64_t length); zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t length);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type, extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux); const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd); extern void zfs_post_remove(spa_t *spa, vdev_t *vd);

View File

@ -42,6 +42,7 @@
#include <sys/refcount.h> #include <sys/refcount.h>
#include <sys/bplist.h> #include <sys/bplist.h>
#include <sys/bpobj.h> #include <sys/bpobj.h>
#include <sys/dsl_crypt.h>
#include <sys/zfeature.h> #include <sys/zfeature.h>
#include <zfeature_common.h> #include <zfeature_common.h>
@ -273,6 +274,7 @@ struct spa {
spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */ spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
uint64_t spa_errata; /* errata issues detected */ uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */ spa_stats_t spa_stats; /* assorted spa statistics */
spa_keystore_t spa_keystore; /* loaded crypto keys */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */ taskq_t *spa_zvol_taskq; /* Taskq for minor management */
uint64_t spa_multihost; /* multihost aware (mmp) */ uint64_t spa_multihost; /* multihost aware (mmp) */

View File

@ -104,6 +104,7 @@ typedef enum drr_headertype {
/* flag #21 is reserved for a Delphix feature */ /* flag #21 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22) #define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23) #define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
#define DMU_BACKUP_FEATURE_RAW (1 << 24)
/* /*
* Mask of all supported backup features * Mask of all supported backup features
@ -112,7 +113,8 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \ DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \ DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \ DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE) DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW)
/* Are all features in the given flag word currently supported? */ /* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
@ -158,18 +160,28 @@ typedef enum dmu_send_resume_token_version {
#define DRR_FLAG_FREERECORDS (1<<2) #define DRR_FLAG_FREERECORDS (1<<2)
/* /*
* flags in the drr_checksumflags field in the DRR_WRITE and * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
* DRR_WRITE_BYREF blocks * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
*/ */
#define DRR_CHECKSUM_DEDUP (1<<0) #define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
#define DRR_RAW_ENCRYPTED (1<<1)
#define DRR_RAW_BYTESWAP (1<<2)
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
#define DRR_IS_RAW_ENCRYPTED(flags) ((flags) & DRR_RAW_ENCRYPTED)
#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)
/* deal with compressed drr_write replay records */ /* deal with compressed drr_write replay records */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0) #define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
#define DRR_WRITE_PAYLOAD_SIZE(drrw) \ #define DRR_WRITE_PAYLOAD_SIZE(drrw) \
(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \ (DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
(drrw)->drr_logical_size) (drrw)->drr_logical_size)
/*
 * Payload size of a drr_spill record: drr_compressed_size when the record
 * has DRR_RAW_ENCRYPTED set in drr_flags, drr_length otherwise.
 * Every use of 'drrs' is parenthesized so that expression arguments such
 * as '&rec' expand correctly.
 */
#define DRR_SPILL_PAYLOAD_SIZE(drrs) \
	(DRR_IS_RAW_ENCRYPTED((drrs)->drr_flags) ? \
	(drrs)->drr_compressed_size : (drrs)->drr_length)
/*
 * Payload size of a drr_object record: drr_raw_bonuslen when the record
 * has DRR_RAW_ENCRYPTED set in drr_flags, otherwise drr_bonuslen rounded
 * up to a multiple of 8.
 * Every use of 'drro' is parenthesized so that expression arguments such
 * as '&rec' expand correctly.
 */
#define DRR_OBJECT_PAYLOAD_SIZE(drro) \
	(DRR_IS_RAW_ENCRYPTED((drro)->drr_flags) ? \
	(drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8))
/* /*
* zfs ioctl command structure * zfs ioctl command structure
@ -178,7 +190,8 @@ typedef struct dmu_replay_record {
enum { enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
DRR_NUMTYPES
} drr_type; } drr_type;
uint32_t drr_payloadlen; uint32_t drr_payloadlen;
union { union {
@ -205,8 +218,13 @@ typedef struct dmu_replay_record {
uint8_t drr_checksumtype; uint8_t drr_checksumtype;
uint8_t drr_compress; uint8_t drr_compress;
uint8_t drr_dn_slots; uint8_t drr_dn_slots;
uint8_t drr_pad[5]; uint8_t drr_flags;
uint32_t drr_raw_bonuslen;
uint64_t drr_toguid; uint64_t drr_toguid;
/* only nonzero if DRR_RAW_ENCRYPTED flag is set */
uint8_t drr_indblkshift;
uint8_t drr_nlevels;
uint8_t drr_nblkptr;
/* bonus content follows */ /* bonus content follows */
} drr_object; } drr_object;
struct drr_freeobjects { struct drr_freeobjects {
@ -222,13 +240,17 @@ typedef struct dmu_replay_record {
uint64_t drr_logical_size; uint64_t drr_logical_size;
uint64_t drr_toguid; uint64_t drr_toguid;
uint8_t drr_checksumtype; uint8_t drr_checksumtype;
uint8_t drr_checksumflags; uint8_t drr_flags;
uint8_t drr_compressiontype; uint8_t drr_compressiontype;
uint8_t drr_pad2[5]; uint8_t drr_pad2[5];
/* deduplication key */ /* deduplication key */
ddt_key_t drr_key; ddt_key_t drr_key;
/* only nonzero if drr_compressiontype is not 0 */ /* only nonzero if drr_compressiontype is not 0 */
uint64_t drr_compressed_size; uint64_t drr_compressed_size;
/* only nonzero if DRR_RAW_ENCRYPTED flag is set */
uint8_t drr_salt[ZIO_DATA_SALT_LEN];
uint8_t drr_iv[ZIO_DATA_IV_LEN];
uint8_t drr_mac[ZIO_DATA_MAC_LEN];
/* content follows */ /* content follows */
} drr_write; } drr_write;
struct drr_free { struct drr_free {
@ -249,7 +271,7 @@ typedef struct dmu_replay_record {
uint64_t drr_refoffset; uint64_t drr_refoffset;
/* properties of the data */ /* properties of the data */
uint8_t drr_checksumtype; uint8_t drr_checksumtype;
uint8_t drr_checksumflags; uint8_t drr_flags;
uint8_t drr_pad2[6]; uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */ ddt_key_t drr_key; /* deduplication key */
} drr_write_byref; } drr_write_byref;
@ -257,7 +279,15 @@ typedef struct dmu_replay_record {
uint64_t drr_object; uint64_t drr_object;
uint64_t drr_length; uint64_t drr_length;
uint64_t drr_toguid; uint64_t drr_toguid;
uint64_t drr_pad[4]; /* needed for crypto */ uint8_t drr_flags;
uint8_t drr_compressiontype;
uint8_t drr_pad[6];
/* only nonzero if DRR_RAW_ENCRYPTED flag is set */
uint64_t drr_compressed_size;
uint8_t drr_salt[ZIO_DATA_SALT_LEN];
uint8_t drr_iv[ZIO_DATA_IV_LEN];
uint8_t drr_mac[ZIO_DATA_MAC_LEN];
dmu_object_type_t drr_type;
/* spill data follows */ /* spill data follows */
} drr_spill; } drr_spill;
struct drr_write_embedded { struct drr_write_embedded {
@ -273,6 +303,16 @@ typedef struct dmu_replay_record {
uint32_t drr_psize; /* compr. (real) size of payload */ uint32_t drr_psize; /* compr. (real) size of payload */
/* (possibly compressed) content follows */ /* (possibly compressed) content follows */
} drr_write_embedded; } drr_write_embedded;
/*
 * Record body named after the DRR_OBJECT_RANGE record type.
 * NOTE(review): presumably describes drr_numslots dnode slots starting at
 * object drr_firstobj -- confirm against the send/receive code.
 */
struct drr_object_range {
uint64_t drr_firstobj;
uint64_t drr_numslots;
uint64_t drr_toguid;
/* encryption parameters; byte arrays sized by the ZIO_DATA_*_LEN constants */
uint8_t drr_salt[ZIO_DATA_SALT_LEN];
uint8_t drr_iv[ZIO_DATA_IV_LEN];
uint8_t drr_mac[ZIO_DATA_MAC_LEN];
/* flag byte; presumably the same DRR_RAW_* bits the other records use */
uint8_t drr_flags;
/* explicit padding after the one-byte flags field */
uint8_t drr_pad[3];
} drr_object_range;
/* /*
* Nore: drr_checksum is overlaid with all record types * Nore: drr_checksum is overlaid with all record types

View File

@ -32,6 +32,7 @@
#include <sys/spa.h> #include <sys/spa.h>
#include <sys/zio.h> #include <sys/zio.h>
#include <sys/dmu.h> #include <sys/dmu.h>
#include <sys/zio_crypt.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -466,7 +467,8 @@ typedef int (*const zil_replay_func_t)(void *, char *, boolean_t);
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio); typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
boolean_t decrypt);
extern void zil_init(void); extern void zil_init(void);
extern void zil_fini(void); extern void zil_fini(void);

View File

@ -104,6 +104,29 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 #define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100 #define ZIO_DEDUPDITTO_MIN 100
/*
 * supported encryption algorithms
 *
 * The first three values (INHERIT, ON, OFF) are settings rather than
 * ciphers; the remaining values name AES with a 128/192/256 key in CCM or
 * GCM mode. ZIO_CRYPT_FUNCTIONS is the terminating count and is used to
 * size zio_crypt_table.
 */
enum zio_encrypt {
ZIO_CRYPT_INHERIT = 0,
ZIO_CRYPT_ON,
ZIO_CRYPT_OFF,
ZIO_CRYPT_AES_128_CCM,
ZIO_CRYPT_AES_192_CCM,
ZIO_CRYPT_AES_256_CCM,
ZIO_CRYPT_AES_128_GCM,
ZIO_CRYPT_AES_192_GCM,
ZIO_CRYPT_AES_256_GCM,
ZIO_CRYPT_FUNCTIONS
};
/* NOTE(review): presumably the concrete algorithm that ZIO_CRYPT_ON stands for -- confirm */
#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM
/* default encryption setting: off */
#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF
/*
 * macros defining encryption lengths
 *
 * The salt, IV and MAC lengths are used as the sizes of uint8_t arrays
 * (e.g. zp_salt, zp_iv, zp_mac in zio_prop_t), i.e. they are byte counts.
 */
#define ZIO_OBJSET_MAC_LEN 32
#define ZIO_DATA_IV_LEN 12
#define ZIO_DATA_SALT_LEN 8
#define ZIO_DATA_MAC_LEN 16
/* /*
* The number of "legacy" compression functions which can be set on individual * The number of "legacy" compression functions which can be set on individual
* objects. * objects.
@ -191,17 +214,19 @@ enum zio_flag {
ZIO_FLAG_DONT_PROPAGATE = 1 << 20, ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
ZIO_FLAG_IO_BYPASS = 1 << 21, ZIO_FLAG_IO_BYPASS = 1 << 21,
ZIO_FLAG_IO_REWRITE = 1 << 22, ZIO_FLAG_IO_REWRITE = 1 << 22,
ZIO_FLAG_RAW = 1 << 23, ZIO_FLAG_RAW_COMPRESS = 1 << 23,
ZIO_FLAG_GANG_CHILD = 1 << 24, ZIO_FLAG_RAW_ENCRYPT = 1 << 24,
ZIO_FLAG_DDT_CHILD = 1 << 25, ZIO_FLAG_GANG_CHILD = 1 << 25,
ZIO_FLAG_GODFATHER = 1 << 26, ZIO_FLAG_DDT_CHILD = 1 << 26,
ZIO_FLAG_NOPWRITE = 1 << 27, ZIO_FLAG_GODFATHER = 1 << 27,
ZIO_FLAG_REEXECUTED = 1 << 28, ZIO_FLAG_NOPWRITE = 1 << 28,
ZIO_FLAG_DELEGATED = 1 << 29, ZIO_FLAG_REEXECUTED = 1 << 29,
ZIO_FLAG_FASTWRITE = 1 << 30 ZIO_FLAG_DELEGATED = 1 << 30,
ZIO_FLAG_FASTWRITE = 1 << 31,
}; };
#define ZIO_FLAG_MUSTSUCCEED 0 #define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
#define ZIO_DDT_CHILD_FLAGS(zio) \ #define ZIO_DDT_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \
@ -303,6 +328,11 @@ typedef struct zio_prop {
boolean_t zp_dedup; boolean_t zp_dedup;
boolean_t zp_dedup_verify; boolean_t zp_dedup_verify;
boolean_t zp_nopwrite; boolean_t zp_nopwrite;
boolean_t zp_encrypt;
boolean_t zp_byteorder;
uint8_t zp_salt[ZIO_DATA_SALT_LEN];
uint8_t zp_iv[ZIO_DATA_IV_LEN];
uint8_t zp_mac[ZIO_DATA_MAC_LEN];
} zio_prop_t; } zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t; typedef struct zio_cksum_report zio_cksum_report_t;
@ -514,8 +544,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags); const blkptr_t *bp, enum zio_flag flags);
extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
uint64_t size, boolean_t *slog); blkptr_t *new_bp, uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp); extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size); extern void zio_shrink(zio_t *zio, uint64_t size);
@ -596,8 +626,9 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
/* /*
* Checksum ereport functions * Checksum ereport functions
*/ */
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info); zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical); const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
@ -605,7 +636,7 @@ extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/* If we have the good data in hand, this function can be used */ /* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length, zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info); const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */ /* Called from spa_sync(), but primarily an injection handler */

147
include/sys/zio_crypt.h Normal file
View File

@ -0,0 +1,147 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2017, Datto, Inc. All rights reserved.
*/
#ifndef _SYS_ZIO_CRYPT_H
#define _SYS_ZIO_CRYPT_H
#include <sys/dmu.h>
#include <sys/refcount.h>
#include <sys/crypto/api.h>
#include <sys/nvpair.h>
#include <sys/avl.h>
#include <sys/zio.h>
/* forward declarations */
struct zbookmark_phys;
/* lengths for the wrapping key; its IV length is the data-block IV length */
#define WRAPPING_KEY_LEN 32
#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN
#define WRAPPING_MAC_LEN 16
/* digest lengths in bytes (SHA-1 is 160 bits, SHA-512 is 512 bits) */
#define SHA1_DIGEST_LEN 20
#define SHA512_DIGEST_LEN 64
/* size of the uint8_t zk_hmac_keydata buffer in zio_crypt_key_t */
#define SHA512_HMAC_KEYLEN 64
/* size of the uint8_t master/current key buffers in zio_crypt_key_t */
#define MASTER_KEY_MAX_LEN 32
/* NOTE(review): presumably the algorithm used for L2ARC -- confirm at the use site */
#define L2ARC_DEFAULT_CRYPT ZIO_CRYPT_AES_256_CCM
/*
 * utility macros
 *
 * Convert between bit counts and byte counts using NBBY.
 * BITS_TO_BYTES rounds up to a whole number of bytes. The argument is
 * parenthesized so that compound expressions such as BYTES_TO_BITS(a + b)
 * or BITS_TO_BYTES(n << 1) expand with the intended precedence.
 */
#define BITS_TO_BYTES(x) (((x) + NBBY - 1) / NBBY)
#define BYTES_TO_BITS(x) ((x) * NBBY)
/*
 * Cipher mode of a zio_crypt_table entry (stored in ci_crypt_type).
 * ZC_TYPE_NONE is 0; presumably used by entries that are not real ciphers.
 */
typedef enum zio_crypt_type {
ZC_TYPE_NONE = 0,
ZC_TYPE_CCM,
ZC_TYPE_GCM
} zio_crypt_type_t;
/*
 * Descriptor for one entry of the table of supported crypto algorithms,
 * modes and keylengths.
 */
typedef struct zio_crypt_info {
/* mechanism name, needed by ICP */
crypto_mech_name_t ci_mechname;
/* cipher mode type (GCM, CCM) */
zio_crypt_type_t ci_crypt_type;
/* length of the encryption key */
size_t ci_keylen;
/* human-readable name of the encryption algorithm */
char *ci_name;
} zio_crypt_info_t;
/*
 * The table itself: ZIO_CRYPT_FUNCTIONS entries.
 * NOTE(review): presumably indexed by enum zio_encrypt -- confirm.
 */
extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS];
/*
 * In-memory representation of an unwrapped key that is loaded into memory.
 * Holds the raw master and HMAC key bytes, the currently derived encryption
 * key with its salt, and the crypto api key/template objects for both.
 */
typedef struct zio_crypt_key {
/* encryption algorithm */
uint64_t zk_crypt;
/* GUID for uniquely identifying this key. Not encrypted on disk. */
uint64_t zk_guid;
/* buffer for master key */
uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN];
/* buffer for hmac key */
uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN];
/* buffer for current encryption key derived from master key */
uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN];
/* current 64 bit salt for deriving an encryption key */
uint8_t zk_salt[ZIO_DATA_SALT_LEN];
/* count of how many times the current salt has been used */
uint64_t zk_salt_count;
/* illumos crypto api current encryption key */
crypto_key_t zk_current_key;
/* template of current encryption key for illumos crypto api */
crypto_ctx_template_t zk_current_tmpl;
/* illumos crypto api current hmac key */
crypto_key_t zk_hmac_key;
/* template of hmac key for illumos crypto api */
crypto_ctx_template_t zk_hmac_tmpl;
/* lock for changing the salt and dependent values */
krwlock_t zk_salt_lock;
} zio_crypt_key_t;
/*
 * Function prototypes. Only the signatures are visible in this header; the
 * groupings below follow the function and parameter names and are meant as
 * a reading aid. See zio_crypt.c for the authoritative descriptions.
 */

/* setup, teardown and salt access for an in-memory zio_crypt_key_t */
void zio_crypt_key_destroy(zio_crypt_key_t *key);
int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key);
int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);
/* wrap / unwrap key material; cwkey is presumably the wrapping key -- confirm */
int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
zio_crypt_key_t *key);
/* IV (and, for the dedup variant, salt) generation into caller buffers */
int zio_crypt_generate_iv(uint8_t *ivbuf);
int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
uint_t datalen, uint8_t *ivbuf, uint8_t *salt);
/* encode / decode of salt, IV and MAC in a block pointer or ZIL data */
void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv);
void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv);
void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac);
void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac);
void zio_crypt_encode_mac_zil(void *data, uint8_t *mac);
void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac);
void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen);
/*
 * MAC / HMAC helpers. The 'generate' flag presumably selects between
 * producing and verifying the checksum -- confirm in zio_crypt.c.
 */
int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
uint_t datalen, boolean_t byteswap, uint8_t *cksum);
int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
uint_t datalen, boolean_t byteswap, uint8_t *cksum);
int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
uint8_t *digestbuf);
int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac);
/*
 * Bulk data crypto; 'encrypt' presumably selects the direction. One variant
 * takes plain byte buffers, the other takes abd_t buffers.
 */
int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf,
boolean_t *no_crypt);
int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt);
#endif

View File

@ -96,6 +96,18 @@ extern "C" {
* physical I/O. The nop write feature can handle writes in either * physical I/O. The nop write feature can handle writes in either
* syncing or open context (i.e. zil writes) and as a result is mutually * syncing or open context (i.e. zil writes) and as a result is mutually
* exclusive with dedup. * exclusive with dedup.
*
* Encryption:
* Encryption and authentication is handled by the ZIO_STAGE_ENCRYPT stage.
* This stage determines how the encryption metadata is stored in the bp.
* Decryption and MAC verification is performed during zio_decrypt() as a
* transform callback. Encryption is mutually exclusive with nopwrite, because
* blocks with the same plaintext will be encrypted with different salts and
* IV's (if dedup is off), and therefore have different ciphertexts. For dedup
* blocks we deterministically generate the IV and salt by performing an HMAC
* of the plaintext, which is computationally expensive, but allows us to keep
* support for encrypted dedup. See the block comment in zio_crypt.c for
* details.
*/ */
/* /*
@ -110,32 +122,33 @@ enum zio_stage {
ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */ ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */ ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */
ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */
ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */ ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */
ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */ ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */
ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */ ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */
ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */ ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */
ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */ ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */
ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */ ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */
ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */ ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */
ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */ ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */
ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */ ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */
ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */ ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */
ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */ ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */
ZIO_STAGE_READY = 1 << 18, /* RWFCI */ ZIO_STAGE_READY = 1 << 19, /* RWFCI */
ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */ ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */ ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */
ZIO_STAGE_DONE = 1 << 23 /* RWFCI */ ZIO_STAGE_DONE = 1 << 24 /* RWFCI */
}; };
#define ZIO_INTERLOCK_STAGES \ #define ZIO_INTERLOCK_STAGES \
@ -187,12 +200,14 @@ enum zio_stage {
#define ZIO_REWRITE_PIPELINE \ #define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \ (ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_COMPRESS | \ ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_WRITE_BP_INIT) ZIO_STAGE_WRITE_BP_INIT)
#define ZIO_WRITE_PIPELINE \ #define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \ (ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_WRITE_COMPRESS | \ ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_DVA_THROTTLE | \ ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE) ZIO_STAGE_DVA_ALLOCATE)
@ -207,6 +222,7 @@ enum zio_stage {
ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_ISSUE_ASYNC | \ ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_COMPRESS | \ ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_CHECKSUM_GENERATE | \ ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE) ZIO_STAGE_DDT_WRITE)

View File

@ -57,6 +57,7 @@ typedef enum spa_feature {
SPA_FEATURE_SKEIN, SPA_FEATURE_SKEIN,
SPA_FEATURE_EDONR, SPA_FEATURE_EDONR,
SPA_FEATURE_USEROBJ_ACCOUNTING, SPA_FEATURE_USEROBJ_ACCOUNTING,
SPA_FEATURE_ENCRYPTION,
SPA_FEATURES SPA_FEATURES
} spa_feature_t; } spa_feature_t;

View File

@ -71,6 +71,8 @@ typedef enum {
ZFS_DELEG_NOTE_RELEASE, ZFS_DELEG_NOTE_RELEASE,
ZFS_DELEG_NOTE_DIFF, ZFS_DELEG_NOTE_DIFF,
ZFS_DELEG_NOTE_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK,
ZFS_DELEG_NOTE_LOAD_KEY,
ZFS_DELEG_NOTE_CHANGE_KEY,
ZFS_DELEG_NOTE_NONE ZFS_DELEG_NOTE_NONE
} zfs_deleg_note_t; } zfs_deleg_note_t;

View File

@ -51,9 +51,12 @@ typedef enum {
* ONETIME properties are a sort of conglomeration of READONLY * ONETIME properties are a sort of conglomeration of READONLY
* and INHERIT. They can be set only during object creation, * and INHERIT. They can be set only during object creation,
* after that they are READONLY. If not explicitly set during * after that they are READONLY. If not explicitly set during
* creation, they can be inherited. * creation, they can be inherited. ONETIME_DEFAULT properties
* work the same way, but they will default instead of
* inheriting a value.
*/ */
PROP_ONETIME PROP_ONETIME,
PROP_ONETIME_DEFAULT
} zprop_attr_t; } zprop_attr_t;
typedef struct zfs_index { typedef struct zfs_index {

View File

@ -88,4 +88,11 @@
*/ */
#define MS_OVERLAY 0x00000004 #define MS_OVERLAY 0x00000004
/*
* MS_CRYPT indicates that encryption keys should be loaded if they are not
* already available. This is not defined in glibc, but it is never seen by
* the kernel so it will not cause any problems.
*/
#define MS_CRYPT 0x00000008
#endif /* _LIBSPL_SYS_MOUNT_H */ #endif /* _LIBSPL_SYS_MOUNT_H */

View File

@ -18,6 +18,7 @@ lib_LTLIBRARIES = libzfs.la
USER_C = \ USER_C = \
libzfs_changelist.c \ libzfs_changelist.c \
libzfs_config.c \ libzfs_config.c \
libzfs_crypto.c \
libzfs_dataset.c \ libzfs_dataset.c \
libzfs_diff.c \ libzfs_diff.c \
libzfs_fru.c \ libzfs_fru.c \
@ -30,7 +31,6 @@ USER_C = \
libzfs_util.c libzfs_util.c
KERNEL_C = \ KERNEL_C = \
algs/sha2/sha2.c \
zfeature_common.c \ zfeature_common.c \
zfs_comutil.c \ zfs_comutil.c \
zfs_deleg.c \ zfs_deleg.c \
@ -53,10 +53,12 @@ nodist_libzfs_la_SOURCES = \
libzfs_la_LIBADD = \ libzfs_la_LIBADD = \
$(top_builddir)/lib/libefi/libefi.la \ $(top_builddir)/lib/libefi/libefi.la \
$(top_builddir)/lib/libicp/libicp.la \
$(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libnvpair/libnvpair.la \
$(top_builddir)/lib/libshare/libshare.la \ $(top_builddir)/lib/libshare/libshare.la \
$(top_builddir)/lib/libtpool/libtpool.la \ $(top_builddir)/lib/libtpool/libtpool.la \
$(top_builddir)/lib/libuutil/libuutil.la \ $(top_builddir)/lib/libuutil/libuutil.la \
$(top_builddir)/lib/libzpool/libzpool.la \
$(top_builddir)/lib/libzfs_core/libzfs_core.la $(top_builddir)/lib/libzfs_core/libzfs_core.la
libzfs_la_LIBADD += -lm $(LIBBLKID) $(LIBUDEV) libzfs_la_LIBADD += -lm $(LIBBLKID) $(LIBUDEV)

View File

@ -199,6 +199,7 @@ changelist_postfix(prop_changelist_t *clp)
boolean_t sharenfs; boolean_t sharenfs;
boolean_t sharesmb; boolean_t sharesmb;
boolean_t mounted; boolean_t mounted;
boolean_t needs_key;
/* /*
* If we are in the global zone, but this dataset is exported * If we are in the global zone, but this dataset is exported
@ -229,9 +230,12 @@ changelist_postfix(prop_changelist_t *clp)
shareopts, sizeof (shareopts), NULL, NULL, 0, shareopts, sizeof (shareopts), NULL, NULL, 0,
B_FALSE) == 0) && (strcmp(shareopts, "off") != 0)); B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
needs_key = (zfs_prop_get_int(cn->cn_handle,
ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE);
mounted = zfs_is_mounted(cn->cn_handle, NULL); mounted = zfs_is_mounted(cn->cn_handle, NULL);
if (!mounted && (cn->cn_mounted || if (!mounted && !needs_key && (cn->cn_mounted ||
((sharenfs || sharesmb || clp->cl_waslegacy) && ((sharenfs || sharesmb || clp->cl_waslegacy) &&
(zfs_prop_get_int(cn->cn_handle, (zfs_prop_get_int(cn->cn_handle,
ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) { ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) {

1612
lib/libzfs/libzfs_crypto.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -58,6 +58,7 @@
#include <sys/dnode.h> #include <sys/dnode.h>
#include <sys/spa.h> #include <sys/spa.h>
#include <sys/zap.h> #include <sys/zap.h>
#include <sys/dsl_crypt.h>
#include <libzfs.h> #include <libzfs.h>
#include "zfs_namecheck.h" #include "zfs_namecheck.h"
@ -965,7 +966,7 @@ zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop)
nvlist_t * nvlist_t *
zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl, uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl,
const char *errbuf) boolean_t key_params_ok, const char *errbuf)
{ {
nvpair_t *elem; nvpair_t *elem;
uint64_t intval; uint64_t intval;
@ -1124,7 +1125,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
} }
if (zfs_prop_readonly(prop) && if (zfs_prop_readonly(prop) &&
(!zfs_prop_setonce(prop) || zhp != NULL)) { !(zfs_prop_setonce(prop) && zhp == NULL) &&
!(zfs_prop_encryption_key_param(prop) && key_params_ok)) {
zfs_error_aux(hdl, zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "'%s' is readonly"), dgettext(TEXT_DOMAIN, "'%s' is readonly"),
propname); propname);
@ -1390,6 +1392,48 @@ badlabel:
break; break;
case ZFS_PROP_KEYLOCATION:
	/* reject values that zfs_prop_valid_keylocation() does not accept */
	if (!zfs_prop_valid_keylocation(strval, B_FALSE)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "invalid keylocation"));
		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
		goto error;
	}

	/*
	 * When a dataset handle is supplied, the value must agree with
	 * that dataset's encryption property: an unencrypted dataset
	 * may only have keylocation 'none', and an encrypted dataset
	 * may not have keylocation 'none'. Each branch reports the rule
	 * that was actually violated.
	 */
	if (zhp != NULL) {
		uint64_t crypt =
		    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);

		if (crypt == ZIO_CRYPT_OFF &&
		    strcmp(strval, "none") != 0) {
			/* unencrypted dataset given a real key location */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "keylocation must be 'none' "
			    "for unencrypted datasets"));
			(void) zfs_error(hdl, EZFS_BADPROP,
			    errbuf);
			goto error;
		} else if (crypt != ZIO_CRYPT_OFF &&
		    strcmp(strval, "none") == 0) {
			/* encrypted dataset asked to drop its location */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "keylocation must not be 'none' "
			    "for encrypted datasets"));
			(void) zfs_error(hdl, EZFS_BADPROP,
			    errbuf);
			goto error;
		}
	}
	break;
case ZFS_PROP_PBKDF2_ITERS:
if (intval < MIN_PBKDF2_ITERATIONS) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"minimum pbkdf2 iterations is %u"),
MIN_PBKDF2_ITERATIONS);
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
goto error;
}
break;
case ZFS_PROP_UTF8ONLY: case ZFS_PROP_UTF8ONLY:
chosen_utf = (int)intval; chosen_utf = (int)intval;
break; break;
@ -1453,6 +1497,27 @@ badlabel:
break; break;
} }
} }
/* check encryption properties */
if (zhp != NULL) {
int64_t crypt = zfs_prop_get_int(zhp,
ZFS_PROP_ENCRYPTION);
switch (prop) {
case ZFS_PROP_COPIES:
if (crypt != ZIO_CRYPT_OFF && intval > 2) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"encrypted datasets cannot have "
"3 copies"));
(void) zfs_error(hdl, EZFS_BADPROP,
errbuf);
goto error;
}
break;
default:
break;
}
}
} }
/* /*
@ -1609,6 +1674,16 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
} }
break; break;
case EACCES:
if (prop == ZFS_PROP_KEYLOCATION) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"keylocation may only be set on encryption roots"));
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
} else {
(void) zfs_standard_error(hdl, err, errbuf);
}
break;
case EOVERFLOW: case EOVERFLOW:
/* /*
* This platform can't address a volume this big. * This platform can't address a volume this big.
@ -1700,7 +1775,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props, if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props,
zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl,
errbuf)) == NULL) B_FALSE, errbuf)) == NULL)
goto error; goto error;
/* /*
@ -3155,6 +3230,12 @@ parent_name(const char *path, char *buf, size_t buflen)
return (0); return (0);
} }
/*
 * Thin public wrapper around parent_name(): looks up the name of the
 * dataset behind 'zhp' and hands it, together with the caller's buffer
 * 'buf' of size 'buflen', to parent_name(). The return value is passed
 * through unchanged.
 */
int
zfs_parent_name(zfs_handle_t *zhp, char *buf, size_t buflen)
{
	const char *dsname = zfs_get_name(zhp);

	return (parent_name(dsname, buf, buflen));
}
/* /*
* If accept_ancestor is false, then check to make sure that the given path has * If accept_ancestor is false, then check to make sure that the given path has
* a parent, and that it exists. If accept_ancestor is true, then find the * a parent, and that it exists. If accept_ancestor is true, then find the
@ -3373,10 +3454,13 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
int ret; int ret;
uint64_t size = 0; uint64_t size = 0;
uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
char errbuf[1024];
uint64_t zoned; uint64_t zoned;
enum lzc_dataset_type ost; enum lzc_dataset_type ost;
zpool_handle_t *zpool_handle; zpool_handle_t *zpool_handle;
uint8_t *wkeydata = NULL;
uint_t wkeylen = 0;
char errbuf[1024];
char parent[ZFS_MAX_DATASET_NAME_LEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create '%s'"), path); "cannot create '%s'"), path);
@ -3420,7 +3504,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
return (-1); return (-1);
if (props && (props = zfs_valid_proplist(hdl, type, props, if (props && (props = zfs_valid_proplist(hdl, type, props,
zoned, NULL, zpool_handle, errbuf)) == 0) { zoned, NULL, zpool_handle, B_TRUE, errbuf)) == 0) {
zpool_close(zpool_handle); zpool_close(zpool_handle);
return (-1); return (-1);
} }
@ -3472,15 +3556,21 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
} }
} }
(void) parent_name(path, parent, sizeof (parent));
if (zfs_crypto_create(hdl, parent, props, NULL, &wkeydata,
&wkeylen) != 0) {
nvlist_free(props);
return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
}
/* create the dataset */ /* create the dataset */
ret = lzc_create(path, ost, props); ret = lzc_create(path, ost, props, wkeydata, wkeylen);
nvlist_free(props); nvlist_free(props);
if (wkeydata != NULL)
free(wkeydata);
/* check for failure */ /* check for failure */
if (ret != 0) { if (ret != 0) {
char parent[ZFS_MAX_DATASET_NAME_LEN];
(void) parent_name(path, parent, sizeof (parent));
switch (errno) { switch (errno) {
case ENOENT: case ENOENT:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@ -3497,6 +3587,13 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
"pool must be upgraded to set this " "pool must be upgraded to set this "
"property or value")); "property or value"));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
case EACCES:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"encryption root's key is not loaded "
"or provided"));
return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
#ifdef _ILP32 #ifdef _ILP32
case EOVERFLOW: case EOVERFLOW:
/* /*
@ -3691,10 +3788,15 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
type = ZFS_TYPE_FILESYSTEM; type = ZFS_TYPE_FILESYSTEM;
} }
if ((props = zfs_valid_proplist(hdl, type, props, zoned, if ((props = zfs_valid_proplist(hdl, type, props, zoned,
zhp, zhp->zpool_hdl, errbuf)) == NULL) zhp, zhp->zpool_hdl, B_TRUE, errbuf)) == NULL)
return (-1); return (-1);
} }
if (zfs_crypto_clone_check(hdl, zhp, parent, props) != 0) {
nvlist_free(props);
return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
}
ret = lzc_clone(target, zhp->zfs_name, props); ret = lzc_clone(target, zhp->zfs_name, props);
nvlist_free(props); nvlist_free(props);
@ -3847,7 +3949,7 @@ zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)
if (props != NULL && if (props != NULL &&
(props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
props, B_FALSE, NULL, zpool_hdl, errbuf)) == NULL) { props, B_FALSE, NULL, zpool_hdl, B_FALSE, errbuf)) == NULL) {
zpool_close(zpool_hdl); zpool_close(zpool_hdl);
return (-1); return (-1);
} }
@ -4223,6 +4325,18 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive,
"a child dataset already has a snapshot " "a child dataset already has a snapshot "
"with the new name")); "with the new name"));
(void) zfs_error(hdl, EZFS_EXISTS, errbuf); (void) zfs_error(hdl, EZFS_EXISTS, errbuf);
} else if (errno == EACCES) {
if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) ==
ZIO_CRYPT_OFF) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot rename an unencrypted dataset to "
"be a decendent of an encrypted one"));
} else {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot move encryption child outside of "
"its encryption root"));
}
(void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
} else { } else {
(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
} }

View File

@ -109,6 +109,11 @@ get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj,
"The sys_config privilege or diff delegated permission " "The sys_config privilege or diff delegated permission "
"is needed\nto discover path names")); "is needed\nto discover path names"));
return (-1); return (-1);
} else if (di->zerr == EACCES) {
(void) snprintf(di->errbuf, sizeof (di->errbuf),
dgettext(TEXT_DOMAIN,
"Key must be loaded to discover path names"));
return (-1);
} else { } else {
(void) snprintf(di->errbuf, sizeof (di->errbuf), (void) snprintf(di->errbuf, sizeof (di->errbuf),
dgettext(TEXT_DOMAIN, dgettext(TEXT_DOMAIN,

View File

@ -78,6 +78,7 @@
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/vfs.h> #include <sys/vfs.h>
#include <sys/dsl_crypt.h>
#include <libzfs.h> #include <libzfs.h>
@ -465,6 +466,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
char mntopts[MNT_LINE_MAX]; char mntopts[MNT_LINE_MAX];
char overlay[ZFS_MAXPROPLEN]; char overlay[ZFS_MAXPROPLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl; libzfs_handle_t *hdl = zhp->zfs_hdl;
uint64_t keystatus;
int remount = 0, rc; int remount = 0, rc;
if (options == NULL) { if (options == NULL) {
@ -501,6 +503,39 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
mountpoint)); mountpoint));
} }
/*
* If the filesystem is encrypted the key must be loaded in order to
* mount. If the key isn't loaded, the MS_CRYPT flag decides whether
* or not we attempt to load the keys. Note: we must call
* zfs_refresh_properties() here since some callers of this function
* (most notably zpool_enable_datasets()) may implicitly load our key
* by loading the parent's key first.
*/
if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
zfs_refresh_properties(zhp);
keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
/*
* If the key is unavailable and MS_CRYPT is set give the
* user a chance to enter the key. Otherwise just fail
* immediately.
*/
if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
if (flags & MS_CRYPT) {
rc = zfs_crypto_load_key(zhp, B_FALSE, NULL);
if (rc)
return (rc);
} else {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"encryption key not loaded"));
return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
mountpoint));
}
}
}
/* /*
* Append zfsutil option so the mount helper allow the mount * Append zfsutil option so the mount helper allow the mount
*/ */
@ -1136,6 +1171,12 @@ mount_cb(zfs_handle_t *zhp, void *data)
return (0); return (0);
} }
if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
ZFS_KEYSTATUS_UNAVAILABLE) {
zfs_close(zhp);
return (0);
}
/* /*
* If this filesystem is inconsistent and has a receive resume * If this filesystem is inconsistent and has a receive resume
* token, we can not mount it. * token, we can not mount it.
@ -1225,6 +1266,14 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
ret = 0; ret = 0;
for (i = 0; i < cb.cb_used; i++) { for (i = 0; i < cb.cb_used; i++) {
/*
* don't attempt to mount encrypted datasets with
* unloaded keys
*/
if (zfs_prop_get_int(cb.cb_handles[i], ZFS_PROP_KEYSTATUS) ==
ZFS_KEYSTATUS_UNAVAILABLE)
continue;
if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0) if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
ret = -1; ret = -1;
else else

View File

@ -1160,6 +1160,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zfs_cmd_t zc = {"\0"}; zfs_cmd_t zc = {"\0"};
nvlist_t *zc_fsprops = NULL; nvlist_t *zc_fsprops = NULL;
nvlist_t *zc_props = NULL; nvlist_t *zc_props = NULL;
nvlist_t *hidden_args = NULL;
uint8_t *wkeydata = NULL;
uint_t wkeylen = 0;
char msg[1024]; char msg[1024];
int ret = -1; int ret = -1;
@ -1190,17 +1193,34 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
strcmp(zonestr, "on") == 0); strcmp(zonestr, "on") == 0);
if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM, if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
fsprops, zoned, NULL, NULL, msg)) == NULL) { fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
goto create_failed; goto create_failed;
} }
if (!zc_props && if (!zc_props &&
(nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) { (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
goto create_failed; goto create_failed;
} }
if (zfs_crypto_create(hdl, NULL, zc_fsprops, props,
&wkeydata, &wkeylen) != 0) {
zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
goto create_failed;
}
if (nvlist_add_nvlist(zc_props, if (nvlist_add_nvlist(zc_props,
ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) { ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
goto create_failed; goto create_failed;
} }
if (wkeydata != NULL) {
if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
goto create_failed;
if (nvlist_add_uint8_array(hidden_args, "wkeydata",
wkeydata, wkeylen) != 0)
goto create_failed;
if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
hidden_args) != 0)
goto create_failed;
}
} }
if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
@ -1213,6 +1233,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zcmd_free_nvlists(&zc); zcmd_free_nvlists(&zc);
nvlist_free(zc_props); nvlist_free(zc_props);
nvlist_free(zc_fsprops); nvlist_free(zc_fsprops);
nvlist_free(hidden_args);
if (wkeydata != NULL)
free(wkeydata);
switch (errno) { switch (errno) {
case EBUSY: case EBUSY:
@ -1282,6 +1305,9 @@ create_failed:
zcmd_free_nvlists(&zc); zcmd_free_nvlists(&zc);
nvlist_free(zc_props); nvlist_free(zc_props);
nvlist_free(zc_fsprops); nvlist_free(zc_fsprops);
nvlist_free(hidden_args);
if (wkeydata != NULL)
free(wkeydata);
return (ret); return (ret);
} }

View File

@ -61,6 +61,7 @@
#include "libzfs_impl.h" #include "libzfs_impl.h"
#include <zlib.h> #include <zlib.h>
#include <sys/zio_checksum.h> #include <sys/zio_checksum.h>
#include <sys/dsl_crypt.h>
#include <sys/ddt.h> #include <sys/ddt.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/sha2.h> #include <sys/sha2.h>
@ -336,11 +337,9 @@ cksummer(void *arg)
struct drr_object *drro = &drr->drr_u.drr_object; struct drr_object *drro = &drr->drr_u.drr_object;
if (drro->drr_bonuslen > 0) { if (drro->drr_bonuslen > 0) {
(void) ssread(buf, (void) ssread(buf,
P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), DRR_OBJECT_PAYLOAD_SIZE(drro), ofp);
ofp);
} }
if (dump_record(drr, buf, if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro),
P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
&stream_cksum, outfd) != 0) &stream_cksum, outfd) != 0)
goto out; goto out;
break; break;
@ -349,8 +348,8 @@ cksummer(void *arg)
case DRR_SPILL: case DRR_SPILL:
{ {
struct drr_spill *drrs = &drr->drr_u.drr_spill; struct drr_spill *drrs = &drr->drr_u.drr_spill;
(void) ssread(buf, drrs->drr_length, ofp); (void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp);
if (dump_record(drr, buf, drrs->drr_length, if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs),
&stream_cksum, outfd) != 0) &stream_cksum, outfd) != 0)
goto out; goto out;
break; break;
@ -380,7 +379,7 @@ cksummer(void *arg)
if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
zero_cksum) || zero_cksum) ||
!DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) {
SHA2_CTX ctx; SHA2_CTX ctx;
zio_cksum_t tmpsha256; zio_cksum_t tmpsha256;
@ -397,7 +396,7 @@ cksummer(void *arg)
drrw->drr_key.ddk_cksum.zc_word[3] = drrw->drr_key.ddk_cksum.zc_word[3] =
BE_64(tmpsha256.zc_word[3]); BE_64(tmpsha256.zc_word[3]);
drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
} }
dataref.ref_guid = drrw->drr_toguid; dataref.ref_guid = drrw->drr_toguid;
@ -426,8 +425,7 @@ cksummer(void *arg)
wbr_drrr->drr_checksumtype = wbr_drrr->drr_checksumtype =
drrw->drr_checksumtype; drrw->drr_checksumtype;
wbr_drrr->drr_checksumflags = wbr_drrr->drr_flags = drrw->drr_flags;
drrw->drr_checksumflags;
wbr_drrr->drr_key.ddk_cksum = wbr_drrr->drr_key.ddk_cksum =
drrw->drr_key.ddk_cksum; drrw->drr_key.ddk_cksum;
wbr_drrr->drr_key.ddk_prop = wbr_drrr->drr_key.ddk_prop =
@ -466,6 +464,14 @@ cksummer(void *arg)
break; break;
} }
case DRR_OBJECT_RANGE:
{
if (dump_record(drr, NULL, 0, &stream_cksum,
outfd) != 0)
goto out;
break;
}
default: default:
(void) fprintf(stderr, "INVALID record type 0x%x\n", (void) fprintf(stderr, "INVALID record type 0x%x\n",
drr->drr_type); drr->drr_type);
@ -614,6 +620,7 @@ typedef struct send_data {
const char *fsname; const char *fsname;
const char *fromsnap; const char *fromsnap;
const char *tosnap; const char *tosnap;
boolean_t raw;
boolean_t recursive; boolean_t recursive;
boolean_t verbose; boolean_t verbose;
boolean_t seenfrom; boolean_t seenfrom;
@ -635,6 +642,7 @@ typedef struct send_data {
* "snapprops" -> { name (lastname) -> { name -> value } } * "snapprops" -> { name (lastname) -> { name -> value } }
* *
* "origin" -> number (guid) (if clone) * "origin" -> number (guid) (if clone)
* "is_encroot" -> boolean
* "sent" -> boolean (not on-disk) * "sent" -> boolean (not on-disk)
* } * }
* } * }
@ -812,7 +820,7 @@ static int
send_iterate_fs(zfs_handle_t *zhp, void *arg) send_iterate_fs(zfs_handle_t *zhp, void *arg)
{ {
send_data_t *sd = arg; send_data_t *sd = arg;
nvlist_t *nvfs, *nv; nvlist_t *nvfs = NULL, *nv = NULL;
int rv = 0; int rv = 0;
uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
uint64_t fromsnap_txg_save = sd->fromsnap_txg; uint64_t fromsnap_txg_save = sd->fromsnap_txg;
@ -878,8 +886,37 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
/* iterate over props */ /* iterate over props */
VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
send_iterate_prop(zhp, nv); send_iterate_prop(zhp, nv);
if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
boolean_t encroot;
/* determine if this dataset is an encryption root */
if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
rv = -1;
goto out;
}
if (encroot)
VERIFY(0 == nvlist_add_boolean(nvfs, "is_encroot"));
/*
* Encrypted datasets can only be sent with properties if
* the raw flag is specified because the receive side doesn't
* currently have a mechanism for recursively asking the user
* for new encryption parameters.
*/
if (!sd->raw) {
(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
"cannot send %s@%s: encrypted dataset %s may not "
"be sent with properties without the raw flag\n"),
sd->fsname, sd->tosnap, zhp->zfs_name);
rv = -1;
goto out;
}
}
VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
nvlist_free(nv);
/* iterate over snaps, and set sd->parent_fromsnap_guid */ /* iterate over snaps, and set sd->parent_fromsnap_guid */
sd->parent_fromsnap_guid = 0; sd->parent_fromsnap_guid = 0;
@ -895,7 +932,6 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
(void) snprintf(guidstring, sizeof (guidstring), (void) snprintf(guidstring, sizeof (guidstring),
"0x%llx", (longlong_t)guid); "0x%llx", (longlong_t)guid);
VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
nvlist_free(nvfs);
/* iterate over children */ /* iterate over children */
if (sd->recursive) if (sd->recursive)
@ -905,6 +941,8 @@ out:
sd->parent_fromsnap_guid = parent_fromsnap_guid_save; sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
sd->fromsnap_txg = fromsnap_txg_save; sd->fromsnap_txg = fromsnap_txg_save;
sd->tosnap_txg = tosnap_txg_save; sd->tosnap_txg = tosnap_txg_save;
nvlist_free(nv);
nvlist_free(nvfs);
zfs_close(zhp); zfs_close(zhp);
return (rv); return (rv);
@ -912,7 +950,7 @@ out:
static int static int
gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
const char *tosnap, boolean_t recursive, boolean_t verbose, const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t verbose,
nvlist_t **nvlp, avl_tree_t **avlp) nvlist_t **nvlp, avl_tree_t **avlp)
{ {
zfs_handle_t *zhp; zfs_handle_t *zhp;
@ -928,6 +966,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
sd.fromsnap = fromsnap; sd.fromsnap = fromsnap;
sd.tosnap = tosnap; sd.tosnap = tosnap;
sd.recursive = recursive; sd.recursive = recursive;
sd.raw = raw;
sd.verbose = verbose; sd.verbose = verbose;
if ((error = send_iterate_fs(zhp, &sd)) != 0) { if ((error = send_iterate_fs(zhp, &sd)) != 0) {
@ -959,7 +998,7 @@ typedef struct send_dump_data {
uint64_t prevsnap_obj; uint64_t prevsnap_obj;
boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t seenfrom, seento, replicate, doall, fromorigin;
boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
boolean_t large_block, compress; boolean_t large_block, compress, raw;
int outfd; int outfd;
boolean_t err; boolean_t err;
nvlist_t *fss; nvlist_t *fss;
@ -1081,6 +1120,11 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
"not an earlier snapshot from the same fs")); "not an earlier snapshot from the same fs"));
return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
case EACCES:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"source key must be loaded"));
return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
case ENOENT: case ENOENT:
if (zfs_dataset_exists(hdl, zc.zc_name, if (zfs_dataset_exists(hdl, zc.zc_name,
ZFS_TYPE_SNAPSHOT)) { ZFS_TYPE_SNAPSHOT)) {
@ -1263,6 +1307,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
flags |= LZC_SEND_FLAG_EMBED_DATA; flags |= LZC_SEND_FLAG_EMBED_DATA;
if (sdd->compress) if (sdd->compress)
flags |= LZC_SEND_FLAG_COMPRESS; flags |= LZC_SEND_FLAG_COMPRESS;
if (sdd->raw)
flags |= LZC_SEND_FLAG_RAW;
if (!sdd->doall && !isfromsnap && !istosnap) { if (!sdd->doall && !isfromsnap && !istosnap) {
if (sdd->replicate) { if (sdd->replicate) {
@ -1646,6 +1692,8 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
if (flags->compress || nvlist_exists(resume_nvl, "compressok")) if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
lzc_flags |= LZC_SEND_FLAG_COMPRESS; lzc_flags |= LZC_SEND_FLAG_COMPRESS;
if (flags->raw || nvlist_exists(resume_nvl, "rawok"))
lzc_flags |= LZC_SEND_FLAG_RAW;
if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
@ -1723,6 +1771,11 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
switch (error) { switch (error) {
case 0: case 0:
return (0); return (0);
case EACCES:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"source key must be loaded"));
return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
case EXDEV: case EXDEV:
case ENOENT: case ENOENT:
case EDQUOT: case EDQUOT:
@ -1801,7 +1854,14 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
} }
} }
if (flags->dedup && !flags->dryrun) { /*
* Start the dedup thread if this is a dedup stream. We do not bother
* doing this if this is a raw send of an encrypted dataset with dedup off
* because normal encrypted blocks won't dedup.
*/
if (flags->dedup && !flags->dryrun && !(flags->raw &&
zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF &&
zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) {
featureflags |= (DMU_BACKUP_FEATURE_DEDUP | featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_BACKUP_FEATURE_DEDUPPROPS);
if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) { if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) {
@ -1842,10 +1902,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
VERIFY(0 == nvlist_add_boolean(hdrnv, VERIFY(0 == nvlist_add_boolean(hdrnv,
"not_recursive")); "not_recursive"));
} }
if (flags->raw) {
VERIFY(0 == nvlist_add_boolean(hdrnv, "raw"));
}
err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
fromsnap, tosnap, flags->replicate, flags->verbose, fromsnap, tosnap, flags->replicate, flags->raw,
&fss, &fsavl); flags->verbose, &fss, &fsavl);
if (err) if (err)
goto err_out; goto err_out;
VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
@ -1914,6 +1977,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sdd.large_block = flags->largeblock; sdd.large_block = flags->largeblock;
sdd.embed_data = flags->embed_data; sdd.embed_data = flags->embed_data;
sdd.compress = flags->compress; sdd.compress = flags->compress;
sdd.raw = flags->raw;
sdd.filter_cb = filter_func; sdd.filter_cb = filter_func;
sdd.filter_cb_arg = cb_arg; sdd.filter_cb_arg = cb_arg;
if (debugnvp) if (debugnvp)
@ -2075,6 +2139,11 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
} }
return (zfs_error(hdl, EZFS_NOENT, errbuf)); return (zfs_error(hdl, EZFS_NOENT, errbuf));
case EACCES:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"dataset key must be loaded"));
return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
case EBUSY: case EBUSY:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"target is busy; if a filesystem, " "target is busy; if a filesystem, "
@ -2165,6 +2234,63 @@ recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
return (0); return (0);
} }
/*
 * Walk the chain of clone origins starting at 'zhp' and return a handle to
 * the dataset at the end of that chain (the origin of the origin of the
 * origin...). When 'zhp' is not a clone the result is simply a duplicate of
 * 'zhp' itself.
 *
 * Returns NULL if the initial duplication, or the open of any origin along
 * the way, does not produce a handle. The callers in this file release the
 * returned handle with zfs_close().
 */
static zfs_handle_t *
recv_open_grand_origin(zfs_handle_t *zhp)
{
	char origin_name[ZFS_MAX_DATASET_NAME_LEN];
	zprop_source_t origin_src;
	zfs_handle_t *cur;

	for (cur = zfs_handle_dup(zhp); cur != NULL;
	    cur = zfs_open(zhp->zfs_hdl, origin_name, ZFS_TYPE_FILESYSTEM)) {
		/*
		 * A failed origin lookup ends the walk: 'cur' is the last
		 * dataset in the chain and is handed back to the caller.
		 */
		if (zfs_prop_get(cur, ZFS_PROP_ORIGIN, origin_name,
		    sizeof (origin_name), &origin_src, NULL, 0, B_FALSE) != 0)
			return (cur);

		/* step to the origin; the current handle is done with */
		(void) zfs_close(cur);
	}

	return (NULL);
}
/*
 * Issue the ZFS_IOC_RENAME ioctl described by 'zc' on behalf of a receive.
 *
 * If the rename is refused with EACCES, the dataset is being moved outside
 * of its encryption root. In that case the grand origin of 'zhp' is forced
 * to become an encryption root of its own (DCP_CMD_FORCE_NEW_KEY) and the
 * rename is attempted a second time.
 *
 * zhp - handle of the dataset being renamed
 * zc  - fully populated rename command (zc_name / zc_value set by caller)
 *
 * Returns 0 on success. On failure returns the non-zero result of the step
 * that failed: the ioctl() return value, ENOENT if the grand origin could
 * not be opened, or the error from lzc_change_key().
 */
static int
recv_rename_impl(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
	int err;
	zfs_handle_t *ozhp = NULL;

	/*
	 * 'zc' is already a pointer to the command structure, so it must be
	 * handed to ioctl() as-is; passing '&zc' would give the kernel the
	 * address of our local pointer instead of the command.
	 *
	 * ioctl(2) signals failure by returning -1 and storing the reason in
	 * errno, so errno (not the return value) is compared with EACCES.
	 */
	err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, zc);
	if (err != 0 && errno == EACCES) {
		ozhp = recv_open_grand_origin(zhp);
		if (ozhp == NULL) {
			err = ENOENT;
			goto out;
		}

		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
		    NULL, NULL, 0);
		if (err != 0)
			goto out;

		/* the dataset is now its own encryption root; try again */
		err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, zc);
	}

out:
	if (ozhp != NULL)
		zfs_close(ozhp);
	return (err);
}
static int static int
recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
int baselen, char *newname, recvflags_t *flags) int baselen, char *newname, recvflags_t *flags)
@ -2172,20 +2298,23 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
static int seq; static int seq;
zfs_cmd_t zc = {"\0"}; zfs_cmd_t zc = {"\0"};
int err; int err;
prop_changelist_t *clp; prop_changelist_t *clp = NULL;
zfs_handle_t *zhp; zfs_handle_t *zhp = NULL;
zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
if (zhp == NULL) if (zhp == NULL) {
return (-1); err = -1;
goto out;
}
clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
flags->force ? MS_FORCE : 0); flags->force ? MS_FORCE : 0);
zfs_close(zhp); if (clp == NULL) {
if (clp == NULL) err = -1;
return (-1); goto out;
}
err = changelist_prefix(clp); err = changelist_prefix(clp);
if (err) if (err)
return (err); goto out;
zc.zc_objset_type = DMU_OST_ZFS; zc.zc_objset_type = DMU_OST_ZFS;
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
@ -2199,7 +2328,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
(void) printf("attempting rename %s to %s\n", (void) printf("attempting rename %s to %s\n",
zc.zc_name, zc.zc_value); zc.zc_name, zc.zc_value);
} }
err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); err = recv_rename_impl(zhp, &zc);
if (err == 0) if (err == 0)
changelist_rename(clp, name, tryname); changelist_rename(clp, name, tryname);
} else { } else {
@ -2217,7 +2346,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
(void) printf("failed - trying rename %s to %s\n", (void) printf("failed - trying rename %s to %s\n",
zc.zc_name, zc.zc_value); zc.zc_name, zc.zc_value);
} }
err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); err = recv_rename_impl(zhp, &zc);
if (err == 0) if (err == 0)
changelist_rename(clp, name, newname); changelist_rename(clp, name, newname);
if (err && flags->verbose) { if (err && flags->verbose) {
@ -2233,7 +2362,62 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
} }
(void) changelist_postfix(clp); (void) changelist_postfix(clp);
changelist_free(clp);
out:
if (clp != NULL)
changelist_free(clp);
if (zhp != NULL)
zfs_close(zhp);
return (err);
}
/*
 * Promote the clone 'fsname' during a receive.
 *
 * If the promote ioctl reports EACCES, the promotion would make the dataset
 * leave its encryption root. The grand origin of 'fsname' is then forced to
 * become an encryption root (DCP_CMD_FORCE_NEW_KEY) and the promotion is
 * attempted once more.
 *
 * hdl           - libzfs handle
 * fsname        - dataset to promote (copied into zc_name)
 * origin_fsname - name copied into zc_value of the promote command
 * flags         - receive flags; only 'verbose' is consulted here
 *
 * Returns 0 on success; otherwise the result of the failing step (-1 when
 * a handle could not be opened).
 *
 * NOTE(review): the EACCES test is applied to the return value of
 * zfs_ioctl(); confirm that zfs_ioctl() returns the errno value rather than
 * -1, otherwise the retry path is never taken.
 */
static int
recv_promote(libzfs_handle_t *hdl, const char *fsname,
    const char *origin_fsname, recvflags_t *flags)
{
	zfs_cmd_t zc = {"\0"};
	zfs_handle_t *fs_hdl;
	zfs_handle_t *root_hdl;
	int rc;

	if (flags->verbose)
		(void) printf("promoting %s\n", fsname);

	(void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));

	/* first attempt; anything other than EACCES is final */
	rc = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
	if (rc != EACCES)
		return (rc);

	fs_hdl = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
	if (fs_hdl == NULL)
		return (-1);

	root_hdl = recv_open_grand_origin(fs_hdl);
	if (root_hdl == NULL) {
		zfs_close(fs_hdl);
		return (-1);
	}

	/* make the grand origin an encryption root, then retry */
	rc = lzc_change_key(root_hdl->zfs_name, DCP_CMD_FORCE_NEW_KEY,
	    NULL, NULL, 0);
	if (rc == 0)
		rc = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);

	zfs_close(fs_hdl);
	zfs_close(root_hdl);
	return (rc);
}
@ -2435,6 +2619,140 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
return (rv); return (rv);
} }
/*
 * This function reestablishes the hierarchy of encryption roots after a
 * recursive incremental receive has completed. This must be done after the
 * second call to recv_incremental_replication() has renamed and promoted all
 * sent datasets to their final locations in the dataset hierarchy.
 *
 * For every filesystem listed under "fss" in the stream header, the local
 * dataset is located through one of the stream's snapshot guids and then:
 *   - if the stream flags it "is_encroot", it is forced to become an
 *     encryption root (unless it is a clone or already is one) and its
 *     keylocation is set to the value carried in the stream;
 *   - if the stream does not flag it but it currently is an encryption
 *     root, it is forced to inherit from its parent.
 * Unencrypted datasets, and filesystems for which no stream snapshot exists
 * locally, are skipped.
 *
 * hdl        - libzfs handle
 * destname   - name passed to guid_to_name() to locate received datasets
 * stream_nv  - nvlist header of the stream (must contain "fss")
 * stream_avl - not used by this function
 *
 * Returns 0 on success or the error of the first step that failed.
 */
static int
recv_fix_encryption_heirarchy(libzfs_handle_t *hdl, const char *destname,
    nvlist_t *stream_nv, avl_tree_t *stream_avl)
{
	int err;
	nvpair_t *fselem = NULL;
	nvlist_t *stream_fss;

	VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss));

	while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
		zfs_handle_t *zhp = NULL;
		uint64_t crypt;
		nvlist_t *snaps, *props, *stream_nvfs = NULL;
		nvpair_t *snapel = NULL;
		/*
		 * The result of zfs_crypto_get_encryption_root() is ignored
		 * below, so give is_encroot a defined value in case that
		 * call fails without storing one.
		 */
		boolean_t is_encroot = B_FALSE;
		boolean_t is_clone, stream_encroot;
		char *cp;
		char *stream_keylocation = NULL;
		char keylocation[MAXNAMELEN];
		char fsname[ZFS_MAX_DATASET_NAME_LEN];

		keylocation[0] = '\0';
		VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs));
		VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps));
		VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props));
		stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");

		/* find a snapshot from the stream that exists locally */
		err = ENOENT;
		while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
			uint64_t guid;

			VERIFY(0 == nvpair_value_uint64(snapel, &guid));
			err = guid_to_name(hdl, destname, guid, B_FALSE,
			    fsname);
			if (err == 0)
				break;
		}

		/* nothing from this filesystem was received; skip it */
		if (err != 0)
			continue;

		/* strip the snapshot component to get the dataset name */
		cp = strchr(fsname, '@');
		if (cp != NULL)
			*cp = '\0';

		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
		if (zhp == NULL) {
			err = ENOENT;
			goto error;
		}

		crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
		is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
		(void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);

		/* we don't need to do anything for unencrypted filesystems */
		if (crypt == ZIO_CRYPT_OFF) {
			zfs_close(zhp);
			continue;
		}

		/*
		 * If the dataset is flagged as an encryption root, was not
		 * received as a clone and is not currently an encryption root,
		 * force it to become one. Fixup the keylocation if necessary.
		 */
		if (stream_encroot) {
			if (!is_clone && !is_encroot) {
				err = lzc_change_key(fsname,
				    DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
				if (err != 0) {
					zfs_close(zhp);
					goto error;
				}
			}

			VERIFY(0 == nvlist_lookup_string(props,
			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
			    &stream_keylocation));

			/*
			 * Refresh the properties in case the call to
			 * lzc_change_key() changed the value.
			 */
			zfs_refresh_properties(zhp);
			err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
			    keylocation, sizeof (keylocation), NULL, NULL,
			    0, B_TRUE);
			if (err != 0) {
				zfs_close(zhp);
				goto error;
			}

			if (strcmp(keylocation, stream_keylocation) != 0) {
				err = zfs_prop_set(zhp,
				    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
				    stream_keylocation);
				if (err != 0) {
					zfs_close(zhp);
					goto error;
				}
			}
		}

		/*
		 * If the dataset is not flagged as an encryption root and is
		 * currently an encryption root, force it to inherit from its
		 * parent.
		 */
		if (!stream_encroot && is_encroot) {
			err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
			    NULL, NULL, 0);
			if (err != 0) {
				zfs_close(zhp);
				goto error;
			}
		}

		zfs_close(zhp);
	}

	return (0);

error:
	return (err);
}
static int static int
recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
@ -2464,7 +2782,7 @@ again:
VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
recursive, B_FALSE, &local_nv, &local_avl)) != 0) recursive, B_TRUE, B_FALSE, &local_nv, &local_avl)) != 0)
return (error); return (error);
/* /*
@ -2513,22 +2831,15 @@ again:
stream_originguid, originguid)) { stream_originguid, originguid)) {
case 1: { case 1: {
/* promote it! */ /* promote it! */
zfs_cmd_t zc = {"\0"};
nvlist_t *origin_nvfs; nvlist_t *origin_nvfs;
char *origin_fsname; char *origin_fsname;
if (flags->verbose)
(void) printf("promoting %s\n", fsname);
origin_nvfs = fsavl_find(local_avl, originguid, origin_nvfs = fsavl_find(local_avl, originguid,
NULL); NULL);
VERIFY(0 == nvlist_lookup_string(origin_nvfs, VERIFY(0 == nvlist_lookup_string(origin_nvfs,
"name", &origin_fsname)); "name", &origin_fsname));
(void) strlcpy(zc.zc_value, origin_fsname, error = recv_promote(hdl, fsname, origin_fsname,
sizeof (zc.zc_value)); flags);
(void) strlcpy(zc.zc_name, fsname,
sizeof (zc.zc_name));
error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
if (error == 0) if (error == 0)
progress = B_TRUE; progress = B_TRUE;
break; break;
@ -2744,7 +3055,7 @@ doagain:
goto again; goto again;
} }
return (needagain); return (needagain || error != 0);
} }
static int static int
@ -2765,7 +3076,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
int error; int error;
boolean_t anyerr = B_FALSE; boolean_t anyerr = B_FALSE;
boolean_t softerr = B_FALSE; boolean_t softerr = B_FALSE;
boolean_t recursive; boolean_t recursive, raw;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive")); "cannot receive"));
@ -2789,6 +3100,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
ENOENT); ENOENT);
raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
if (recursive && strchr(destname, '@')) { if (recursive && strchr(destname, '@')) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@ -2944,6 +3256,11 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
stream_nv, stream_avl, NULL); stream_nv, stream_avl, NULL);
} }
if (raw && softerr == 0) {
softerr = recv_fix_encryption_heirarchy(hdl, destname,
stream_nv, stream_avl);
}
out: out:
fsavl_destroy(stream_avl); fsavl_destroy(stream_avl);
nvlist_free(stream_nv); nvlist_free(stream_nv);
@ -3194,7 +3511,7 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, boolean_t zoned,
if (toplevel) { if (toplevel) {
/* convert override strings properties to native */ /* convert override strings properties to native */
if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET, if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
oprops, zoned, zhp, zpool_hdl, errbuf)) == NULL) { oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
ret = zfs_error(hdl, EZFS_BADPROP, errbuf); ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
goto error; goto error;
} }
@ -3247,6 +3564,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
char destsnap[MAXPATHLEN * 2]; char destsnap[MAXPATHLEN * 2];
char origin[MAXNAMELEN]; char origin[MAXNAMELEN];
char name[MAXPATHLEN]; char name[MAXPATHLEN];
char tmp_keylocation[MAXNAMELEN];
nvlist_t *rcvprops = NULL; /* props received from the send stream */ nvlist_t *rcvprops = NULL; /* props received from the send stream */
nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */ nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
nvlist_t *origprops = NULL; /* original props (if destination exists) */ nvlist_t *origprops = NULL; /* original props (if destination exists) */
@ -3256,6 +3574,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
begin_time = time(NULL); begin_time = time(NULL);
bzero(origin, MAXNAMELEN); bzero(origin, MAXNAMELEN);
bzero(tmp_keylocation, MAXNAMELEN);
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive")); "cannot receive"));
@ -3264,6 +3583,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
ENOENT); ENOENT);
if (stream_avl != NULL) { if (stream_avl != NULL) {
char *keylocation = NULL;
nvlist_t *lookup = NULL; nvlist_t *lookup = NULL;
nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
&snapname); &snapname);
@ -3276,6 +3596,22 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
newprops = B_TRUE; newprops = B_TRUE;
} }
/*
* The keylocation property may only be set on encryption roots,
* but this dataset might not become an encryption root until
* recv_fix_encryption_heirarchy() is called. That function
* will fixup the keylocation anyway, so we temporarily unset
* the keylocation for now to avoid any errors from the receive
* ioctl.
*/
err = nvlist_lookup_string(rcvprops,
zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
if (err == 0) {
strcpy(tmp_keylocation, keylocation);
(void) nvlist_remove_all(rcvprops,
zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
}
if (flags->canmountoff) { if (flags->canmountoff) {
VERIFY(0 == nvlist_add_uint64(rcvprops, VERIFY(0 == nvlist_add_uint64(rcvprops,
zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
@ -3397,6 +3733,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_RESUMING; DMU_BACKUP_FEATURE_RESUMING;
boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_RAW;
stream_wantsnewfs = (drrb->drr_fromguid == 0 || stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
(drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
@ -3503,6 +3841,26 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
goto out; goto out;
} }
/*
* zfs recv -F can't be used to blow away an existing
* encrypted filesystem. This is because it would require
* the dsl dir to point to the new key (or lack of a
* key) and the old key at the same time. The -F flag may
* still be used for deleting intermediate snapshots that
* would otherwise prevent the receive from working.
*/
if (stream_wantsnewfs && flags->force &&
zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
ZIO_CRYPT_OFF) {
zfs_close(zhp);
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"zfs receive -F cannot be used to "
"destroy an encrypted filesystem"));
err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
goto out;
}
if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
stream_wantsnewfs) { stream_wantsnewfs) {
/* We can't do online recv in this case */ /* We can't do online recv in this case */
@ -3541,6 +3899,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
zfs_close(zhp); zfs_close(zhp);
} else { } else {
zfs_handle_t *zhp;
/* /*
* Destination filesystem does not exist. Therefore we better * Destination filesystem does not exist. Therefore we better
* be creating a new filesystem (either from a full backup, or * be creating a new filesystem (either from a full backup, or
@ -3569,7 +3929,39 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
goto out; goto out;
} }
/*
* It is invalid to receive a properties stream that was
* unencrypted on the send side as a child of an encrypted
* parent. Technically there is nothing preventing this, but
* it would mean that the encryption=off property which is
* locally set on the send side would not be received correctly.
* We can infer encryption=off if the stream is not raw and
* properties were included since the send side will only ever
* send the encryption property in a raw nvlist header.
*/
if (!raw && rcvprops != NULL) {
uint64_t crypt;
zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
if (zhp == NULL) {
err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
goto out;
}
crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
zfs_close(zhp);
if (crypt != ZIO_CRYPT_OFF) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"parent '%s' must not be encrypted to "
"receive unenecrypted property"), name);
err = zfs_error(hdl, EZFS_BADPROP, errbuf);
goto out;
}
}
newfs = B_TRUE; newfs = B_TRUE;
*cp = '/';
} }
if (flags->verbose) { if (flags->verbose) {
@ -3601,7 +3993,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
goto out; goto out;
err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops, err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops,
origin, flags->force, flags->resumable, infd, drr_noswap, origin, flags->force, flags->resumable, raw, infd, drr_noswap,
cleanup_fd, &read_bytes, &errflags, action_handlep, &prop_errors); cleanup_fd, &read_bytes, &errflags, action_handlep, &prop_errors);
ioctl_errno = ioctl_err; ioctl_errno = ioctl_err;
prop_errflags = errflags; prop_errflags = errflags;
@ -3672,7 +4064,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
* get a strange "does not exist" error message. * get a strange "does not exist" error message.
*/ */
*cp = '\0'; *cp = '\0';
if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
B_FALSE, &local_nv, &local_avl) == 0) { B_FALSE, &local_nv, &local_avl) == 0) {
*cp = '@'; *cp = '@';
fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
@ -3708,6 +4100,20 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
"since most recent snapshot"), name); "since most recent snapshot"), name);
(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
break; break;
case EACCES:
if (raw && stream_wantsnewfs) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"failed to create encryption key"));
} else if (raw && !stream_wantsnewfs) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"encryption key does not match "
"existing key"));
} else {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"inherited key must be loaded"));
}
(void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
break;
case EEXIST: case EEXIST:
cp = strchr(destsnap, '@'); cp = strchr(destsnap, '@');
if (newfs) { if (newfs) {
@ -3816,6 +4222,11 @@ out:
if (prop_errors != NULL) if (prop_errors != NULL)
nvlist_free(prop_errors); nvlist_free(prop_errors);
if (tmp_keylocation[0] != '\0') {
VERIFY(0 == nvlist_add_string(rcvprops,
zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation));
}
if (newprops) if (newprops)
nvlist_free(rcvprops); nvlist_free(rcvprops);

View File

@ -264,6 +264,8 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_ACTIVE_POOL: case EZFS_ACTIVE_POOL:
return (dgettext(TEXT_DOMAIN, "pool is imported on a " return (dgettext(TEXT_DOMAIN, "pool is imported on a "
"different host")); "different host"));
case EZFS_CRYPTOFAILED:
return (dgettext(TEXT_DOMAIN, "encryption failure"));
case EZFS_UNKNOWN: case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error")); return (dgettext(TEXT_DOMAIN, "unknown error"));
default: default:

View File

@ -175,34 +175,49 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name,
} }
out: out:
fnvlist_pack_free(packed, size); if (packed != NULL)
fnvlist_pack_free(packed, size);
free((void *)(uintptr_t)zc.zc_nvlist_dst); free((void *)(uintptr_t)zc.zc_nvlist_dst);
return (error); return (error);
} }
int int
lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props) lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
uint8_t *wkeydata, uint_t wkeylen)
{ {
int error; int error;
nvlist_t *hidden_args = NULL;
nvlist_t *args = fnvlist_alloc(); nvlist_t *args = fnvlist_alloc();
fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
if (props != NULL) if (props != NULL)
fnvlist_add_nvlist(args, "props", props); fnvlist_add_nvlist(args, "props", props);
if (wkeydata != NULL) {
hidden_args = fnvlist_alloc();
fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
wkeylen);
fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
}
error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
nvlist_free(hidden_args);
nvlist_free(args); nvlist_free(args);
return (error); return (error);
} }
int int
lzc_clone(const char *fsname, const char *origin, lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
nvlist_t *props)
{ {
int error; int error;
nvlist_t *hidden_args = NULL;
nvlist_t *args = fnvlist_alloc(); nvlist_t *args = fnvlist_alloc();
fnvlist_add_string(args, "origin", origin); fnvlist_add_string(args, "origin", origin);
if (props != NULL) if (props != NULL)
fnvlist_add_nvlist(args, "props", props); fnvlist_add_nvlist(args, "props", props);
error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
nvlist_free(hidden_args);
nvlist_free(args); nvlist_free(args);
return (error); return (error);
} }
@ -532,6 +547,8 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
fnvlist_add_boolean(args, "embedok"); fnvlist_add_boolean(args, "embedok");
if (flags & LZC_SEND_FLAG_COMPRESS) if (flags & LZC_SEND_FLAG_COMPRESS)
fnvlist_add_boolean(args, "compressok"); fnvlist_add_boolean(args, "compressok");
if (flags & LZC_SEND_FLAG_RAW)
fnvlist_add_boolean(args, "rawok");
if (resumeobj != 0 || resumeoff != 0) { if (resumeobj != 0 || resumeoff != 0) {
fnvlist_add_uint64(args, "resume_object", resumeobj); fnvlist_add_uint64(args, "resume_object", resumeobj);
fnvlist_add_uint64(args, "resume_offset", resumeoff); fnvlist_add_uint64(args, "resume_offset", resumeoff);
@ -601,17 +618,17 @@ recv_read(int fd, void *buf, int ilen)
} }
/* /*
* Linux adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
* ZFS_IOC_RECV user/kernel interface. The new interface supports all stream * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
* options but is currently only used for resumable streams. This way updated * stream options but is currently only used for resumable streams. This way
* user space utilities will interoperate with older kernel modules. * updated user space utilities will interoperate with older kernel modules.
* *
* Non-Linux OpenZFS platforms have opted to modify the legacy interface. * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
*/ */
static int static int
recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
const char *origin, boolean_t force, boolean_t resumable, int input_fd, const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
const dmu_replay_record_t *begin_record, int cleanup_fd, int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors) nvlist_t **errors)
{ {
@ -651,7 +668,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
drr = *begin_record; drr = *begin_record;
} }
if (resumable) { if (resumable || raw) {
nvlist_t *outnvl = NULL; nvlist_t *outnvl = NULL;
nvlist_t *innvl = fnvlist_alloc(); nvlist_t *innvl = fnvlist_alloc();
@ -792,10 +809,10 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
*/ */
int int
lzc_receive(const char *snapname, nvlist_t *props, const char *origin, lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, int fd) boolean_t force, boolean_t raw, int fd)
{ {
return (recv_impl(snapname, props, NULL, origin, force, B_FALSE, fd, return (recv_impl(snapname, props, NULL, origin, force, B_FALSE, raw,
NULL, -1, NULL, NULL, NULL, NULL)); fd, NULL, -1, NULL, NULL, NULL, NULL));
} }
/* /*
@ -806,10 +823,10 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
*/ */
int int
lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, int fd) boolean_t force, boolean_t raw, int fd)
{ {
return (recv_impl(snapname, props, NULL, origin, force, B_TRUE, fd, return (recv_impl(snapname, props, NULL, origin, force, B_TRUE, raw,
NULL, -1, NULL, NULL, NULL, NULL)); fd, NULL, -1, NULL, NULL, NULL, NULL));
} }
/* /*
@ -825,13 +842,14 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
*/ */
int int
lzc_receive_with_header(const char *snapname, nvlist_t *props, lzc_receive_with_header(const char *snapname, nvlist_t *props,
const char *origin, boolean_t force, boolean_t resumable, int fd, const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
const dmu_replay_record_t *begin_record) int fd, const dmu_replay_record_t *begin_record)
{ {
if (begin_record == NULL) if (begin_record == NULL)
return (EINVAL); return (EINVAL);
return (recv_impl(snapname, props, NULL, origin, force, resumable, fd,
begin_record, -1, NULL, NULL, NULL, NULL)); return (recv_impl(snapname, props, NULL, origin, force, resumable, raw,
fd, begin_record, -1, NULL, NULL, NULL, NULL));
} }
/* /*
@ -855,13 +873,13 @@ lzc_receive_with_header(const char *snapname, nvlist_t *props,
* property. Callers are responsible for freeing this nvlist. * property. Callers are responsible for freeing this nvlist.
*/ */
int lzc_receive_one(const char *snapname, nvlist_t *props, int lzc_receive_one(const char *snapname, nvlist_t *props,
const char *origin, boolean_t force, boolean_t resumable, int input_fd, const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
const dmu_replay_record_t *begin_record, int cleanup_fd, int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors) nvlist_t **errors)
{ {
return (recv_impl(snapname, props, NULL, origin, force, resumable, return (recv_impl(snapname, props, NULL, origin, force, resumable,
input_fd, begin_record, cleanup_fd, read_bytes, errflags, raw, input_fd, begin_record, cleanup_fd, read_bytes, errflags,
action_handle, errors)); action_handle, errors));
} }
@ -875,12 +893,13 @@ int lzc_receive_one(const char *snapname, nvlist_t *props,
*/ */
int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props, int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
nvlist_t *cmdprops, const char *origin, boolean_t force, nvlist_t *cmdprops, const char *origin, boolean_t force,
boolean_t resumable, int input_fd, const dmu_replay_record_t *begin_record, boolean_t resumable, boolean_t raw, int input_fd,
int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *action_handle, nvlist_t **errors) uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
{ {
return (recv_impl(snapname, props, cmdprops, origin, force, resumable, return (recv_impl(snapname, props, cmdprops, origin, force, resumable,
input_fd, begin_record, cleanup_fd, read_bytes, errflags, raw, input_fd, begin_record, cleanup_fd, read_bytes, errflags,
action_handle, errors)); action_handle, errors));
} }
@ -1027,3 +1046,66 @@ lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
return (error); return (error);
} }
/*
 * Load the wrapping key for the dataset named by fsname.
 *
 * The key material (wkeydata, wkeylen bytes) is not placed directly in the
 * ioctl arguments; it is wrapped in a nested nvlist stored under
 * ZPOOL_HIDDEN_ARGS so that it is not logged. When noop is set, a "noop"
 * boolean flag is added to the request.
 *
 * Returns EINVAL if wkeydata is NULL, otherwise whatever the
 * ZFS_IOC_LOAD_KEY ioctl returns.
 */
int
lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
    uint_t wkeylen)
{
	nvlist_t *innvl;
	nvlist_t *secret;
	int rc;

	/* A key is mandatory for this request; reject before allocating. */
	if (wkeydata == NULL)
		return (EINVAL);

	innvl = fnvlist_alloc();
	secret = fnvlist_alloc();

	/* Key bytes travel only inside the hidden-args nvlist. */
	fnvlist_add_uint8_array(secret, "wkeydata", wkeydata, wkeylen);
	fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS, secret);

	if (noop) {
		fnvlist_add_boolean(innvl, "noop");
	}

	rc = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, innvl, NULL);

	nvlist_free(secret);
	nvlist_free(innvl);

	return (rc);
}
/*
 * Unload the key for the dataset named by fsname by issuing
 * ZFS_IOC_UNLOAD_KEY with no input or output nvlists. Returns the
 * ioctl's result unchanged.
 */
int
lzc_unload_key(const char *fsname)
{
	int rc;

	rc = lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL);

	return (rc);
}
/*
 * Issue ZFS_IOC_CHANGE_KEY for the dataset named by fsname.
 *
 * crypt_cmd is always sent as "crypt_cmd". If wkeydata is non-NULL, the
 * key bytes (wkeylen of them) are sent inside a nested nvlist under
 * ZPOOL_HIDDEN_ARGS. If props is non-NULL, it is sent as "props".
 *
 * Returns the ioctl's result unchanged.
 */
int
lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
    uint8_t *wkeydata, uint_t wkeylen)
{
	nvlist_t *secret = NULL;
	nvlist_t *innvl = fnvlist_alloc();
	int rc;

	fnvlist_add_uint64(innvl, "crypt_cmd", crypt_cmd);

	/* Key bytes, when supplied, travel only inside the hidden args. */
	if (wkeydata != NULL) {
		secret = fnvlist_alloc();
		fnvlist_add_uint8_array(secret, "wkeydata", wkeydata,
		    wkeylen);
		fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS, secret);
	}

	if (props != NULL) {
		fnvlist_add_nvlist(innvl, "props", props);
	}

	rc = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, innvl, NULL);

	/* secret is still NULL here when no key was supplied. */
	nvlist_free(secret);
	nvlist_free(innvl);

	return (rc);
}

View File

@ -60,6 +60,7 @@ KERNEL_C = \
dsl_deadlist.c \ dsl_deadlist.c \
dsl_deleg.c \ dsl_deleg.c \
dsl_dir.c \ dsl_dir.c \
dsl_crypt.c \
dsl_pool.c \ dsl_pool.c \
dsl_prop.c \ dsl_prop.c \
dsl_scan.c \ dsl_scan.c \
@ -128,6 +129,7 @@ KERNEL_C = \
zio.c \ zio.c \
zio_checksum.c \ zio_checksum.c \
zio_compress.c \ zio_compress.c \
zio_crypt.c \
zio_inject.c \ zio_inject.c \
zle.c \ zle.c \
zrlock.c zrlock.c

View File

@ -619,5 +619,26 @@ files.
.RE .RE
.sp
.ne 2
.na
\fB\fBencryption\fR\fR
.ad
.RS 4n
.TS
l l .
GUID com.datto:encryption
READ\-ONLY COMPATIBLE no
DEPENDENCIES extensible_dataset
.TE
This feature enables the creation and management of natively encrypted datasets.
This feature becomes \fBactive\fR when an encrypted dataset is created and will
be returned to the \fBenabled\fR state when all datasets that use this feature
are destroyed.
.RE
.SH "SEE ALSO" .SH "SEE ALSO"
\fBzpool\fR(8) \fBzpool\fR(8)

View File

@ -148,7 +148,7 @@
.Cm mount .Cm mount
.Nm .Nm
.Cm mount .Cm mount
.Op Fl Ov .Op Fl Olv
.Op Fl o Ar options .Op Fl o Ar options
.Fl a | Ar filesystem .Fl a | Ar filesystem
.Nm .Nm
@ -166,12 +166,12 @@
.Ar snapshot bookmark .Ar snapshot bookmark
.Nm .Nm
.Cm send .Cm send
.Op Fl DLPRcenpv .Op Fl DLPRcenpvw
.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
.Ar snapshot .Ar snapshot
.Nm .Nm
.Cm send .Cm send
.Op Fl Lce .Op Fl Lcew
.Op Fl i Ar snapshot Ns | Ns Ar bookmark .Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Nm .Nm
@ -270,6 +270,27 @@
.Cm diff .Cm diff
.Op Fl FHt .Op Fl FHt
.Ar snapshot Ar snapshot Ns | Ns Ar filesystem .Ar snapshot Ar snapshot Ns | Ns Ar filesystem
.Nm
.Cm load-key
.Op Fl nr
.Op Fl L Ar keylocation
.Fl a | Ar filesystem
.Nm
.Cm unload-key
.Op Fl r
.Fl a | Ar filesystem
.Nm
.Cm change-key
.Op Fl l
.Op Fl o Ar keylocation Ns = Ns Ar value
.Op Fl o Ar keyformat Ns = Ns Ar value
.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
.Ar filesystem
.Nm
.Cm change-key
.Fl i
.Op Fl l
.Ar filesystem
.Sh DESCRIPTION .Sh DESCRIPTION
The The
.Nm .Nm
@ -572,12 +593,36 @@ if the snapshot has been marked for deferred destroy by using the
command. command.
Otherwise, the property is Otherwise, the property is
.Sy off . .Sy off .
.It Sy encryptionroot
For encrypted datasets, indicates where the dataset is currently inheriting its
encryption key from. Loading or unloading a key for the
.Sy encryptionroot
will implicitly load / unload the key for any inheriting datasets (see
.Nm zfs Cm load-key
and
.Nm zfs Cm unload-key
for details).
Clones will always share an
encryption key with their origin. See the
.Sx Encryption
section for details.
.It Sy filesystem_count .It Sy filesystem_count
The total number of filesystems and volumes that exist under this location in The total number of filesystems and volumes that exist under this location in
the dataset tree. the dataset tree.
This value is only available when a This value is only available when a
.Sy filesystem_limit .Sy filesystem_limit
has been set somewhere in the tree under which the dataset resides. has been set somewhere in the tree under which the dataset resides.
.It Sy keystatus
Indicates if an encryption key is currently loaded into ZFS. The possible
values are
.Sy none ,
.Sy available ,
and
.Sy unavailable .
See
.Nm zfs Cm load-key
and
.Nm zfs Cm unload-key .
.It Sy guid .It Sy guid
The 64 bit GUID of this dataset or bookmark which does not change over its The 64 bit GUID of this dataset or bookmark which does not change over its
entire lifetime. When a snapshot is sent to another pool, the received entire lifetime. When a snapshot is sent to another pool, the received
@ -1218,6 +1263,93 @@ that doesn't support the large_dnode feature.
.Pp .Pp
This property can also be referred to by its shortened column name, This property can also be referred to by its shortened column name,
.Sy dnsize . .Sy dnsize .
.It Xo
.Sy encryption Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy aes-128-ccm Ns | Ns
.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns
.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm
.Xc
Controls the encryption cipher suite (block cipher, key length, and mode) used
for this dataset. Requires the
.Sy encryption
feature to be enabled on the pool.
Requires a
.Sy keyformat
to be set at dataset creation time.
.Pp
Selecting
.Sy encryption Ns = Ns Sy on
when creating a dataset indicates that the default encryption suite will be
selected, which is currently
.Sy aes-256-ccm .
In order to provide consistent data protection, encryption must be specified at
dataset creation time and it cannot be changed afterwards.
.Pp
For more details and caveats about encryption see the
.Sx Encryption
section.
.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase
Controls what format the user's encryption key will be provided as. This
property is only set when the dataset is encrypted.
.Pp
Raw keys and hex keys must be 32 bytes long (regardless of the chosen
encryption suite) and must be randomly generated. A raw key can be generated
with the following command:
.Bd -literal
# dd if=/dev/urandom of=/path/to/output/key bs=32 count=1
.Ed
.Pp
Passphrases must be between 8 and 512 bytes long and will be processed through
PBKDF2 before being used (see the
.Sy pbkdf2iters
property). Even though the
encryption suite cannot be changed after dataset creation, the keyformat can be
changed with
.Nm zfs Cm change-key .
.It Xo
.Sy keylocation Ns = Ns Sy prompt Ns | Ns Sy file:// Ns Em </absolute/file/path>
.Xc
Controls where the user's encryption key will be loaded from by default for
commands such as
.Nm zfs Cm load-key
and
.Nm zfs Cm mount Cm -l .
This property is only set for encrypted datasets which are encryption roots.
If unspecified, the default is
.Sy prompt .
.Pp
Even though the encryption suite cannot be changed after dataset creation, the
keylocation can be changed with either
.Nm zfs Cm set
or
.Nm zfs Cm change-key .
If
.Sy prompt
is selected, ZFS will ask for the key at the command prompt when it is required
to access the encrypted data (see
.Nm zfs Cm load-key
for details). This setting will also allow the key to be passed in via STDIN,
but users should be careful not to place keys which should be kept secret on
the command line. If a file URI is selected, the key will be loaded from the
specified absolute file path.
.It Sy pbkdf2iters Ns = Ns Ar iterations
Controls the number of PBKDF2 iterations that a
.Sy passphrase
encryption key should be run through when processing it into an encryption key.
This property is only defined when encryption is enabled and a keyformat of
.Sy passphrase
is selected. The goal of PBKDF2 is to significantly increase the
computational difficulty needed to brute force a user's passphrase. This is
accomplished by forcing the attacker to run each passphrase through a
computationally expensive hashing function many times before they arrive at the
resulting key. A user who actually knows the passphrase will only have to pay
this cost once. As CPUs become better at processing, this number should be
raised to ensure that a brute force attack is still not possible. The current
default is
.Sy 350000
and the minimum is
.Sy 100000 .
This property may be changed with
.Nm zfs Cm change-key .
.It Sy exec Ns = Ns Sy on Ns | Ns Sy off .It Sy exec Ns = Ns Sy on Ns | Ns Sy off
Controls whether processes can be executed from within this file system. Controls whether processes can be executed from within this file system.
The default value is The default value is
@ -2020,6 +2152,69 @@ and
.Xr swapon 8 .Xr swapon 8
commands. Do not swap to a file on a ZFS file system. A ZFS swap file commands. Do not swap to a file on a ZFS file system. A ZFS swap file
configuration is not supported. configuration is not supported.
.Ss Encryption
Enabling the
.Sy encryption
feature allows for the creation of encrypted filesystems and volumes.
.Nm
will encrypt all user data including file and zvol data, file attributes,
ACLs, permission bits, directory listings, FUID mappings, and userused /
groupused data.
.Nm
will not encrypt metadata related to the pool structure, including dataset
names, dataset hierarchy, file size, file holes, and dedup tables. Key rotation
is managed internally by the kernel module and changing the user's key does not
require re-encrypting the entire dataset. Datasets can be scrubbed, resilvered,
renamed, and deleted without the encryption keys being loaded (see the
.Nm zfs Cm load-key
subcommand for more info on key loading).
.Pp
Creating an encrypted dataset requires specifying the
.Sy encryption
and
.Sy keyformat
properties at creation time, along with an optional
.Sy keylocation
and
.Sy pbkdf2iters .
After entering an encryption key, the
created dataset will become an encryption root. Any descendant datasets will
inherit their encryption key from the encryption root, meaning that loading,
unloading, or changing the key for the encryption root will implicitly do the
same for all inheriting datasets. If this inheritance is not desired, simply
supply a new
.Sy encryption
and
.Sy keyformat
when creating the child dataset or use
.Nm zfs Cm change-key
to break the relationship. The one exception is that clones will always use
their origin's encryption key. Encryption root inheritance can be tracked via
the read-only
.Sy encryptionroot
property.
.Pp
Encryption changes the behavior of a few
.Nm
operations. Encryption is applied after compression so compression ratios are
preserved. Normally checksums in ZFS are 256 bits long, but for encrypted data
the checksum is 128 bits of the user-chosen checksum and 128 bits of MAC from
the encryption suite, which provides additional protection against maliciously
altered data. Deduplication is still possible with encryption enabled but for
security, datasets will only dedup against themselves, their snapshots, and
their clones.
.Pp
There are a few limitations on encrypted datasets. Encrypted data cannot be
embedded via the
.Sy embedded_data
feature. Encrypted datasets may not have
.Sy copies Ns = Ns Em 3
since the implementation stores some encryption metadata where the third copy
would normally be. Since compression is applied before encryption, datasets may
be vulnerable to a CRIME-like attack if applications accessing the data allow
for it. Deduplication with encryption will leak information about which blocks
are equivalent in a dataset and will incur an extra CPU cost per block written.
.Sh SUBCOMMANDS .Sh SUBCOMMANDS
All subcommands that modify state are logged persistently to the pool in their All subcommands that modify state are logged persistently to the pool in their
original form. original form.
@ -2776,7 +2971,7 @@ Displays all ZFS file systems currently mounted.
.It Xo .It Xo
.Nm .Nm
.Cm mount .Cm mount
.Op Fl Ov .Op Fl Olv
.Op Fl o Ar options .Op Fl o Ar options
.Fl a | Ar filesystem .Fl a | Ar filesystem
.Xc .Xc
@ -2798,6 +2993,15 @@ duration of the mount.
See the See the
.Sx Temporary Mount Point Properties .Sx Temporary Mount Point Properties
section for details. section for details.
.It Fl l
Load keys for encrypted filesystems as they are being mounted. This is
equivalent to executing
.Nm zfs Cm load-key
on each encryption root before mounting it. Note that if a filesystem has a
.Sy keylocation
of
.Sy prompt
this will cause the terminal to interactively block after asking for the key.
.It Fl v .It Fl v
Report mount progress. Report mount progress.
.El .El
@ -2875,7 +3079,7 @@ feature.
.It Xo .It Xo
.Nm .Nm
.Cm send .Cm send
.Op Fl DLPRcenpv .Op Fl DLPRcenpvw
.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
.Ar snapshot .Ar snapshot
.Xc .Xc
@ -2987,6 +3191,23 @@ option is not supplied in conjunction with
.Fl c , .Fl c ,
then the data will be decompressed before sending so it can be split into then the data will be decompressed before sending so it can be split into
smaller block sizes. smaller block sizes.
.It Fl w, -raw
For encrypted datasets, send data exactly as it exists on disk. This allows
backups to be taken even if encryption keys are not currently loaded. The
backup may then be received on an untrusted machine since that machine will
not have the encryption keys to read the protected data or alter it without
being detected. Upon being received, the dataset will have the same encryption
keys as it did on the send side, although the
.Sy keylocation
property will be defaulted to
.Sy prompt
if not otherwise provided. For unencrypted datasets, this flag will be
equivalent to
.Fl Lec .
Note that if you do not use this flag for sending encrypted datasets, data will
be sent unencrypted and may be re-encrypted with a different encryption key on
the receiving system, which will disable the ability to do a raw send to that
system for incrementals.
.It Fl i Ar snapshot .It Fl i Ar snapshot
Generate an incremental stream from the first Generate an incremental stream from the first
.Ar snapshot .Ar snapshot
@ -3085,6 +3306,23 @@ option is not supplied in conjunction with
.Fl c , .Fl c ,
then the data will be decompressed before sending so it can be split into then the data will be decompressed before sending so it can be split into
smaller block sizes. smaller block sizes.
.It Fl w, -raw
For encrypted datasets, send data exactly as it exists on disk. This allows
backups to be taken even if encryption keys are not currently loaded. The
backup may then be received on an untrusted machine since that machine will
not have the encryption keys to read the protected data or alter it without
being detected. Upon being received, the dataset will have the same encryption
keys as it did on the send side, although the
.Sy keylocation
property will be defaulted to
.Sy prompt
if not otherwise provided. For unencrypted datasets, this flag will be
equivalent to
.Fl Lec .
Note that if you do not use this flag for sending encrypted datasets, data will
be sent unencrypted and may be re-encrypted with a different encryption key on
the receiving system, which will disable the ability to do a raw send to that
system for incrementals.
.It Fl e, -embed .It Fl e, -embed
Generate a more compact stream by using Generate a more compact stream by using
.Sy WRITE_EMBEDDED .Sy WRITE_EMBEDDED
@ -3478,6 +3716,10 @@ diff subcommand Allows lookup of paths within a dataset
given an object number, and the ability given an object number, and the ability
to create snapshots necessary to to create snapshots necessary to
'zfs diff'. 'zfs diff'.
load-key subcommand Allows loading and unloading of encryption key
(see 'zfs load-key' and 'zfs unload-key').
change-key subcommand Allows changing an encryption key via
'zfs change-key'.
mount subcommand Allows mount/umount of ZFS datasets mount subcommand Allows mount/umount of ZFS datasets
promote subcommand Must also have the 'mount' and 'promote' promote subcommand Must also have the 'mount' and 'promote'
ability in the origin file system ability in the origin file system
@ -3726,6 +3968,129 @@ arrows.
.It Fl t .It Fl t
Display the path's inode change time as the first column of output. Display the path's inode change time as the first column of output.
.El .El
.It Xo
.Nm
.Cm load-key
.Op Fl nr
.Op Fl L Ar keylocation
.Fl a | Ar filesystem
.Xc
Load the key for
.Ar filesystem ,
allowing it and all children that inherit the
.Sy keylocation
property to be accessed. The key will be expected in the format specified by the
.Sy keyformat
and location specified by the
.Sy keylocation
property. Note that if the
.Sy keylocation
is set to
.Sy prompt
the terminal will interactively wait for the key to be entered. Loading a key
will not automatically mount the dataset. If that functionality is desired,
.Nm zfs Cm mount Sy -l
will ask for the key and mount the dataset. Once the key is loaded the
.Sy keystatus
property will become
.Sy available .
.Bl -tag -width "-r"
.It Fl r
Recursively loads the keys for the specified filesystem and all descendent
encryption roots.
.It Fl a
Loads the keys for all encryption roots in all imported pools.
.It Fl n
Do a dry-run
.Pq Qq No-op
load-key. This will cause zfs to simply check that the
provided key is correct. This command may be run even if the key is already
loaded.
.It Fl L Ar keylocation
Use
.Ar keylocation
instead of the
.Sy keylocation
property. This will not change the value of the property on the dataset. Note
that if used with either
.Fl r
or
.Fl a ,
.Ar keylocation
may only be given as
.Sy prompt .
.El
.It Xo
.Nm
.Cm unload-key
.Op Fl r
.Fl a | Ar filesystem
.Xc
Unloads a key from ZFS, removing the ability to access the dataset and all of
its children that inherit the
.Sy keylocation
property. This requires that the dataset is not currently open or mounted. Once
the key is unloaded the
.Sy keystatus
property will become
.Sy unavailable .
.Bl -tag -width "-r"
.It Fl r
Recursively unloads the keys for the specified filesystem and all descendent
encryption roots.
.It Fl a
Unloads the keys for all encryption roots in all imported pools.
.El
.It Xo
.Nm
.Cm change-key
.Op Fl l
.Op Fl o Ar keylocation Ns = Ns Ar value
.Op Fl o Ar keyformat Ns = Ns Ar value
.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
.Ar filesystem
.Xc
.It Xo
.Nm
.Cm change-key
.Fl i
.Op Fl l
.Ar filesystem
.Xc
Allows a user to change the encryption key used to access a dataset. This
command requires that the existing key for the dataset is already loaded into
ZFS. This command may also be used to change the
.Sy keylocation ,
.Sy keyformat ,
and
.Sy pbkdf2iters
properties as needed. If the dataset was not previously an encryption root it
will become one. Alternatively, the
.Fl i
flag may be provided to cause an encryption root to inherit the parent's key
instead.
.Bl -tag -width "-r"
.It Fl l
Ensures the key is loaded before attempting to change the key. This is
effectively equivalent to
.Qq Nm zfs Cm load-key Ar filesystem ; Nm zfs Cm change-key Ar filesystem
.It Fl o Ar property Ns = Ns Ar value
Allows the user to set encryption key properties (
.Sy keyformat ,
.Sy keylocation ,
and
.Sy pbkdf2iters
) while changing the key. This is the only way to alter
.Sy keyformat
and
.Sy pbkdf2iters
after the dataset has been created.
.It Fl i
Indicates that zfs should make
.Ar filesystem
inherit the key of its parent. Note that this command can only be run on an
encryption root that has an encrypted parent.
.El
.El .El
.Sh EXIT STATUS .Sh EXIT STATUS
The The

View File

@ -92,7 +92,7 @@
.Nm .Nm
.Cm import .Cm import
.Fl a .Fl a
.Op Fl DfmN .Op Fl DflmN
.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc .Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts .Op Fl o Ar mntopts
@ -100,7 +100,7 @@
.Op Fl R Ar root .Op Fl R Ar root
.Nm .Nm
.Cm import .Cm import
.Op Fl Dfm .Op Fl Dflm
.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc .Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts .Op Fl o Ar mntopts
@ -160,7 +160,7 @@
.Ar pool .Ar pool
.Nm .Nm
.Cm split .Cm split
.Op Fl gLnP .Op Fl gLlnP
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
.Op Fl R Ar root .Op Fl R Ar root
.Ar pool newpool .Ar pool newpool
@ -1186,7 +1186,7 @@ Lists destroyed pools only.
.Nm .Nm
.Cm import .Cm import
.Fl a .Fl a
.Op Fl DfmN .Op Fl DflmN
.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc .Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts .Op Fl o Ar mntopts
@ -1237,6 +1237,15 @@ transactions.
Not all damaged pools can be recovered by using this option. Not all damaged pools can be recovered by using this option.
If successful, the data from the discarded transactions is irretrievably lost. If successful, the data from the discarded transactions is irretrievably lost.
This option is ignored if the pool is importable or already imported. This option is ignored if the pool is importable or already imported.
.It Fl l
Indicates that this command will request encryption keys for all encrypted
datasets it attempts to mount as it is bringing the pool online. Note that if
any datasets have a
.Sy keylocation
of
.Sy prompt
this command will block waiting for the keys to be entered. Without this flag
encrypted datasets will be left unavailable until the keys are loaded.
.It Fl m .It Fl m
Allows a pool to import when there is a missing log device. Allows a pool to import when there is a missing log device.
Recent transactions can be lost because the log device will be discarded. Recent transactions can be lost because the log device will be discarded.
@ -1298,7 +1307,7 @@ health of your pool and should only be used as a last resort.
.It Xo .It Xo
.Nm .Nm
.Cm import .Cm import
.Op Fl Dfm .Op Fl Dflm
.Op Fl F Oo Fl n Oc Oo Fl t Oc Oo Fl T Oc Oo Fl X Oc .Op Fl F Oo Fl n Oc Oo Fl t Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts .Op Fl o Ar mntopts
@ -1357,6 +1366,15 @@ transactions.
Not all damaged pools can be recovered by using this option. Not all damaged pools can be recovered by using this option.
If successful, the data from the discarded transactions is irretrievably lost. If successful, the data from the discarded transactions is irretrievably lost.
This option is ignored if the pool is importable or already imported. This option is ignored if the pool is importable or already imported.
.It Fl l
Indicates that this command will request encryption keys for all encrypted
datasets it attempts to mount as it is bringing the pool online. Note that if
any datasets have a
.Sy keylocation
of
.Sy prompt
this command will block waiting for the keys to be entered. Without this flag
encrypted datasets will be left unavailable until the keys are loaded.
.It Fl m .It Fl m
Allows a pool to import when there is a missing log device. Allows a pool to import when there is a missing log device.
Recent transactions can be lost because the log device will be discarded. Recent transactions can be lost because the log device will be discarded.
@ -1849,7 +1867,7 @@ values.
.It Xo .It Xo
.Nm .Nm
.Cm split .Cm split
.Op Fl gLnP .Op Fl gLlnP
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
.Op Fl R Ar root .Op Fl R Ar root
.Ar pool newpool .Ar pool newpool
@ -1887,6 +1905,15 @@ Display real paths for vdevs resolving all symbolic links. This can
be used to look up the current block device name regardless of the be used to look up the current block device name regardless of the
.Pa /dev/disk/ .Pa /dev/disk/
path used to open it. path used to open it.
.It Fl l
Indicates that this command will request encryption keys for all encrypted
datasets it attempts to mount as it is bringing the new pool online. Note that
if any datasets have a
.Sy keylocation
of
.Sy prompt
this command will block waiting for the keys to be entered. Without this flag
encrypted datasets will be left unavailable until the keys are loaded.
.It Fl n .It Fl n
Do dry run, do not actually perform the split. Do dry run, do not actually perform the split.
Print out the expected configuration of Print out the expected configuration of

View File

@ -52,7 +52,7 @@
static void Encode(uint8_t *, uint32_t *, size_t); static void Encode(uint8_t *, uint32_t *, size_t);
static void Encode64(uint8_t *, uint64_t *, size_t); static void Encode64(uint8_t *, uint64_t *, size_t);
#if defined(__amd64) && defined(_KERNEL) #if defined(__amd64)
#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1) #define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1) #define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
@ -62,7 +62,7 @@ void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
#else #else
static void SHA256Transform(SHA2_CTX *, const uint8_t *); static void SHA256Transform(SHA2_CTX *, const uint8_t *);
static void SHA512Transform(SHA2_CTX *, const uint8_t *); static void SHA512Transform(SHA2_CTX *, const uint8_t *);
#endif /* __amd64 && _KERNEL */ #endif /* __amd64 */
static uint8_t PADDING[128] = { 0x80, /* all zeros */ }; static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
@ -142,7 +142,7 @@ static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
#endif /* _BIG_ENDIAN */ #endif /* _BIG_ENDIAN */
#if !defined(__amd64) || !defined(_KERNEL) #if !defined(__amd64)
/* SHA256 Transform */ /* SHA256 Transform */
static void static void
@ -600,7 +600,7 @@ SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
ctx->state.s64[7] += h; ctx->state.s64[7] += h;
} }
#endif /* !__amd64 || !_KERNEL */ #endif /* !__amd64 */
/* /*
@ -838,7 +838,7 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
i = buf_len; i = buf_len;
} }
#if !defined(__amd64) || !defined(_KERNEL) #if !defined(__amd64)
if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
for (; i + buf_limit - 1 < input_len; i += buf_limit) { for (; i + buf_limit - 1 < input_len; i += buf_limit) {
SHA256Transform(ctx, &input[i]); SHA256Transform(ctx, &input[i]);
@ -866,7 +866,7 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
i += block_count << 7; i += block_count << 7;
} }
} }
#endif /* !__amd64 || !_KERNEL */ #endif /* !__amd64 */
/* /*
* general optimization: * general optimization:

View File

@ -61,7 +61,7 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
offset -= uiop->uio_iov[vec_idx++].iov_len) offset -= uiop->uio_iov[vec_idx++].iov_len)
; ;
if (vec_idx == uiop->uio_iovcnt) { if (vec_idx == uiop->uio_iovcnt && length > 0) {
/* /*
* The caller specified an offset that is larger than * The caller specified an offset that is larger than
* the total size of the buffers it provided. * the total size of the buffers it provided.
@ -192,7 +192,7 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
offset >= uiop->uio_iov[vec_idx].iov_len; offset >= uiop->uio_iov[vec_idx].iov_len;
offset -= uiop->uio_iov[vec_idx++].iov_len) offset -= uiop->uio_iov[vec_idx++].iov_len)
; ;
if (vec_idx == uiop->uio_iovcnt) { if (vec_idx == uiop->uio_iovcnt && length > 0) {
/* /*
* The caller specified an offset that is larger than the * The caller specified an offset that is larger than the
* total size of the buffers it provided. * total size of the buffers it provided.

View File

@ -20,7 +20,7 @@
* CDDL HEADER END * CDDL HEADER END
*/ */
/* /*
* Copyright (c) 2016, Datto, Inc. All rights reserved. * Copyright (c) 2017, Datto, Inc. All rights reserved.
*/ */
#ifdef _KERNEL #ifdef _KERNEL

View File

@ -318,6 +318,17 @@ zpool_feature_init(void)
ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET, ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET,
userobj_accounting_deps); userobj_accounting_deps);
} }
{
static const spa_feature_t encryption_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_ENCRYPTION,
"com.datto:encryption", "encryption",
"Support for dataset level encryption",
ZFEATURE_FLAG_PER_DATASET, encryption_deps);
}
} }
#if defined(_KERNEL) && defined(HAVE_SPL) #if defined(_KERNEL) && defined(HAVE_SPL)

View File

@ -69,6 +69,8 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_GROUPOBJUSED}, {ZFS_DELEG_PERM_GROUPOBJUSED},
{ZFS_DELEG_PERM_HOLD}, {ZFS_DELEG_PERM_HOLD},
{ZFS_DELEG_PERM_RELEASE}, {ZFS_DELEG_PERM_RELEASE},
{ZFS_DELEG_PERM_LOAD_KEY},
{ZFS_DELEG_PERM_CHANGE_KEY},
{NULL} {NULL}
}; };

View File

@ -33,6 +33,7 @@
#include <sys/zfs_acl.h> #include <sys/zfs_acl.h>
#include <sys/zfs_ioctl.h> #include <sys/zfs_ioctl.h>
#include <sys/zfs_znode.h> #include <sys/zfs_znode.h>
#include <sys/dsl_crypt.h>
#include "zfs_prop.h" #include "zfs_prop.h"
#include "zfs_deleg.h" #include "zfs_deleg.h"
@ -119,6 +120,26 @@ zfs_prop_init(void)
{ NULL } { NULL }
}; };
static zprop_index_t crypto_table[] = {
{ "on", ZIO_CRYPT_ON },
{ "off", ZIO_CRYPT_OFF },
{ "aes-128-ccm", ZIO_CRYPT_AES_128_CCM },
{ "aes-192-ccm", ZIO_CRYPT_AES_192_CCM },
{ "aes-256-ccm", ZIO_CRYPT_AES_256_CCM },
{ "aes-128-gcm", ZIO_CRYPT_AES_128_GCM },
{ "aes-192-gcm", ZIO_CRYPT_AES_192_GCM },
{ "aes-256-gcm", ZIO_CRYPT_AES_256_GCM },
{ NULL }
};
static zprop_index_t keyformat_table[] = {
{ "none", ZFS_KEYFORMAT_NONE },
{ "raw", ZFS_KEYFORMAT_RAW },
{ "hex", ZFS_KEYFORMAT_HEX },
{ "passphrase", ZFS_KEYFORMAT_PASSPHRASE },
{ NULL }
};
static zprop_index_t snapdir_table[] = { static zprop_index_t snapdir_table[] = {
{ "hidden", ZFS_SNAPDIR_HIDDEN }, { "hidden", ZFS_SNAPDIR_HIDDEN },
{ "visible", ZFS_SNAPDIR_VISIBLE }, { "visible", ZFS_SNAPDIR_VISIBLE },
@ -193,6 +214,13 @@ zfs_prop_init(void)
{ NULL } { NULL }
}; };
static zprop_index_t keystatus_table[] = {
{ "none", ZFS_KEYSTATUS_NONE},
{ "unavailable", ZFS_KEYSTATUS_UNAVAILABLE},
{ "available", ZFS_KEYSTATUS_AVAILABLE},
{ NULL }
};
static zprop_index_t logbias_table[] = { static zprop_index_t logbias_table[] = {
{ "latency", ZFS_LOGBIAS_LATENCY }, { "latency", ZFS_LOGBIAS_LATENCY },
{ "throughput", ZFS_LOGBIAS_THROUGHPUT }, { "throughput", ZFS_LOGBIAS_THROUGHPUT },
@ -351,12 +379,16 @@ zfs_prop_init(void)
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
"CANMOUNT", canmount_table); "CANMOUNT", canmount_table);
/* readonly index (boolean) properties */ /* readonly index properties */
zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
boolean_table); boolean_table);
zprop_register_index(ZFS_PROP_KEYSTATUS, "keystatus",
ZFS_KEYSTATUS_NONE, PROP_READONLY, ZFS_TYPE_DATASET,
"none | unavailable | available",
"KEYSTATUS", keystatus_table);
/* set once index properties */ /* set once index properties */
zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
@ -367,6 +399,15 @@ zfs_prop_init(void)
ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_SNAPSHOT, ZFS_TYPE_SNAPSHOT,
"sensitive | insensitive | mixed", "CASE", case_table); "sensitive | insensitive | mixed", "CASE", case_table);
zprop_register_index(ZFS_PROP_KEYFORMAT, "keyformat",
ZFS_KEYFORMAT_NONE, PROP_ONETIME_DEFAULT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"none | raw | hex | passphrase", "KEYFORMAT", keyformat_table);
zprop_register_index(ZFS_PROP_ENCRYPTION, "encryption",
ZIO_CRYPT_DEFAULT, PROP_ONETIME, ZFS_TYPE_DATASET,
"on | off | aes-128-ccm | aes-192-ccm | aes-256-ccm | "
"aes-128-gcm | aes-192-gcm | aes-256-gcm", "ENCRYPTION",
crypto_table);
/* set once index (boolean) properties */ /* set once index (boolean) properties */
zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
@ -409,6 +450,12 @@ zfs_prop_init(void)
"receive_resume_token", "receive_resume_token",
NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"<string token>", "RESUMETOK"); "<string token>", "RESUMETOK");
zprop_register_string(ZFS_PROP_ENCRYPTION_ROOT, "encryptionroot", NULL,
PROP_READONLY, ZFS_TYPE_DATASET, "<filesystem | volume>",
"ENCROOT");
zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation",
"none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"prompt | <file URI>", "KEYLOCATION");
/* readonly number properties */ /* readonly number properties */
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
@ -456,6 +503,9 @@ zfs_prop_init(void)
ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "GUID"); ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "GUID");
zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY, zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY,
ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "CREATETXG"); ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "CREATETXG");
zprop_register_number(ZFS_PROP_PBKDF2_ITERS, "pbkdf2iters",
0, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"<iters>", "PBKDF2ITERS");
/* default number properties */ /* default number properties */
zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
@ -503,6 +553,11 @@ zfs_prop_init(void)
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT"); PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT");
zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING, zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING,
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP"); PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP");
zprop_register_hidden(ZFS_PROP_PBKDF2_SALT, "pbkdf2salt",
PROP_TYPE_NUMBER, PROP_ONETIME_DEFAULT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PBKDF2SALT");
zprop_register_hidden(ZFS_PROP_KEY_GUID, "keyguid", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "KEYGUID");
/* /*
* Property to be removed once libbe is integrated * Property to be removed once libbe is integrated
@ -650,7 +705,8 @@ boolean_t
zfs_prop_readonly(zfs_prop_t prop) zfs_prop_readonly(zfs_prop_t prop)
{ {
return (zfs_prop_table[prop].pd_attr == PROP_READONLY || return (zfs_prop_table[prop].pd_attr == PROP_READONLY ||
zfs_prop_table[prop].pd_attr == PROP_ONETIME); zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
} }
/* /*
@ -659,7 +715,8 @@ zfs_prop_readonly(zfs_prop_t prop)
boolean_t boolean_t
zfs_prop_setonce(zfs_prop_t prop) zfs_prop_setonce(zfs_prop_t prop)
{ {
return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); return (zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
} }
const char * const char *
@ -694,6 +751,40 @@ zfs_prop_inheritable(zfs_prop_t prop)
zfs_prop_table[prop].pd_attr == PROP_ONETIME); zfs_prop_table[prop].pd_attr == PROP_ONETIME);
} }
/*
 * Reports whether the given property is an encryption key parameter, i.e.
 * one of the encryption properties that can only be modified while the
 * encryption key is loaded. Returns B_TRUE for such properties and B_FALSE
 * for everything else.
 */
boolean_t
zfs_prop_encryption_key_param(zfs_prop_t prop)
{
	/*
	 * keylocation is deliberately not listed here: it does not count as
	 * an encryption property and can be changed at will without needing
	 * the master keys.
	 */
	switch (prop) {
	case ZFS_PROP_PBKDF2_SALT:
	case ZFS_PROP_PBKDF2_ITERS:
	case ZFS_PROP_KEYFORMAT:
		return (B_TRUE);
	default:
		return (B_FALSE);
	}
}
/*
 * Validates a keylocation property value. Shared by kernelspace and
 * userspace. Accepted values are:
 *
 *   "none"        - only when the dataset is not encrypted
 *   "prompt"      - always
 *   "file:///..." - always, provided something follows the prefix
 *
 * When encrypted is set, the keylocation must therefore be one that is
 * usable for an encrypted dataset. Returns B_TRUE if str is acceptable.
 */
boolean_t
zfs_prop_valid_keylocation(const char *str, boolean_t encrypted)
{
	static const char file_prefix[] = "file:///";
	const size_t prefix_len = sizeof (file_prefix) - 1;

	/* "none" is only legal for datasets that are not encrypted */
	if (strcmp("none", str) == 0)
		return (!encrypted);

	if (strcmp("prompt", str) == 0)
		return (B_TRUE);

	/* a file URI must have at least one character after the prefix */
	if (strlen(str) > prefix_len &&
	    strncmp(file_prefix, str, prefix_len) == 0)
		return (B_TRUE);

	return (B_FALSE);
}
#ifndef _KERNEL #ifndef _KERNEL
/* /*
@ -774,6 +865,8 @@ EXPORT_SYMBOL(zfs_prop_default_string);
EXPORT_SYMBOL(zfs_prop_default_numeric); EXPORT_SYMBOL(zfs_prop_default_numeric);
EXPORT_SYMBOL(zfs_prop_readonly); EXPORT_SYMBOL(zfs_prop_readonly);
EXPORT_SYMBOL(zfs_prop_inheritable); EXPORT_SYMBOL(zfs_prop_inheritable);
EXPORT_SYMBOL(zfs_prop_encryption_key_param);
EXPORT_SYMBOL(zfs_prop_valid_keylocation);
EXPORT_SYMBOL(zfs_prop_setonce); EXPORT_SYMBOL(zfs_prop_setonce);
EXPORT_SYMBOL(zfs_prop_to_name); EXPORT_SYMBOL(zfs_prop_to_name);
EXPORT_SYMBOL(zfs_name_to_prop); EXPORT_SYMBOL(zfs_name_to_prop);

View File

@ -33,6 +33,7 @@ $(MODULE)-objs += dsl_deadlist.o
$(MODULE)-objs += dsl_deleg.o $(MODULE)-objs += dsl_deleg.o
$(MODULE)-objs += dsl_bookmark.o $(MODULE)-objs += dsl_bookmark.o
$(MODULE)-objs += dsl_dir.o $(MODULE)-objs += dsl_dir.o
$(MODULE)-objs += dsl_crypt.o
$(MODULE)-objs += dsl_pool.o $(MODULE)-objs += dsl_pool.o
$(MODULE)-objs += dsl_prop.o $(MODULE)-objs += dsl_prop.o
$(MODULE)-objs += dsl_scan.o $(MODULE)-objs += dsl_scan.o
@ -103,6 +104,7 @@ $(MODULE)-objs += zil.o
$(MODULE)-objs += zio.o $(MODULE)-objs += zio.o
$(MODULE)-objs += zio_checksum.o $(MODULE)-objs += zio_checksum.o
$(MODULE)-objs += zio_compress.o $(MODULE)-objs += zio_compress.o
$(MODULE)-objs += zio_crypt.o
$(MODULE)-objs += zio_inject.o $(MODULE)-objs += zio_inject.o
$(MODULE)-objs += zle.o $(MODULE)-objs += zle.o
$(MODULE)-objs += zpl_ctldir.o $(MODULE)-objs += zpl_ctldir.o

File diff suppressed because it is too large Load Diff

View File

@ -212,7 +212,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
err = 0; err = 0;
for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
bptree_entry_phys_t bte; bptree_entry_phys_t bte;
int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST; int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST |
TRAVERSE_NO_DECRYPT;
err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte), err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
&bte, DMU_READ_NO_PREFETCH); &bte, DMU_READ_NO_PREFETCH);

View File

@ -964,7 +964,7 @@ dbuf_whichblock(const dnode_t *dn, const int64_t level, const uint64_t offset)
} }
static void static void
dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) dbuf_read_done(zio_t *zio, int err, arc_buf_t *buf, void *vdb)
{ {
dmu_buf_impl_t *db = vdb; dmu_buf_impl_t *db = vdb;
@ -984,7 +984,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
db->db_freed_in_flight = FALSE; db->db_freed_in_flight = FALSE;
dbuf_set_data(db, buf); dbuf_set_data(db, buf);
db->db_state = DB_CACHED; db->db_state = DB_CACHED;
} else if (zio == NULL || zio->io_error == 0) { } else if (err == 0) {
dbuf_set_data(db, buf); dbuf_set_data(db, buf);
db->db_state = DB_CACHED; db->db_state = DB_CACHED;
} else { } else {
@ -1003,7 +1003,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
dnode_t *dn; dnode_t *dn;
zbookmark_phys_t zb; zbookmark_phys_t zb;
uint32_t aflags = ARC_FLAG_NOWAIT; uint32_t aflags = ARC_FLAG_NOWAIT;
int err; int err, zio_flags = 0;
DB_DNODE_ENTER(db); DB_DNODE_ENTER(db);
dn = DB_DNODE(db); dn = DB_DNODE(db);
@ -1021,6 +1021,22 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
*/ */
int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots); int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ?
dn->dn_dbuf->db_buf : NULL;
/* if the underlying dnode block is encrypted, decrypt it */
if (dn_buf != NULL && dn->dn_objset->os_encrypted &&
DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) &&
(flags & DB_RF_NO_DECRYPT) == 0 &&
arc_is_encrypted(dn_buf)) {
err = arc_untransform(dn_buf, dn->dn_objset->os_spa,
dmu_objset_id(dn->dn_objset), B_TRUE);
if (err != 0) {
DB_DNODE_EXIT(db);
mutex_exit(&db->db_mtx);
return (err);
}
}
ASSERT3U(bonuslen, <=, db->db.db_size); ASSERT3U(bonuslen, <=, db->db.db_size);
db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP); db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
@ -1088,11 +1104,27 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
db->db.db_object, db->db_level, db->db_blkid); db->db.db_object, db->db_level, db->db_blkid);
/*
* All bps of an encrypted os should have the encryption bit set.
* If this is not true it indicates tampering and we report an error.
*/
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) {
spa_log_error(db->db_objset->os_spa, &zb);
zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(db->db_objset));
return (SET_ERROR(EIO));
}
dbuf_add_ref(db, NULL); dbuf_add_ref(db, NULL);
zio_flags = (flags & DB_RF_CANFAIL) ?
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr))
zio_flags |= ZIO_FLAG_RAW;
err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr, err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
(flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb); &aflags, &zb);
return (err); return (err);
@ -1141,18 +1173,31 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
arc_space_consume(bonuslen, ARC_SPACE_BONUS); arc_space_consume(bonuslen, ARC_SPACE_BONUS);
bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen); bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
dnode_t *dn = DB_DNODE(db);
int size = arc_buf_size(db->db_buf); int size = arc_buf_size(db->db_buf);
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
spa_t *spa = db->db_objset->os_spa; spa_t *spa = db->db_objset->os_spa;
enum zio_compress compress_type = enum zio_compress compress_type =
arc_get_compression(db->db_buf); arc_get_compression(db->db_buf);
if (compress_type == ZIO_COMPRESS_OFF) { if (arc_is_encrypted(db->db_buf)) {
dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size); boolean_t byteorder;
} else { uint8_t salt[ZIO_DATA_SALT_LEN];
uint8_t iv[ZIO_DATA_IV_LEN];
uint8_t mac[ZIO_DATA_MAC_LEN];
arc_get_raw_params(db->db_buf, &byteorder, salt,
iv, mac);
dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db,
dmu_objset_id(dn->dn_objset), byteorder, salt, iv,
mac, dn->dn_type, size, arc_buf_lsize(db->db_buf),
compress_type);
} else if (compress_type != ZIO_COMPRESS_OFF) {
ASSERT3U(type, ==, ARC_BUFC_DATA); ASSERT3U(type, ==, ARC_BUFC_DATA);
dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db, dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db,
size, arc_buf_lsize(db->db_buf), compress_type); size, arc_buf_lsize(db->db_buf), compress_type);
} else {
dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
} }
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else { } else {
@ -1188,16 +1233,21 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_enter(&db->db_mtx); mutex_enter(&db->db_mtx);
if (db->db_state == DB_CACHED) { if (db->db_state == DB_CACHED) {
spa_t *spa = dn->dn_objset->os_spa;
/* /*
* If the arc buf is compressed, we need to decompress it to * If the arc buf is compressed or encrypted, we need to
* read the data. This could happen during the "zfs receive" of * untransform it to read the data. This could happen during
* a stream which is compressed and deduplicated. * the "zfs receive" of a stream which is deduplicated and
* either raw or compressed. We do not need to do this if the
* caller wants raw encrypted data.
*/ */
if (db->db_buf != NULL && if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 &&
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) { (arc_is_encrypted(db->db_buf) ||
dbuf_fix_old_data(db, arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
spa_syncing_txg(dmu_objset_spa(db->db_objset))); dbuf_fix_old_data(db, spa_syncing_txg(spa));
err = arc_decompress(db->db_buf); err = arc_untransform(db->db_buf, spa,
dmu_objset_id(db->db_objset), B_FALSE);
dbuf_set_data(db, db->db_buf); dbuf_set_data(db, db->db_buf);
} }
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
@ -1316,6 +1366,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
dr->dt.dl.dr_nopwrite = B_FALSE; dr->dt.dl.dr_nopwrite = B_FALSE;
dr->dt.dl.dr_raw = B_FALSE;
/* /*
* Release the already-written buffer, so we leave it in * Release the already-written buffer, so we leave it in
@ -1908,11 +1959,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (B_FALSE); return (B_FALSE);
} }
void static void
dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
{ {
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
dbuf_dirty_record_t *dr; dbuf_dirty_record_t *dr;
ASSERT(tx->tx_txg != 0); ASSERT(tx->tx_txg != 0);
@ -1944,12 +1994,19 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
DB_DNODE_ENTER(db); DB_DNODE_ENTER(db);
if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
rf |= DB_RF_HAVESTRUCT; flags |= DB_RF_HAVESTRUCT;
DB_DNODE_EXIT(db); DB_DNODE_EXIT(db);
(void) dbuf_read(db, NULL, rf); (void) dbuf_read(db, NULL, flags);
(void) dbuf_dirty(db, tx); (void) dbuf_dirty(db, tx);
} }
/*
 * Public entry point for dirtying a buffer in the given transaction.
 * Forwards to dmu_buf_will_dirty_impl() with the read flags
 * DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH.
 */
void
dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
	int read_flags = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;

	dmu_buf_will_dirty_impl(db_fake, read_flags, tx);
}
void void
dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
{ {
@ -1977,6 +2034,29 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
(void) dbuf_dirty(db, tx); (void) dbuf_dirty(db, tx);
} }
/*
 * Variant of dmu_buf_will_dirty() for callers that expect raw encrypted
 * data in the db. The buffer is dirtied with DB_RF_NO_DECRYPT added to the
 * read flags, and the dirty record belonging to this transaction's txg is
 * then marked raw (dr_raw = B_TRUE).
 */
void
dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	dbuf_dirty_record_t *dr;
	int read_flags = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH |
	    DB_RF_NO_DECRYPT;

	dmu_buf_will_dirty_impl(db_fake, read_flags, tx);

	/* skip over any dirty records that belong to later txgs */
	for (dr = db->db_last_dirty;
	    dr != NULL && dr->dr_txg > tx->tx_txg; dr = dr->dr_next)
		continue;

	/* the call above must have left a record for exactly this txg */
	ASSERT3P(dr, !=, NULL);
	ASSERT3U(dr->dr_txg, ==, tx->tx_txg);

	dr->dt.dl.dr_raw = B_TRUE;
}
#pragma weak dmu_buf_fill_done = dbuf_fill_done #pragma weak dmu_buf_fill_done = dbuf_fill_done
/* ARGSUSED */ /* ARGSUSED */
void void
@ -2117,10 +2197,11 @@ dbuf_destroy(dmu_buf_impl_t *db)
if (db->db_blkid == DMU_BONUS_BLKID) { if (db->db_blkid == DMU_BONUS_BLKID) {
int slots = DB_DNODE(db)->dn_num_slots; int slots = DB_DNODE(db)->dn_num_slots;
int bonuslen = DN_SLOTS_TO_BONUSLEN(slots); int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
ASSERT(db->db.db_data != NULL); if (db->db.db_data != NULL) {
kmem_free(db->db.db_data, bonuslen); kmem_free(db->db.db_data, bonuslen);
arc_space_return(bonuslen, ARC_SPACE_BONUS); arc_space_return(bonuslen, ARC_SPACE_BONUS);
db->db_state = DB_UNCACHED; db->db_state = DB_UNCACHED;
}
} }
dbuf_clear_data(db); dbuf_clear_data(db);
@ -2416,7 +2497,7 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
* prefetch if the next block down is our target. * prefetch if the next block down is our target.
*/ */
static void static void
dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) dbuf_prefetch_indirect_done(zio_t *zio, int err, arc_buf_t *abuf, void *private)
{ {
dbuf_prefetch_arg_t *dpa = private; dbuf_prefetch_arg_t *dpa = private;
uint64_t nextblkid; uint64_t nextblkid;
@ -2438,7 +2519,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
*/ */
if (zio != NULL) { if (zio != NULL) {
ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel); ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel);
if (zio->io_flags & ZIO_FLAG_RAW) { if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS) {
ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size); ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size);
} else { } else {
ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size); ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
@ -2463,7 +2544,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
(dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level)); (dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level));
bp = ((blkptr_t *)abuf->b_data) + bp = ((blkptr_t *)abuf->b_data) +
P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs); P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) { if (BP_IS_HOLE(bp) || err != 0) {
kmem_free(dpa, sizeof (*dpa)); kmem_free(dpa, sizeof (*dpa));
} else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) { } else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid); ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
@ -2491,7 +2572,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
* Issue prefetch reads for the given block on the given level. If the indirect * Issue prefetch reads for the given block on the given level. If the indirect
* blocks above that block are not in memory, we will read them in * blocks above that block are not in memory, we will read them in
* asynchronously. As a result, this call never blocks waiting for a read to * asynchronously. As a result, this call never blocks waiting for a read to
* complete. * complete. Note that the prefetch might fail if the dataset is encrypted and
* the encryption key is unmapped before the IO completes.
*/ */
void void
dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio, dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
@ -3120,6 +3202,41 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
} }
} }
/*
 * Make sure the dbuf's data is in the form its dirty record requires.
 * dbuf_sync_leaf() uses this so that a dnode block is decrypted before new
 * data is written to it. For a raw dirty record the buffer is instead
 * asserted to already be encrypted. The caller must hold db_mtx.
 */
static void
dbuf_check_crypt(dbuf_dirty_record_t *dr)
{
	dmu_buf_impl_t *db = dr->dr_dbuf;

	ASSERT(MUTEX_HELD(&db->db_mtx));

	if (dr->dt.dl.dr_raw) {
		/*
		 * Writing raw encrypted data requires the db's arc buffer
		 * to be converted to raw by the caller.
		 */
		ASSERT(arc_is_encrypted(db->db_buf));
		return;
	}

	if (arc_is_encrypted(db->db_buf)) {
		/*
		 * Unfortunately, there is currently no mechanism for
		 * syncing context to handle decryption errors. An error
		 * here is only possible if an attacker maliciously
		 * changed a dnode block and updated the associated
		 * checksums going up the block tree.
		 */
		int err = arc_untransform(db->db_buf, db->db_objset->os_spa,
		    dmu_objset_id(db->db_objset), B_TRUE);

		if (err != 0)
			panic("Invalid dnode block MAC");
	}
}
/* /*
* dbuf_sync_indirect() is called recursively from dbuf_sync_list() so it * dbuf_sync_indirect() is called recursively from dbuf_sync_list() so it
* is critical the we not allow the compiler to inline this function in to * is critical the we not allow the compiler to inline this function in to
@ -3241,9 +3358,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
ASSERT(*datap != NULL); ASSERT(*datap != NULL);
ASSERT0(db->db_level); ASSERT0(db->db_level);
ASSERT3U(dn->dn_phys->dn_bonuslen, <=, ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=,
DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1)); DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); bcopy(*datap, DN_BONUS(dn->dn_phys),
DN_MAX_BONUS_LEN(dn->dn_phys));
DB_DNODE_EXIT(db); DB_DNODE_EXIT(db);
if (*datap != db->db.db_data) { if (*datap != db->db.db_data) {
@ -3290,6 +3408,13 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN);
} }
/*
* If this is a dnode block, ensure it is appropriately encrypted
* or decrypted, depending on what we are writing to it this txg.
*/
if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
dbuf_check_crypt(dr);
if (db->db_state != DB_NOFILL && if (db->db_state != DB_NOFILL &&
dn->dn_object != DMU_META_DNODE_OBJECT && dn->dn_object != DMU_META_DNODE_OBJECT &&
refcount_count(&db->db_holds) > 1 && refcount_count(&db->db_holds) > 1 &&
@ -3307,16 +3432,27 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
* DNONE_DNODE blocks). * DNONE_DNODE blocks).
*/ */
int psize = arc_buf_size(*datap); int psize = arc_buf_size(*datap);
int lsize = arc_buf_lsize(*datap);
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
enum zio_compress compress_type = arc_get_compression(*datap); enum zio_compress compress_type = arc_get_compression(*datap);
if (compress_type == ZIO_COMPRESS_OFF) { if (arc_is_encrypted(*datap)) {
*datap = arc_alloc_buf(os->os_spa, db, type, psize); boolean_t byteorder;
} else { uint8_t salt[ZIO_DATA_SALT_LEN];
uint8_t iv[ZIO_DATA_IV_LEN];
uint8_t mac[ZIO_DATA_MAC_LEN];
arc_get_raw_params(*datap, &byteorder, salt, iv, mac);
*datap = arc_alloc_raw_buf(os->os_spa, db,
dmu_objset_id(os), byteorder, salt, iv, mac,
dn->dn_type, psize, lsize, compress_type);
} else if (compress_type != ZIO_COMPRESS_OFF) {
ASSERT3U(type, ==, ARC_BUFC_DATA); ASSERT3U(type, ==, ARC_BUFC_DATA);
int lsize = arc_buf_lsize(*datap); int lsize = arc_buf_lsize(*datap);
*datap = arc_alloc_compressed_buf(os->os_spa, db, *datap = arc_alloc_compressed_buf(os->os_spa, db,
psize, lsize, compress_type); psize, lsize, compress_type);
} else {
*datap = arc_alloc_buf(os->os_spa, db, type, psize);
} }
bcopy(db->db.db_data, (*datap)->b_data, psize); bcopy(db->db.db_data, (*datap)->b_data, psize);
} }
@ -3453,7 +3589,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
DB_DNODE_EXIT(db); DB_DNODE_EXIT(db);
if (!BP_IS_EMBEDDED(bp)) if (!BP_IS_EMBEDDED(bp))
bp->blk_fill = fill; BP_SET_FILL(bp, fill);
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
@ -3778,7 +3914,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
ZIO_PRIORITY_ASYNC_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb); ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
} else { } else {
arc_done_func_t *children_ready_cb = NULL; arc_write_done_func_t *children_ready_cb = NULL;
ASSERT(arc_released(data)); ASSERT(arc_released(data));
/* /*
@ -3810,6 +3946,7 @@ EXPORT_SYMBOL(dbuf_free_range);
EXPORT_SYMBOL(dbuf_new_size); EXPORT_SYMBOL(dbuf_new_size);
EXPORT_SYMBOL(dbuf_release_bp); EXPORT_SYMBOL(dbuf_release_bp);
EXPORT_SYMBOL(dbuf_dirty); EXPORT_SYMBOL(dbuf_dirty);
EXPORT_SYMBOL(dmu_buf_will_change_crypt_params);
EXPORT_SYMBOL(dmu_buf_will_dirty); EXPORT_SYMBOL(dmu_buf_will_dirty);
EXPORT_SYMBOL(dmu_buf_will_not_fill); EXPORT_SYMBOL(dmu_buf_will_not_fill);
EXPORT_SYMBOL(dmu_buf_will_fill); EXPORT_SYMBOL(dmu_buf_will_fill);

View File

@ -269,6 +269,10 @@ ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg)
BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth);
} }
/*
* The bp created via this function may be used for repairs and scrub, but it
* will be missing the salt / IV required to do a full decrypting read.
*/
void void
ddt_bp_create(enum zio_checksum checksum, ddt_bp_create(enum zio_checksum checksum,
const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp)
@ -279,11 +283,12 @@ ddt_bp_create(enum zio_checksum checksum,
ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth);
bp->blk_cksum = ddk->ddk_cksum; bp->blk_cksum = ddk->ddk_cksum;
bp->blk_fill = 1;
BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk));
BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk));
BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk));
BP_SET_CRYPT(bp, DDK_GET_CRYPT(ddk));
BP_SET_FILL(bp, 1);
BP_SET_CHECKSUM(bp, checksum); BP_SET_CHECKSUM(bp, checksum);
BP_SET_TYPE(bp, DMU_OT_DEDUP); BP_SET_TYPE(bp, DMU_OT_DEDUP);
BP_SET_LEVEL(bp, 0); BP_SET_LEVEL(bp, 0);
@ -297,9 +302,12 @@ ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp)
ddk->ddk_cksum = bp->blk_cksum; ddk->ddk_cksum = bp->blk_cksum;
ddk->ddk_prop = 0; ddk->ddk_prop = 0;
ASSERT(BP_IS_ENCRYPTED(bp) || !BP_USES_CRYPT(bp));
DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp));
DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp));
DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp));
DDK_SET_CRYPT(ddk, BP_USES_CRYPT(bp));
} }
void void
@ -389,7 +397,7 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
if (ddp->ddp_phys_birth == 0) if (ddp->ddp_phys_birth == 0)
continue; continue;
for (d = 0; d < SPA_DVAS_PER_BP; d++) for (d = 0; d < DDE_GET_NDVAS(dde); d++)
dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
dds->dds_blocks += 1; dds->dds_blocks += 1;
@ -562,6 +570,7 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
uint64_t ditto = spa->spa_dedup_ditto; uint64_t ditto = spa->spa_dedup_ditto;
int total_copies = 0; int total_copies = 0;
int desired_copies = 0; int desired_copies = 0;
int copies_needed = 0;
int p; int p;
for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
@ -588,7 +597,13 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
if (total_refcnt >= ditto * ditto) if (total_refcnt >= ditto * ditto)
desired_copies++; desired_copies++;
return (MAX(desired_copies, total_copies) - total_copies); copies_needed = MAX(desired_copies, total_copies) - total_copies;
/* encrypted blocks store their IV in DVA[2] */
if (DDK_GET_CRYPT(&dde->dde_key))
copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1);
return (copies_needed);
} }
int int
@ -599,7 +614,7 @@ ddt_ditto_copies_present(ddt_entry_t *dde)
int copies = 0 - DVA_GET_GANG(dva); int copies = 0 - DVA_GET_GANG(dva);
int d; int d;
for (d = 0; d < SPA_DVAS_PER_BP; d++, dva++) for (d = 0; d < DDE_GET_NDVAS(dde); d++, dva++)
if (DVA_IS_VALID(dva)) if (DVA_IS_VALID(dva))
copies++; copies++;

View File

@ -73,60 +73,60 @@ unsigned long zfs_per_txg_dirty_frees_percent = 30;
int zfs_dmu_offset_next_sync = 0; int zfs_dmu_offset_next_sync = 0;
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{ DMU_BSWAP_UINT8, TRUE, "unallocated" }, { DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" },
{ DMU_BSWAP_ZAP, TRUE, "object directory" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "object directory" },
{ DMU_BSWAP_UINT64, TRUE, "object array" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "object array" },
{ DMU_BSWAP_UINT8, TRUE, "packed nvlist" }, { DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" },
{ DMU_BSWAP_UINT64, TRUE, "packed nvlist size" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" },
{ DMU_BSWAP_UINT64, TRUE, "bpobj" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" },
{ DMU_BSWAP_UINT64, TRUE, "bpobj header" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" },
{ DMU_BSWAP_UINT64, TRUE, "SPA space map header" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" },
{ DMU_BSWAP_UINT64, TRUE, "SPA space map" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" },
{ DMU_BSWAP_UINT64, TRUE, "ZIL intent log" }, { DMU_BSWAP_UINT64, TRUE, TRUE, "ZIL intent log" },
{ DMU_BSWAP_DNODE, TRUE, "DMU dnode" }, { DMU_BSWAP_DNODE, TRUE, TRUE, "DMU dnode" },
{ DMU_BSWAP_OBJSET, TRUE, "DMU objset" }, { DMU_BSWAP_OBJSET, TRUE, FALSE, "DMU objset" },
{ DMU_BSWAP_UINT64, TRUE, "DSL directory" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL directory" },
{ DMU_BSWAP_ZAP, TRUE, "DSL directory child map"}, { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL directory child map"},
{ DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset snap map" },
{ DMU_BSWAP_ZAP, TRUE, "DSL props" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL props" },
{ DMU_BSWAP_UINT64, TRUE, "DSL dataset" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL dataset" },
{ DMU_BSWAP_ZNODE, TRUE, "ZFS znode" }, { DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" },
{ DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" }, { DMU_BSWAP_OLDACL, TRUE, TRUE, "ZFS V0 ACL" },
{ DMU_BSWAP_UINT8, FALSE, "ZFS plain file" }, { DMU_BSWAP_UINT8, FALSE, TRUE, "ZFS plain file" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS directory" }, { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS directory" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS master node" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" }, { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS delete queue" },
{ DMU_BSWAP_UINT8, FALSE, "zvol object" }, { DMU_BSWAP_UINT8, FALSE, TRUE, "zvol object" },
{ DMU_BSWAP_ZAP, TRUE, "zvol prop" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "zvol prop" },
{ DMU_BSWAP_UINT8, FALSE, "other uint8[]" }, { DMU_BSWAP_UINT8, FALSE, TRUE, "other uint8[]" },
{ DMU_BSWAP_UINT64, FALSE, "other uint64[]" }, { DMU_BSWAP_UINT64, FALSE, TRUE, "other uint64[]" },
{ DMU_BSWAP_ZAP, TRUE, "other ZAP" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" },
{ DMU_BSWAP_ZAP, TRUE, "persistent error log" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" },
{ DMU_BSWAP_UINT8, TRUE, "SPA history" }, { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" },
{ DMU_BSWAP_UINT64, TRUE, "SPA history offsets" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" },
{ DMU_BSWAP_ZAP, TRUE, "Pool properties" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "Pool properties" },
{ DMU_BSWAP_ZAP, TRUE, "DSL permissions" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL permissions" },
{ DMU_BSWAP_ACL, TRUE, "ZFS ACL" }, { DMU_BSWAP_ACL, TRUE, TRUE, "ZFS ACL" },
{ DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" }, { DMU_BSWAP_UINT8, TRUE, TRUE, "ZFS SYSACL" },
{ DMU_BSWAP_UINT8, TRUE, "FUID table" }, { DMU_BSWAP_UINT8, TRUE, TRUE, "FUID table" },
{ DMU_BSWAP_UINT64, TRUE, "FUID table size" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" },
{ DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"}, { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset next clones"},
{ DMU_BSWAP_ZAP, TRUE, "scan work queue" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" }, { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group used" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" }, { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group quota" },
{ DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"}, { DMU_BSWAP_ZAP, TRUE, FALSE, "snapshot refcount tags"},
{ DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" },
{ DMU_BSWAP_ZAP, TRUE, "DDT statistics" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" },
{ DMU_BSWAP_UINT8, TRUE, "System attributes" }, { DMU_BSWAP_UINT8, TRUE, TRUE, "System attributes" },
{ DMU_BSWAP_ZAP, TRUE, "SA master node" }, { DMU_BSWAP_ZAP, TRUE, TRUE, "SA master node" },
{ DMU_BSWAP_ZAP, TRUE, "SA attr registration" }, { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr registration" },
{ DMU_BSWAP_ZAP, TRUE, "SA attr layouts" }, { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr layouts" },
{ DMU_BSWAP_ZAP, TRUE, "scan translations" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" },
{ DMU_BSWAP_UINT8, FALSE, "deduplicated block" }, { DMU_BSWAP_UINT8, FALSE, TRUE, "deduplicated block" },
{ DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL deadlist map" },
{ DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" }, { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL deadlist map hdr" },
{ DMU_BSWAP_ZAP, TRUE, "DSL dir clones" }, { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dir clones" },
{ DMU_BSWAP_UINT64, TRUE, "bpobj subobj" } { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" }
}; };
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = { const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
@ -198,6 +198,8 @@ dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
if (flags & DMU_READ_NO_PREFETCH) if (flags & DMU_READ_NO_PREFETCH)
db_flags |= DB_RF_NOPREFETCH; db_flags |= DB_RF_NOPREFETCH;
if (flags & DMU_READ_NO_DECRYPT)
db_flags |= DB_RF_NO_DECRYPT;
err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp); err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
if (err == 0) { if (err == 0) {
@ -221,6 +223,8 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
if (flags & DMU_READ_NO_PREFETCH) if (flags & DMU_READ_NO_PREFETCH)
db_flags |= DB_RF_NOPREFETCH; db_flags |= DB_RF_NOPREFETCH;
if (flags & DMU_READ_NO_DECRYPT)
db_flags |= DB_RF_NO_DECRYPT;
err = dmu_buf_hold_noread(os, object, offset, tag, dbp); err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
if (err == 0) { if (err == 0) {
@ -321,11 +325,18 @@ dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
* returns ENOENT, EIO, or 0. * returns ENOENT, EIO, or 0.
*/ */
int int
dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags,
dmu_buf_t **dbp)
{ {
dnode_t *dn; dnode_t *dn;
dmu_buf_impl_t *db; dmu_buf_impl_t *db;
int error; int error;
uint32_t db_flags = DB_RF_MUST_SUCCEED;
if (flags & DMU_READ_NO_PREFETCH)
db_flags |= DB_RF_NOPREFETCH;
if (flags & DMU_READ_NO_DECRYPT)
db_flags |= DB_RF_NO_DECRYPT;
error = dnode_hold(os, object, FTAG, &dn); error = dnode_hold(os, object, FTAG, &dn);
if (error) if (error)
@ -355,12 +366,24 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH)); error = dbuf_read(db, NULL, db_flags);
if (error) {
dnode_evict_bonus(dn);
dbuf_rele(db, tag);
*dbp = NULL;
return (error);
}
*dbp = &db->db; *dbp = &db->db;
return (0); return (0);
} }
/*
 * Convenience form of dmu_bonus_hold_impl() that always passes
 * DMU_READ_NO_PREFETCH as the read flags. The return value is whatever
 * dmu_bonus_hold_impl() returns.
 */
int
dmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp)
{
	uint32_t flags = DMU_READ_NO_PREFETCH;

	return (dmu_bonus_hold_impl(os, obj, tag, flags, dbp));
}
/* /*
* returns ENOENT, EIO, or 0. * returns ENOENT, EIO, or 0.
* *
@ -601,8 +624,8 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag)
* indirect blocks prefeteched will be those that point to the blocks containing * indirect blocks prefeteched will be those that point to the blocks containing
* the data starting at offset, and continuing to offset + len. * the data starting at offset, and continuing to offset + len.
* *
* Note that if the indirect blocks above the blocks being prefetched are not in * Note that if the indirect blocks above the blocks being prefetched are not
* cache, they will be asychronously read in. * in cache, they will be asychronously read in.
*/ */
void void
dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
@ -1462,6 +1485,83 @@ dmu_return_arcbuf(arc_buf_t *buf)
arc_buf_destroy(buf, FTAG); arc_buf_destroy(buf, FTAG);
} }
/*
 * Pass the arc buffer 'buf' on to dbuf_assign_arcbuf() for the dbuf that
 * backs 'handle', as part of transaction 'tx'.
 */
void
dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx)
{
	dbuf_assign_arcbuf((dmu_buf_impl_t *)handle, buf, tx);
}
/*
 * Convert the arc buffer behind 'handle' to raw form, handing the supplied
 * byteorder, salt, IV and MAC to arc_convert_to_raw(). The dbuf must
 * already have an arc buffer, and its objset must have a nonzero id.
 */
void
dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
    const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
{
	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
	uint64_t objset_id = dmu_objset_id(dbuf->db_objset);
	dmu_object_type_t ot;

	ASSERT3P(dbuf->db_buf, !=, NULL);
	ASSERT3U(objset_id, !=, 0);

	/* Announce the crypt parameter change in this tx before converting. */
	dmu_buf_will_change_crypt_params(handle, tx);

	/* Fetch the object type of the dnode that owns this dbuf. */
	DB_DNODE_ENTER(dbuf);
	ot = DB_DNODE(dbuf)->dn_type;
	DB_DNODE_EXIT(dbuf);

	/*
	 * Releasing here technically violates the assumption made by
	 * the dmu code that dnode blocks are only released in syncing
	 * context.
	 */
	(void) arc_release(dbuf->db_buf, dbuf);
	arc_convert_to_raw(dbuf->db_buf, objset_id, byteorder, ot,
	    salt, iv, mac);
}
/*
 * Copy the contents of the dbuf behind 'handle' into the block of 'object'
 * (in objset 'os') that covers 'offset', as part of transaction 'tx'.
 *
 * A new arc buffer of the same flavor as the source is loaned: raw, with
 * the source's byteorder / salt / IV / MAC, when the source buffer is
 * encrypted, and a plain buffer otherwise. The source data is copied into
 * it and the buffer is then assigned to the destination dbuf. The source
 * dbuf must already have an arc buffer. Failure to hold the destination
 * dbuf is fatal (VERIFY0).
 */
void
dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
    dmu_buf_t *handle, dmu_tx_t *tx)
{
	dmu_buf_t *dst_handle;
	dmu_buf_impl_t *dstdb;
	dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
	dmu_object_type_t type;
	arc_buf_t *abuf;
	uint64_t datalen;
	boolean_t byteorder;
	uint8_t salt[ZIO_DATA_SALT_LEN];
	uint8_t iv[ZIO_DATA_IV_LEN];
	uint8_t mac[ZIO_DATA_MAC_LEN];

	ASSERT3P(srcdb->db_buf, !=, NULL);

	/* hold the db that we want to write to */
	VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &dst_handle,
	    DMU_READ_NO_DECRYPT));
	dstdb = (dmu_buf_impl_t *)dst_handle;
	datalen = arc_buf_size(srcdb->db_buf);

	/*
	 * Read the destination object's type once, bracketed by
	 * DB_DNODE_ENTER / DB_DNODE_EXIT in the same way that
	 * dmu_convert_to_raw() accesses DB_DNODE().
	 */
	DB_DNODE_ENTER(dstdb);
	type = DB_DNODE(dstdb)->dn_type;
	DB_DNODE_EXIT(dstdb);

	/* allocate an arc buffer that matches the type of srcdb->db_buf */
	if (arc_is_encrypted(srcdb->db_buf)) {
		arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
		abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
		    byteorder, salt, iv, mac, type,
		    datalen, arc_buf_lsize(srcdb->db_buf),
		    arc_get_compression(srcdb->db_buf));
	} else {
		/* we won't get a compressed db back from dmu_buf_hold() */
		ASSERT3U(arc_get_compression(srcdb->db_buf),
		    ==, ZIO_COMPRESS_OFF);
		abuf = arc_loan_buf(os->os_spa,
		    DMU_OT_IS_METADATA(type), datalen);
	}

	ASSERT3U(datalen, ==, arc_buf_size(abuf));

	/* copy the data to the new buffer and assign it to the dstdb */
	bcopy(srcdb->db_buf->b_data, abuf->b_data, datalen);
	dbuf_assign_arcbuf(dstdb, abuf, tx);
	dmu_buf_rele(dst_handle, FTAG);
}
/* /*
* When possible directly assign passed loaned arc buffer to a dbuf. * When possible directly assign passed loaned arc buffer to a dbuf.
* If this is not possible copy the contents of passed arc buf via * If this is not possible copy the contents of passed arc buf via
@ -1537,7 +1637,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
BP_SET_LSIZE(bp, db->db_size); BP_SET_LSIZE(bp, db->db_size);
} else if (!BP_IS_EMBEDDED(bp)) { } else if (!BP_IS_EMBEDDED(bp)) {
ASSERT(BP_GET_LEVEL(bp) == 0); ASSERT(BP_GET_LEVEL(bp) == 0);
bp->blk_fill = 1; BP_SET_FILL(bp, 1);
} }
} }
} }
@ -1842,6 +1942,20 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
return (0); return (0);
} }
/*
 * Hold the dnode for 'object' in 'os' and ask dnode_set_nlevels() to set
 * its number of levels to 'nlevels' in 'tx'. If the hold fails, its error
 * is returned; otherwise the result of dnode_set_nlevels() is returned
 * after the hold has been dropped.
 */
int
dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, dmu_tx_t *tx)
{
	dnode_t *dn;
	int error = dnode_hold(os, object, FTAG, &dn);

	if (error == 0) {
		error = dnode_set_nlevels(dn, nlevels, tx);
		dnode_rele(dn, FTAG);
	}

	return (error);
}
int int
dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
dmu_tx_t *tx) dmu_tx_t *tx)
@ -1916,6 +2030,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
boolean_t dedup = B_FALSE; boolean_t dedup = B_FALSE;
boolean_t nopwrite = B_FALSE; boolean_t nopwrite = B_FALSE;
boolean_t dedup_verify = os->os_dedup_verify; boolean_t dedup_verify = os->os_dedup_verify;
boolean_t encrypt = B_FALSE;
int copies = os->os_copies; int copies = os->os_copies;
/* /*
@ -2003,16 +2118,44 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled); compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
} }
zp->zp_checksum = checksum; /*
zp->zp_compress = compress; * All objects in an encrypted objset are protected from modification
ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT); * via a MAC. Encrypted objects store their IV and salt in the last DVA
* in the bp, so we cannot use all copies. Encrypted objects are also
* not subject to nopwrite since writing the same data will still
* result in a new ciphertext. Only encrypted blocks can be dedup'd
* to avoid ambiguity in the dedup code since the DDT does not store
* object types.
*/
if (os->os_encrypted && (wp & WP_NOFILL) == 0) {
encrypt = B_TRUE;
if (DMU_OT_IS_ENCRYPTED(type)) {
copies = MIN(copies, SPA_DVAS_PER_BP - 1);
nopwrite = B_FALSE;
} else {
dedup = B_FALSE;
}
if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)
compress = ZIO_COMPRESS_EMPTY;
}
zp->zp_compress = compress;
zp->zp_checksum = checksum;
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type; zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
zp->zp_level = level; zp->zp_level = level;
zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa)); zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
zp->zp_dedup = dedup; zp->zp_dedup = dedup;
zp->zp_dedup_verify = dedup && dedup_verify; zp->zp_dedup_verify = dedup && dedup_verify;
zp->zp_nopwrite = nopwrite; zp->zp_nopwrite = nopwrite;
zp->zp_encrypt = encrypt;
zp->zp_byteorder = ZFS_HOST_BYTEORDER;
bzero(zp->zp_salt, ZIO_DATA_SALT_LEN);
bzero(zp->zp_iv, ZIO_DATA_IV_LEN);
bzero(zp->zp_mac, ZIO_DATA_MAC_LEN);
ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
} }
/* /*
@ -2267,6 +2410,7 @@ EXPORT_SYMBOL(dmu_object_info_from_dnode);
EXPORT_SYMBOL(dmu_object_info_from_db); EXPORT_SYMBOL(dmu_object_info_from_db);
EXPORT_SYMBOL(dmu_object_size_from_db); EXPORT_SYMBOL(dmu_object_size_from_db);
EXPORT_SYMBOL(dmu_object_dnsize_from_db); EXPORT_SYMBOL(dmu_object_dnsize_from_db);
EXPORT_SYMBOL(dmu_object_set_nlevels);
EXPORT_SYMBOL(dmu_object_set_blocksize); EXPORT_SYMBOL(dmu_object_set_blocksize);
EXPORT_SYMBOL(dmu_object_set_checksum); EXPORT_SYMBOL(dmu_object_set_checksum);
EXPORT_SYMBOL(dmu_object_set_compress); EXPORT_SYMBOL(dmu_object_set_compress);

View File

@ -56,6 +56,7 @@
#include <sys/vdev.h> #include <sys/vdev.h>
#include <sys/policy.h> #include <sys/policy.h>
#include <sys/spa_impl.h> #include <sys/spa_impl.h>
#include <sys/dmu_send.h>
/* /*
* Needed to close a window in dnode_move() that allows the objset to be freed * Needed to close a window in dnode_move() that allows the objset to be freed
@ -391,16 +392,23 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (!BP_IS_HOLE(os->os_rootbp)) { if (!BP_IS_HOLE(os->os_rootbp)) {
arc_flags_t aflags = ARC_FLAG_WAIT; arc_flags_t aflags = ARC_FLAG_WAIT;
zbookmark_phys_t zb; zbookmark_phys_t zb;
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
if (DMU_OS_IS_L2CACHEABLE(os)) if (DMU_OS_IS_L2CACHEABLE(os))
aflags |= ARC_FLAG_L2CACHE; aflags |= ARC_FLAG_L2CACHE;
if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
ASSERT(BP_IS_AUTHENTICATED(bp));
zio_flags |= ZIO_FLAG_RAW;
}
dprintf_bp(os->os_rootbp, "reading %s", ""); dprintf_bp(os->os_rootbp, "reading %s", "");
err = arc_read(NULL, spa, os->os_rootbp, err = arc_read(NULL, spa, os->os_rootbp,
arc_getbuf_func, &os->os_phys_buf, arc_getbuf_func, &os->os_phys_buf,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (err != 0) { if (err != 0) {
kmem_free(os, sizeof (objset_t)); kmem_free(os, sizeof (objset_t));
/* convert checksum errors into IO errors */ /* convert checksum errors into IO errors */
@ -441,6 +449,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (ds != NULL) { if (ds != NULL) {
boolean_t needlock = B_FALSE; boolean_t needlock = B_FALSE;
os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0);
/* /*
* Note: it's valid to open the objset if the dataset is * Note: it's valid to open the objset if the dataset is
* long-held, in which case the pool_config lock will not * long-held, in which case the pool_config lock will not
@ -450,6 +460,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
needlock = B_TRUE; needlock = B_TRUE;
dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
} }
err = dsl_prop_register(ds, err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os); primary_cache_changed_cb, os);
@ -517,6 +528,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/* It's the meta-objset. */ /* It's the meta-objset. */
os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
os->os_compress = ZIO_COMPRESS_ON; os->os_compress = ZIO_COMPRESS_ON;
os->os_encrypted = B_FALSE;
os->os_copies = spa_max_replication(spa); os->os_copies = spa_max_replication(spa);
os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
os->os_dedup_verify = B_FALSE; os->os_dedup_verify = B_FALSE;
@ -603,16 +615,18 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
* can be held at a time. * can be held at a time.
*/ */
int int
dmu_objset_hold(const char *name, void *tag, objset_t **osp) dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
objset_t **osp)
{ {
dsl_pool_t *dp; dsl_pool_t *dp;
dsl_dataset_t *ds; dsl_dataset_t *ds;
int err; int err;
ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
err = dsl_pool_hold(name, tag, &dp); err = dsl_pool_hold(name, tag, &dp);
if (err != 0) if (err != 0)
return (err); return (err);
err = dsl_dataset_hold(dp, name, tag, &ds); err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds);
if (err != 0) { if (err != 0) {
dsl_pool_rele(dp, tag); dsl_pool_rele(dp, tag);
return (err); return (err);
@ -627,23 +641,38 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
return (err); return (err);
} }
/*
 * Shorthand for dmu_objset_hold_flags() with the decrypt argument fixed
 * to B_FALSE. The return value is whatever dmu_objset_hold_flags() returns.
 */
int
dmu_objset_hold(const char *name, void *tag, objset_t **osp)
{
	const boolean_t decrypt = B_FALSE;

	return (dmu_objset_hold_flags(name, decrypt, tag, osp));
}
static int static int
dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp) boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{ {
int err; int err;
err = dmu_objset_from_ds(ds, osp); err = dmu_objset_from_ds(ds, osp);
if (err != 0) { if (err != 0) {
dsl_dataset_disown(ds, tag); return (err);
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
dsl_dataset_disown(ds, tag);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} else if (!readonly && dsl_dataset_is_snapshot(ds)) { } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
dsl_dataset_disown(ds, tag);
return (SET_ERROR(EROFS)); return (SET_ERROR(EROFS));
} }
return (err);
/* if we are decrypting, we can now check MACs in os->os_phys_buf */
if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) {
err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa,
ds->ds_object, B_FALSE);
if (err != 0)
return (err);
ASSERT0(arc_is_unauthenticated((*osp)->os_phys_buf));
}
return (0);
} }
/* /*
@ -653,49 +682,71 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
*/ */
int int
dmu_objset_own(const char *name, dmu_objset_type_t type, dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp) boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{ {
dsl_pool_t *dp; dsl_pool_t *dp;
dsl_dataset_t *ds; dsl_dataset_t *ds;
int err; int err;
ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
err = dsl_pool_hold(name, FTAG, &dp); err = dsl_pool_hold(name, FTAG, &dp);
if (err != 0) if (err != 0)
return (err); return (err);
err = dsl_dataset_own(dp, name, tag, &ds); err = dsl_dataset_own(dp, name, flags, tag, &ds);
if (err != 0) { if (err != 0) {
dsl_pool_rele(dp, FTAG); dsl_pool_rele(dp, FTAG);
return (err); return (err);
} }
err = dmu_objset_own_impl(ds, type, readonly, tag, osp); err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
if (err != 0) {
dsl_dataset_disown(ds, flags, tag);
dsl_pool_rele(dp, FTAG);
return (err);
}
dsl_pool_rele(dp, FTAG); dsl_pool_rele(dp, FTAG);
if (err == 0 && dmu_objset_userobjspace_upgradable(*osp)) if (dmu_objset_userobjspace_upgradable(*osp))
dmu_objset_userobjspace_upgrade(*osp); dmu_objset_userobjspace_upgrade(*osp);
return (err); return (0);
} }
int int
dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type, dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp) boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{ {
dsl_dataset_t *ds; dsl_dataset_t *ds;
int err; int err;
ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
err = dsl_dataset_own_obj(dp, obj, tag, &ds); err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
if (err != 0) if (err != 0)
return (err); return (err);
return (dmu_objset_own_impl(ds, type, readonly, tag, osp)); err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
if (err != 0) {
dsl_dataset_disown(ds, flags, tag);
return (err);
}
return (0);
}
void
dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
{
ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
dsl_pool_t *dp = dmu_objset_pool(os);
dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
dsl_pool_rele(dp, tag);
} }
void void
dmu_objset_rele(objset_t *os, void *tag) dmu_objset_rele(objset_t *os, void *tag)
{ {
dsl_pool_t *dp = dmu_objset_pool(os); dmu_objset_rele_flags(os, B_FALSE, tag);
dsl_dataset_rele(os->os_dsl_dataset, tag);
dsl_pool_rele(dp, tag);
} }
/* /*
@ -710,7 +761,7 @@ dmu_objset_rele(objset_t *os, void *tag)
* same name so that it can be partially torn down and reconstructed. * same name so that it can be partially torn down and reconstructed.
*/ */
void void
dmu_objset_refresh_ownership(objset_t *os, void *tag) dmu_objset_refresh_ownership(objset_t *os, boolean_t decrypt, void *tag)
{ {
dsl_pool_t *dp; dsl_pool_t *dp;
dsl_dataset_t *ds, *newds; dsl_dataset_t *ds, *newds;
@ -724,20 +775,22 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag)
dsl_dataset_name(ds, name); dsl_dataset_name(ds, name);
dp = dmu_objset_pool(os); dp = dmu_objset_pool(os);
dsl_pool_config_enter(dp, FTAG); dsl_pool_config_enter(dp, FTAG);
dmu_objset_disown(os, tag); dmu_objset_disown(os, decrypt, tag);
VERIFY0(dsl_dataset_own(dp, name, tag, &newds)); VERIFY0(dsl_dataset_own(dp, name,
(decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, &newds));
VERIFY3P(newds, ==, os->os_dsl_dataset); VERIFY3P(newds, ==, os->os_dsl_dataset);
dsl_pool_config_exit(dp, FTAG); dsl_pool_config_exit(dp, FTAG);
} }
void void
dmu_objset_disown(objset_t *os, void *tag) dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
{ {
/* /*
* Stop upgrading thread * Stop upgrading thread
*/ */
dmu_objset_upgrade_stop(os); dmu_objset_upgrade_stop(os);
dsl_dataset_disown(os->os_dsl_dataset, tag); dsl_dataset_disown(os->os_dsl_dataset,
(decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
} }
void void
@ -820,6 +873,8 @@ dmu_objset_evict(objset_t *os)
} else { } else {
mutex_exit(&os->os_lock); mutex_exit(&os->os_lock);
} }
} }
void void
@ -866,16 +921,20 @@ dmu_objset_snap_cmtime(objset_t *os)
return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
} }
/* called from dsl for meta-objset */
objset_t * objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
dmu_objset_type_t type, dmu_tx_t *tx) dmu_objset_type_t type, int levels, int blksz, int ibs, dmu_tx_t *tx)
{ {
objset_t *os; objset_t *os;
dnode_t *mdn; dnode_t *mdn;
ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dmu_tx_is_syncing(tx));
/*
 * A zero blksz or ibs means the caller did not choose a value, so
 * substitute the constants the meta-dnode was always allocated with.
 * Each parameter is checked on its own: testing blksz a second time
 * (after it was just made non-zero) would leave ibs un-defaulted.
 */
if (blksz == 0)
	blksz = DNODE_BLOCK_SIZE;
if (ibs == 0)
	ibs = DN_MAX_INDBLKSHIFT;
if (ds != NULL) if (ds != NULL)
VERIFY0(dmu_objset_from_ds(ds, &os)); VERIFY0(dmu_objset_from_ds(ds, &os));
else else
@ -883,8 +942,8 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
mdn = DMU_META_DNODE(os); mdn = DMU_META_DNODE(os);
dnode_allocate(mdn, DMU_OT_DNODE, DNODE_BLOCK_SIZE, DN_MAX_INDBLKSHIFT, dnode_allocate(mdn, DMU_OT_DNODE, blksz, ibs, DMU_OT_NONE, 0,
DMU_OT_NONE, 0, DNODE_MIN_SLOTS, tx); DNODE_MIN_SLOTS, tx);
/* /*
* We don't want to have to increase the meta-dnode's nlevels * We don't want to have to increase the meta-dnode's nlevels
@ -898,22 +957,25 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
* to convergence, so minimizing its dn_nlevels matters. * to convergence, so minimizing its dn_nlevels matters.
*/ */
if (ds != NULL) { if (ds != NULL) {
int levels = 1; if (levels == 0) {
levels = 1;
/* /*
* Determine the number of levels necessary for the meta-dnode * Determine the number of levels necessary for the
* to contain DN_MAX_OBJECT dnodes. Note that in order to * meta-dnode to contain DN_MAX_OBJECT dnodes. Note
* ensure that we do not overflow 64 bits, there has to be * that in order to ensure that we do not overflow
* a nlevels that gives us a number of blocks > DN_MAX_OBJECT * 64 bits, there has to be a nlevels that gives us a
* but < 2^64. Therefore, * number of blocks > DN_MAX_OBJECT but < 2^64.
* (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be * Therefore, (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)
* less than (64 - log2(DN_MAX_OBJECT)) (16). * (10) must be less than (64 - log2(DN_MAX_OBJECT))
*/ * (16).
while ((uint64_t)mdn->dn_nblkptr << */
(mdn->dn_datablkshift - DNODE_SHIFT + while ((uint64_t)mdn->dn_nblkptr <<
(levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < (mdn->dn_datablkshift - DNODE_SHIFT + (levels - 1) *
DN_MAX_OBJECT) (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
levels++; DN_MAX_OBJECT)
levels++;
}
mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
mdn->dn_nlevels = levels; mdn->dn_nlevels = levels;
@ -923,7 +985,13 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
ASSERT(type != DMU_OST_ANY); ASSERT(type != DMU_OST_ANY);
ASSERT(type < DMU_OST_NUMTYPES); ASSERT(type < DMU_OST_NUMTYPES);
os->os_phys->os_type = type; os->os_phys->os_type = type;
if (dmu_objset_userused_enabled(os)) {
/*
* Enable user accounting if it is enabled and this is not an
* encrypted receive.
*/
if (dmu_objset_userused_enabled(os) &&
(!os->os_encrypted || !dmu_objset_is_receiving(os))) {
os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
if (dmu_objset_userobjused_enabled(os)) { if (dmu_objset_userobjused_enabled(os)) {
ds->ds_feature_activation_needed[ ds->ds_feature_activation_needed[
@ -939,6 +1007,14 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
return (os); return (os);
} }
/*
 * Called from dsl for the meta-objset.
 *
 * Thin wrapper around dmu_objset_create_impl_dnstats() that passes 0 for
 * the levels, blksz and ibs arguments and hands back the objset that
 * function returns.
 */
objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dmu_tx_t *tx)
{
	objset_t *created;

	created = dmu_objset_create_impl_dnstats(spa, ds, bp, type,
	    0, 0, 0, tx);

	return (created);
}
typedef struct dmu_objset_create_arg { typedef struct dmu_objset_create_arg {
const char *doca_name; const char *doca_name;
cred_t *doca_cred; cred_t *doca_cred;
@ -947,6 +1023,7 @@ typedef struct dmu_objset_create_arg {
void *doca_userarg; void *doca_userarg;
dmu_objset_type_t doca_type; dmu_objset_type_t doca_type;
uint64_t doca_flags; uint64_t doca_flags;
dsl_crypto_params_t *doca_dcp;
} dmu_objset_create_arg_t; } dmu_objset_create_arg_t;
/*ARGSUSED*/ /*ARGSUSED*/
@ -972,8 +1049,16 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
dsl_dir_rele(pdd, FTAG); dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EEXIST)); return (SET_ERROR(EEXIST));
} }
error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp);
if (error != 0) {
dsl_dir_rele(pdd, FTAG);
return (error);
}
error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
doca->doca_cred); doca->doca_cred);
dsl_dir_rele(pdd, FTAG); dsl_dir_rele(pdd, FTAG);
return (error); return (error);
@ -990,13 +1075,15 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
uint64_t obj; uint64_t obj;
blkptr_t *bp; blkptr_t *bp;
objset_t *os; objset_t *os;
zio_t *rzio;
VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail)); VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags, obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
doca->doca_cred, tx); doca->doca_cred, doca->doca_dcp, tx);
VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj,
DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bp = dsl_dataset_get_blkptr(ds); bp = dsl_dataset_get_blkptr(ds);
os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
@ -1008,18 +1095,56 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
doca->doca_cred, tx); doca->doca_cred, tx);
} }
/*
* The doca_userfunc() will write out some data that needs to be
* encrypted if the dataset is encrypted (specifically the root
* directory). This data must be written out before the encryption
* key mapping is removed by dsl_dataset_rele_flags(). Force the
* I/O to occur immediately by invoking the relevant sections of
* dsl_pool_sync().
*/
if (os->os_encrypted) {
dsl_dataset_t *tmpds = NULL;
boolean_t need_sync_done = B_FALSE;
rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
tmpds = txg_list_remove(&dp->dp_dirty_datasets, tx->tx_txg);
if (tmpds != NULL) {
ASSERT3P(ds, ==, tmpds);
dsl_dataset_sync(ds, rzio, tx);
need_sync_done = B_TRUE;
}
VERIFY0(zio_wait(rzio));
dmu_objset_do_userquota_updates(os, tx);
taskq_wait(dp->dp_sync_taskq);
rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
tmpds = txg_list_remove(&dp->dp_dirty_datasets, tx->tx_txg);
if (tmpds != NULL) {
ASSERT3P(ds, ==, tmpds);
dmu_buf_rele(ds->ds_dbuf, ds);
dsl_dataset_sync(ds, rzio, tx);
}
VERIFY0(zio_wait(rzio));
if (need_sync_done)
dsl_dataset_sync_done(ds, tx);
}
spa_history_log_internal_ds(ds, "create", tx, ""); spa_history_log_internal_ds(ds, "create", tx, "");
zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE); zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
dsl_dir_rele(pdd, FTAG); dsl_dir_rele(pdd, FTAG);
} }
int int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg)
{ {
dmu_objset_create_arg_t doca; dmu_objset_create_arg_t doca;
dsl_crypto_params_t tmp_dcp = { 0 };
doca.doca_name = name; doca.doca_name = name;
doca.doca_cred = CRED(); doca.doca_cred = CRED();
@ -1028,9 +1153,19 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
doca.doca_userarg = arg; doca.doca_userarg = arg;
doca.doca_type = type; doca.doca_type = type;
/*
* Some callers (mostly for testing) do not provide a dcp on their
* own but various code inside the sync task will require it to be
* allocated. Rather than adding NULL checks throughout this code
* or adding dummy dcp's to all of the callers we simply create a
* dummy one here and use that. This zero dcp will have the same
* effect as asking for inheritance of all encryption params.
*/
doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp;
return (dsl_sync_task(name, return (dsl_sync_task(name,
dmu_objset_create_check, dmu_objset_create_sync, &doca, dmu_objset_create_check, dmu_objset_create_sync, &doca,
5, ZFS_SPACE_CHECK_NORMAL)); 6, ZFS_SPACE_CHECK_NORMAL));
} }
typedef struct dmu_objset_clone_arg { typedef struct dmu_objset_clone_arg {
@ -1070,18 +1205,29 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
dsl_dir_rele(pdd, FTAG); dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EDQUOT)); return (SET_ERROR(EDQUOT));
} }
dsl_dir_rele(pdd, FTAG);
error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin); error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
if (error != 0) if (error != 0) {
dsl_dir_rele(pdd, FTAG);
return (error); return (error);
}
/* You can only clone snapshots, not the head datasets. */ /* You can only clone snapshots, not the head datasets. */
if (!origin->ds_is_snapshot) { if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(origin, FTAG);
dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir);
if (error != 0) {
dsl_dataset_rele(origin, FTAG);
dsl_dir_rele(pdd, FTAG);
return (error);
}
dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(origin, FTAG);
dsl_dir_rele(pdd, FTAG);
return (0); return (0);
} }
@ -1101,7 +1247,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
obj = dsl_dataset_create_sync(pdd, tail, origin, 0, obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
doca->doca_cred, tx); doca->doca_cred, NULL, tx);
VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
dsl_dataset_name(origin, namebuf); dsl_dataset_name(origin, namebuf);
@ -1124,7 +1270,7 @@ dmu_objset_clone(const char *clone, const char *origin)
return (dsl_sync_task(clone, return (dsl_sync_task(clone,
dmu_objset_clone_check, dmu_objset_clone_sync, &doca, dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
5, ZFS_SPACE_CHECK_NORMAL)); 6, ZFS_SPACE_CHECK_NORMAL));
} }
int int
@ -1232,6 +1378,7 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
blkptr_t *bp = zio->io_bp; blkptr_t *bp = zio->io_bp;
objset_t *os = arg; objset_t *os = arg;
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
uint64_t fill = 0;
ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
@ -1243,9 +1390,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
* objects that are stored in the objset_phys_t -- the meta * objects that are stored in the objset_phys_t -- the meta
* dnode and user/group accounting objects). * dnode and user/group accounting objects).
*/ */
bp->blk_fill = 0;
for (i = 0; i < dnp->dn_nblkptr; i++) for (i = 0; i < dnp->dn_nblkptr; i++)
bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]); fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
BP_SET_FILL(bp, fill);
if (os->os_dsl_dataset != NULL) if (os->os_dsl_dataset != NULL)
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG); rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
*os->os_rootbp = *bp; *os->os_rootbp = *bp;
@ -1334,6 +1483,19 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
dmu_write_policy(os, NULL, 0, 0, &zp); dmu_write_policy(os, NULL, 0, 0, &zp);
/*
* If we are either claiming the ZIL or doing a raw receive, write out
* the os_phys_buf raw. Neither of these actions will affect the MAC
* at this point.
*/
if (arc_is_unauthenticated(os->os_phys_buf) || os->os_next_write_raw) {
ASSERT(os->os_encrypted);
os->os_next_write_raw = B_FALSE;
arc_convert_to_raw(os->os_phys_buf,
os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
DMU_OT_OBJSET, NULL, NULL, NULL);
}
zio = arc_write(pio, os->os_spa, tx->tx_txg, zio = arc_write(pio, os->os_spa, tx->tx_txg,
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
@ -1357,7 +1519,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
txgoff = tx->tx_txg & TXG_MASK; txgoff = tx->tx_txg & TXG_MASK;
if (dmu_objset_userused_enabled(os)) { if (dmu_objset_userused_enabled(os) &&
(!os->os_encrypted || !dmu_objset_is_receiving(os))) {
/* /*
* We must create the list here because it uses the * We must create the list here because it uses the
* dn_dirty_link[] of this txg. But it may already * dn_dirty_link[] of this txg. But it may already
@ -1637,6 +1800,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
if (!dmu_objset_userused_enabled(os)) if (!dmu_objset_userused_enabled(os))
return; return;
/* if this is a raw receive just return and handle accounting later */
if (os->os_encrypted && dmu_objset_is_receiving(os))
return;
/* Allocate the user/groupused objects if necessary. */ /* Allocate the user/groupused objects if necessary. */
if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
VERIFY0(zap_create_claim(os, VERIFY0(zap_create_claim(os,
@ -1716,6 +1883,18 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
if (!dmu_objset_userused_enabled(dn->dn_objset)) if (!dmu_objset_userused_enabled(dn->dn_objset))
return; return;
/*
* Raw receives introduce a problem with user accounting. Raw
* receives cannot update the user accounting info because the
* user ids and the sizes are encrypted. To guarantee that we
* never end up with bad user accounting, we simply disable it
* during raw receives. We also disable this for normal receives
* so that an incremental raw receive may be done on top of an
* existing non-raw receive.
*/
if (os->os_encrypted && dmu_objset_is_receiving(os))
return;
if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
DN_ID_CHKED_SPILL))) DN_ID_CHKED_SPILL)))
return; return;
@ -2493,8 +2672,10 @@ EXPORT_SYMBOL(dmu_objset_ds);
EXPORT_SYMBOL(dmu_objset_type); EXPORT_SYMBOL(dmu_objset_type);
EXPORT_SYMBOL(dmu_objset_name); EXPORT_SYMBOL(dmu_objset_name);
EXPORT_SYMBOL(dmu_objset_hold); EXPORT_SYMBOL(dmu_objset_hold);
EXPORT_SYMBOL(dmu_objset_hold_flags);
EXPORT_SYMBOL(dmu_objset_own); EXPORT_SYMBOL(dmu_objset_own);
EXPORT_SYMBOL(dmu_objset_rele); EXPORT_SYMBOL(dmu_objset_rele);
EXPORT_SYMBOL(dmu_objset_rele_flags);
EXPORT_SYMBOL(dmu_objset_disown); EXPORT_SYMBOL(dmu_objset_disown);
EXPORT_SYMBOL(dmu_objset_from_ds); EXPORT_SYMBOL(dmu_objset_from_ds);
EXPORT_SYMBOL(dmu_objset_create); EXPORT_SYMBOL(dmu_objset_create);
@ -2512,6 +2693,7 @@ EXPORT_SYMBOL(dmu_objset_dnodesize);
EXPORT_SYMBOL(dmu_objset_sync); EXPORT_SYMBOL(dmu_objset_sync);
EXPORT_SYMBOL(dmu_objset_is_dirty); EXPORT_SYMBOL(dmu_objset_is_dirty);
EXPORT_SYMBOL(dmu_objset_create_impl_dnstats);
EXPORT_SYMBOL(dmu_objset_create_impl); EXPORT_SYMBOL(dmu_objset_create_impl);
EXPORT_SYMBOL(dmu_objset_open_impl); EXPORT_SYMBOL(dmu_objset_open_impl);
EXPORT_SYMBOL(dmu_objset_evict); EXPORT_SYMBOL(dmu_objset_evict);

File diff suppressed because it is too large Load Diff

View File

@ -132,7 +132,7 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh)
zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);
(void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
claim_txg); claim_txg, !(td->td_flags & TRAVERSE_NO_DECRYPT));
zil_free(zilog); zil_free(zilog);
} }
@ -181,6 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td,
const blkptr_t *bp, const zbookmark_phys_t *zb) const blkptr_t *bp, const zbookmark_phys_t *zb)
{ {
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
int zio_flags = ZIO_FLAG_CANFAIL;
if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
return; return;
@ -196,8 +197,11 @@ traverse_prefetch_metadata(traverse_data_t *td,
if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
return; return;
if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
zio_flags |= ZIO_FLAG_RAW;
(void) arc_read(NULL, td->td_spa, bp, NULL, NULL, (void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
} }
static boolean_t static boolean_t
@ -294,6 +298,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
zbookmark_phys_t *czb; zbookmark_phys_t *czb;
ASSERT(!BP_IS_PROTECTED(bp));
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0) if (err != 0)
@ -324,14 +330,23 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
uint32_t flags = ARC_FLAG_WAIT; uint32_t flags = ARC_FLAG_WAIT;
uint32_t zio_flags = ZIO_FLAG_CANFAIL;
int32_t i; int32_t i;
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
dnode_phys_t *child_dnp; dnode_phys_t *child_dnp;
/*
* dnode blocks might have their bonus buffers encrypted, so
* we must be careful to honor TRAVERSE_NO_DECRYPT
*/
if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
zio_flags |= ZIO_FLAG_RAW;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
if (err != 0) if (err != 0)
goto post; goto post;
child_dnp = buf->b_data; child_dnp = buf->b_data;
for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) { for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
@ -347,11 +362,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
break; break;
} }
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
uint32_t zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t flags = ARC_FLAG_WAIT; arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp; objset_phys_t *osp;
if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
zio_flags |= ZIO_FLAG_RAW;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
if (err != 0) if (err != 0)
goto post; goto post;
@ -500,6 +519,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{ {
prefetch_data_t *pfd = arg; prefetch_data_t *pfd = arg;
int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
ASSERT(pfd->pd_bytes_fetched >= 0); ASSERT(pfd->pd_bytes_fetched >= 0);
@ -518,8 +538,11 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
cv_broadcast(&pfd->pd_cv); cv_broadcast(&pfd->pd_cv);
mutex_exit(&pfd->pd_mtx); mutex_exit(&pfd->pd_mtx);
if ((pfd->pd_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
zio_flags |= ZIO_FLAG_RAW;
(void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, (void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb); zio_flags, &aflags, zb);
return (0); return (0);
} }
@ -599,13 +622,17 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
/* See comment on ZIL traversal in dsl_scan_visitds. */ /* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) { if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
uint32_t flags = ARC_FLAG_WAIT; uint32_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp; objset_phys_t *osp;
arc_buf_t *buf; arc_buf_t *buf;
err = arc_read(NULL, td->td_spa, rootbp, if ((td->td_flags & TRAVERSE_NO_DECRYPT) &&
arc_getbuf_func, &buf, BP_IS_PROTECTED(rootbp))
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, czb); zio_flags |= ZIO_FLAG_RAW;
err = arc_read(NULL, td->td_spa, rootbp, arc_getbuf_func,
&buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, czb);
if (err != 0) if (err != 0)
return (err); return (err);

View File

@ -1246,7 +1246,12 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
rw_exit(&mdn->dn_struct_rwlock); rw_exit(&mdn->dn_struct_rwlock);
if (db == NULL) if (db == NULL)
return (SET_ERROR(EIO)); return (SET_ERROR(EIO));
err = dbuf_read(db, NULL, DB_RF_CANFAIL);
/*
* We do not need to decrypt to read the dnode so it doesn't matter
* if we get the encrypted or decrypted version.
*/
err = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_NO_DECRYPT);
if (err) { if (err) {
dbuf_rele(db, FTAG); dbuf_rele(db, FTAG);
return (err); return (err);
@ -1550,11 +1555,73 @@ fail:
return (SET_ERROR(ENOTSUP)); return (SET_ERROR(ENOTSUP));
} }
/*
 * Raise dn's number of indirection levels to new_nlevels for tx's txg.
 *
 * Stores the new count in dn_nlevels and dn_next_nlevels[txgoff], dirties
 * the dbuf obtained from dbuf_hold_level(dn, old_nlevels, 0, ...), and then
 * re-parents this txg's dirty records under that dbuf's dirty record.
 *
 * The caller must already hold dn_struct_rwlock as writer (asserted below);
 * this function does not take or drop that lock itself.
 */
static void
dnode_set_nlevels_impl(dnode_t *dn, int new_nlevels, dmu_tx_t *tx)
{
uint64_t txgoff = tx->tx_txg & TXG_MASK;
/* captured before dn_nlevels is overwritten below */
int old_nlevels = dn->dn_nlevels;
dmu_buf_impl_t *db;
list_t *list;
dbuf_dirty_record_t *new, *dr, *dr_next;
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
dn->dn_nlevels = new_nlevels;
/* the level count recorded for this txg is only ever expected to grow */
ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
dn->dn_next_nlevels[txgoff] = new_nlevels;
/* dirty the left indirects */
/*
 * NOTE(review): the arguments are presumably (level, blkid), i.e. the
 * first block at the old top level -- confirm against dbuf_hold_level().
 */
db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
ASSERT(db != NULL);
new = dbuf_dirty(db, tx);
dbuf_rele(db, FTAG);
/* transfer the dirty records to the new indirect */
mutex_enter(&dn->dn_mtx);
mutex_enter(&new->dt.di.dr_mtx);
list = &dn->dn_dirty_records[txgoff];
/*
 * dr_next is fetched before dr may be unlinked so the walk can continue
 * after list_remove().
 */
for (dr = list_head(list); dr; dr = dr_next) {
dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
/*
 * Move every record that is not already at the new top level and is
 * not a bonus or spill record; such records are asserted to sit at
 * the old top level (old_nlevels - 1).
 */
if (dr->dr_dbuf->db_level != new_nlevels-1 &&
dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
list_remove(&dn->dn_dirty_records[txgoff], dr);
list_insert_tail(&new->dt.di.dr_children, dr);
dr->dr_parent = new;
}
}
mutex_exit(&new->dt.di.dr_mtx);
mutex_exit(&dn->dn_mtx);
}
/*
 * Set dn's number of indirection levels to nlevels under tx.
 *
 * Returns 0 if dn already has exactly nlevels levels (nothing is changed)
 * or after the count has been raised via dnode_set_nlevels_impl(). Returns
 * SET_ERROR(EINVAL) if nlevels is lower than the current count.
 *
 * dn_struct_rwlock is held as writer for the whole check-and-update.
 */
int
dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx)
{
	int err = 0;

	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);

	if (nlevels < dn->dn_nlevels) {
		/* a request to reduce the level count is rejected */
		err = SET_ERROR(EINVAL);
	} else if (nlevels > dn->dn_nlevels) {
		dnode_set_nlevels_impl(dn, nlevels, tx);
	}
	/* otherwise nlevels == dn->dn_nlevels and there is nothing to do */

	rw_exit(&dn->dn_struct_rwlock);

	return (err);
}
/* read-holding callers must not rely on the lock being continuously held */ /* read-holding callers must not rely on the lock being continuously held */
void void
dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
{ {
uint64_t txgoff = tx->tx_txg & TXG_MASK;
int epbs, new_nlevels; int epbs, new_nlevels;
uint64_t sz; uint64_t sz;
@ -1594,41 +1661,8 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
ASSERT3U(new_nlevels, <=, DN_MAX_LEVELS); ASSERT3U(new_nlevels, <=, DN_MAX_LEVELS);
if (new_nlevels > dn->dn_nlevels) { if (new_nlevels > dn->dn_nlevels)
int old_nlevels = dn->dn_nlevels; dnode_set_nlevels_impl(dn, new_nlevels, tx);
dmu_buf_impl_t *db;
list_t *list;
dbuf_dirty_record_t *new, *dr, *dr_next;
dn->dn_nlevels = new_nlevels;
ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
dn->dn_next_nlevels[txgoff] = new_nlevels;
/* dirty the left indirects */
db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
ASSERT(db != NULL);
new = dbuf_dirty(db, tx);
dbuf_rele(db, FTAG);
/* transfer the dirty records to the new indirect */
mutex_enter(&dn->dn_mtx);
mutex_enter(&new->dt.di.dr_mtx);
list = &dn->dn_dirty_records[txgoff];
for (dr = list_head(list); dr; dr = dr_next) {
dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
if (dr->dr_dbuf->db_level != new_nlevels-1 &&
dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
list_remove(&dn->dn_dirty_records[txgoff], dr);
list_insert_tail(&new->dt.di.dr_children, dr);
dr->dr_parent = new;
}
}
mutex_exit(&new->dt.di.dr_mtx);
mutex_exit(&dn->dn_mtx);
}
out: out:
if (have_read) if (have_read)
@ -1987,7 +2021,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
*/ */
return (SET_ERROR(ESRCH)); return (SET_ERROR(ESRCH));
} }
error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); error = dbuf_read(db, NULL,
DB_RF_CANFAIL | DB_RF_HAVESTRUCT | DB_RF_NO_DECRYPT);
if (error) { if (error) {
dbuf_rele(db, FTAG); dbuf_rele(db, FTAG);
return (error); return (error);

View File

@ -31,6 +31,7 @@
#include <sys/dmu.h> #include <sys/dmu.h>
#include <sys/dmu_tx.h> #include <sys/dmu_tx.h>
#include <sys/dmu_objset.h> #include <sys/dmu_objset.h>
#include <sys/dmu_send.h>
#include <sys/dsl_dataset.h> #include <sys/dsl_dataset.h>
#include <sys/spa.h> #include <sys/spa.h>
#include <sys/range_tree.h> #include <sys/range_tree.h>
@ -557,6 +558,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
void void
dnode_sync(dnode_t *dn, dmu_tx_t *tx) dnode_sync(dnode_t *dn, dmu_tx_t *tx)
{ {
objset_t *os = dn->dn_objset;
dnode_phys_t *dnp = dn->dn_phys; dnode_phys_t *dnp = dn->dn_phys;
int txgoff = tx->tx_txg & TXG_MASK; int txgoff = tx->tx_txg & TXG_MASK;
list_t *list = &dn->dn_dirty_records[txgoff]; list_t *list = &dn->dn_dirty_records[txgoff];
@ -572,8 +574,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
if (dmu_objset_userused_enabled(dn->dn_objset) && /*
!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { * Do user accounting if it is enabled and this is not
* an encrypted receive.
*/
if (dmu_objset_userused_enabled(os) &&
!DMU_OBJECT_IS_SPECIAL(dn->dn_object) &&
(!os->os_encrypted || !dmu_objset_is_receiving(os))) {
mutex_enter(&dn->dn_mtx); mutex_enter(&dn->dn_mtx);
dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
dn->dn_oldflags = dn->dn_phys->dn_flags; dn->dn_oldflags = dn->dn_phys->dn_flags;
@ -584,7 +591,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
mutex_exit(&dn->dn_mtx); mutex_exit(&dn->dn_mtx);
dmu_objset_userquota_get_ids(dn, B_FALSE, tx); dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
} else { } else {
/* Once we account for it, we should always account for it. */ /* Once we account for it, we should always account for it */
ASSERT(!(dn->dn_phys->dn_flags & ASSERT(!(dn->dn_phys->dn_flags &
DNODE_FLAG_USERUSED_ACCOUNTED)); DNODE_FLAG_USERUSED_ACCOUNTED));
ASSERT(!(dn->dn_phys->dn_flags & ASSERT(!(dn->dn_phys->dn_flags &

2611
module/zfs/dsl_crypt.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -386,8 +386,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
} }
int int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
dsl_dataset_t **dsp) ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
{ {
objset_t *mos = dp->dp_meta_objset; objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf; dmu_buf_t *dbuf;
@ -548,11 +548,27 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
*dsp = ds; *dsp = ds;
if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) {
err = spa_keystore_create_mapping(dp->dp_spa, ds, ds);
if (err != 0) {
dsl_dataset_rele(ds, tag);
return (SET_ERROR(EACCES));
}
}
return (0); return (0);
} }
int int
dsl_dataset_hold(dsl_pool_t *dp, const char *name, dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **dsp)
{
return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp));
}
int
dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp) void *tag, dsl_dataset_t **dsp)
{ {
dsl_dir_t *dd; dsl_dir_t *dd;
@ -568,7 +584,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
ASSERT(dsl_pool_config_held(dp)); ASSERT(dsl_pool_config_held(dp));
obj = dsl_dir_phys(dd)->dd_head_dataset_obj; obj = dsl_dir_phys(dd)->dd_head_dataset_obj;
if (obj != 0) if (obj != 0)
err = dsl_dataset_hold_obj(dp, obj, tag, &ds); err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds);
else else
err = SET_ERROR(ENOENT); err = SET_ERROR(ENOENT);
@ -577,16 +593,18 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
dsl_dataset_t *snap_ds; dsl_dataset_t *snap_ds;
if (*snapname++ != '@') { if (*snapname++ != '@') {
dsl_dataset_rele(ds, tag); dsl_dataset_rele_flags(ds, flags, tag);
dsl_dir_rele(dd, FTAG); dsl_dir_rele(dd, FTAG);
return (SET_ERROR(ENOENT)); return (SET_ERROR(ENOENT));
} }
dprintf("looking for snapshot '%s'\n", snapname); dprintf("looking for snapshot '%s'\n", snapname);
err = dsl_dataset_snap_lookup(ds, snapname, &obj); err = dsl_dataset_snap_lookup(ds, snapname, &obj);
if (err == 0) if (err == 0) {
err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds); err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag,
dsl_dataset_rele(ds, tag); &snap_ds);
}
dsl_dataset_rele_flags(ds, flags, tag);
if (err == 0) { if (err == 0) {
mutex_enter(&snap_ds->ds_lock); mutex_enter(&snap_ds->ds_lock);
@ -604,14 +622,21 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
} }
int int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dataset_t **dsp)
{
return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp));
}
int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp) void *tag, dsl_dataset_t **dsp)
{ {
int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp);
if (err != 0) if (err != 0)
return (err); return (err);
if (!dsl_dataset_tryown(*dsp, tag)) { if (!dsl_dataset_tryown(*dsp, tag)) {
dsl_dataset_rele(*dsp, tag); dsl_dataset_rele_flags(*dsp, flags, tag);
*dsp = NULL; *dsp = NULL;
return (SET_ERROR(EBUSY)); return (SET_ERROR(EBUSY));
} }
@ -619,14 +644,14 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
} }
int int
dsl_dataset_own(dsl_pool_t *dp, const char *name, dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp) void *tag, dsl_dataset_t **dsp)
{ {
int err = dsl_dataset_hold(dp, name, tag, dsp); int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp);
if (err != 0) if (err != 0)
return (err); return (err);
if (!dsl_dataset_tryown(*dsp, tag)) { if (!dsl_dataset_tryown(*dsp, tag)) {
dsl_dataset_rele(*dsp, tag); dsl_dataset_rele_flags(*dsp, flags, tag);
return (SET_ERROR(EBUSY)); return (SET_ERROR(EBUSY));
} }
return (0); return (0);
@ -707,13 +732,25 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
} }
void void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag) dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
{ {
if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 &&
(flags & DS_HOLD_FLAG_DECRYPT)) {
(void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa,
ds->ds_object, ds);
}
dmu_buf_rele(ds->ds_dbuf, tag); dmu_buf_rele(ds->ds_dbuf, tag);
} }
void void
dsl_dataset_disown(dsl_dataset_t *ds, void *tag) dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
dsl_dataset_rele_flags(ds, 0, tag);
}
void
dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
{ {
ASSERT3P(ds->ds_owner, ==, tag); ASSERT3P(ds->ds_owner, ==, tag);
ASSERT(ds->ds_dbuf != NULL); ASSERT(ds->ds_dbuf != NULL);
@ -722,7 +759,7 @@ dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
ds->ds_owner = NULL; ds->ds_owner = NULL;
mutex_exit(&ds->ds_lock); mutex_exit(&ds->ds_lock);
dsl_dataset_long_rele(ds, tag); dsl_dataset_long_rele(ds, tag);
dsl_dataset_rele(ds, tag); dsl_dataset_rele_flags(ds, flags, tag);
} }
boolean_t boolean_t
@ -751,7 +788,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds)
return (rv); return (rv);
} }
static void void
dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
{ {
spa_t *spa = dmu_tx_pool(tx)->dp_spa; spa_t *spa = dmu_tx_pool(tx)->dp_spa;
@ -781,7 +818,7 @@ dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
uint64_t uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx) dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx)
{ {
dsl_pool_t *dp = dd->dd_pool; dsl_pool_t *dp = dd->dd_pool;
dmu_buf_t *dbuf; dmu_buf_t *dbuf;
@ -881,6 +918,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
} }
} }
/* handle encryption */
dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx);
if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
@ -903,6 +943,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
zio_t *zio; zio_t *zio;
bzero(&os->os_zil_header, sizeof (os->os_zil_header)); bzero(&os->os_zil_header, sizeof (os->os_zil_header));
if (os->os_encrypted)
os->os_next_write_raw = B_TRUE;
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
dsl_dataset_sync(ds, zio, tx); dsl_dataset_sync(ds, zio, tx);
@ -916,7 +958,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
uint64_t uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) dsl_dataset_t *origin, uint64_t flags, cred_t *cr,
dsl_crypto_params_t *dcp, dmu_tx_t *tx)
{ {
dsl_pool_t *dp = pdd->dd_pool; dsl_pool_t *dp = pdd->dd_pool;
uint64_t dsobj, ddobj; uint64_t dsobj, ddobj;
@ -928,7 +971,7 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
dsobj = dsl_dataset_create_sync_dd(dd, origin, dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp,
flags & ~DS_CREATE_FLAG_NODIRTY, tx); flags & ~DS_CREATE_FLAG_NODIRTY, tx);
dsl_deleg_set_create_perms(dd, tx, cr); dsl_deleg_set_create_perms(dd, tx, cr);
@ -1821,6 +1864,10 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
DS_FIELD_RESUME_COMPRESSOK) == 0) { DS_FIELD_RESUME_COMPRESSOK) == 0) {
fnvlist_add_boolean(token_nv, "compressok"); fnvlist_add_boolean(token_nv, "compressok");
} }
if (zap_contains(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_RAWOK) == 0) {
fnvlist_add_boolean(token_nv, "rawok");
}
packed = fnvlist_pack(token_nv, &packed_size); packed = fnvlist_pack(token_nv, &packed_size);
fnvlist_free(token_nv); fnvlist_free(token_nv);
compressed = kmem_alloc(packed_size, KM_SLEEP); compressed = kmem_alloc(packed_size, KM_SLEEP);
@ -1851,6 +1898,7 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
void void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{ {
int err;
dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_pool_t *dp = ds->ds_dir->dd_pool;
uint64_t refd, avail, uobjs, aobjs, ratio; uint64_t refd, avail, uobjs, aobjs, ratio;
@ -1901,12 +1949,12 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
ds->ds_userrefs); ds->ds_userrefs);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
DS_IS_DEFER_DESTROY(ds) ? 1 : 0); DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
dsl_dataset_crypt_stats(ds, nv);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
uint64_t written, comp, uncomp; uint64_t written, comp, uncomp;
dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_pool_t *dp = ds->ds_dir->dd_pool;
dsl_dataset_t *prev; dsl_dataset_t *prev;
int err;
err = dsl_dataset_hold_obj(dp, err = dsl_dataset_hold_obj(dp,
dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);
@ -2340,7 +2388,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
fnvlist_add_string(ddra->ddra_result, "target", namebuf); fnvlist_add_string(ddra->ddra_result, "target", namebuf);
cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx);
VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));
@ -2427,6 +2475,23 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
return (SET_ERROR(EXDEV)); return (SET_ERROR(EXDEV));
} }
snap = list_head(&ddpa->shared_snaps);
if (snap == NULL) {
err = SET_ERROR(ENOENT);
goto out;
}
origin_ds = snap->ds;
/*
* Encrypted clones share a DSL Crypto Key with their origin's dsl dir.
* When doing a promote we must make sure the encryption root for
* both the target and the target's origin does not change to avoid
* needing to rewrap encryption keys
*/
err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir);
if (err != 0)
goto out;
/* /*
* Compute and check the amount of space to transfer. Since this is * Compute and check the amount of space to transfer. Since this is
* so expensive, don't do the preliminary check. * so expensive, don't do the preliminary check.
@ -2436,13 +2501,6 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
return (0); return (0);
} }
snap = list_head(&ddpa->shared_snaps);
if (snap == NULL) {
err = SET_ERROR(ENOENT);
goto out;
}
origin_ds = snap->ds;
/* compute origin's new unique space */ /* compute origin's new unique space */
snap = list_tail(&ddpa->clone_snaps); snap = list_tail(&ddpa->clone_snaps);
ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
@ -2611,6 +2669,8 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
NULL, FTAG, &odd)); NULL, FTAG, &odd));
dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx);
/* change origin's next snap */ /* change origin's next snap */
dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj;
@ -3692,11 +3752,14 @@ MODULE_PARM_DESC(zfs_max_recordsize, "Max allowed record size");
#endif #endif
EXPORT_SYMBOL(dsl_dataset_hold); EXPORT_SYMBOL(dsl_dataset_hold);
EXPORT_SYMBOL(dsl_dataset_hold_flags);
EXPORT_SYMBOL(dsl_dataset_hold_obj); EXPORT_SYMBOL(dsl_dataset_hold_obj);
EXPORT_SYMBOL(dsl_dataset_hold_obj_flags);
EXPORT_SYMBOL(dsl_dataset_own); EXPORT_SYMBOL(dsl_dataset_own);
EXPORT_SYMBOL(dsl_dataset_own_obj); EXPORT_SYMBOL(dsl_dataset_own_obj);
EXPORT_SYMBOL(dsl_dataset_name); EXPORT_SYMBOL(dsl_dataset_name);
EXPORT_SYMBOL(dsl_dataset_rele); EXPORT_SYMBOL(dsl_dataset_rele);
EXPORT_SYMBOL(dsl_dataset_rele_flags);
EXPORT_SYMBOL(dsl_dataset_disown); EXPORT_SYMBOL(dsl_dataset_disown);
EXPORT_SYMBOL(dsl_dataset_tryown); EXPORT_SYMBOL(dsl_dataset_tryown);
EXPORT_SYMBOL(dsl_dataset_create_sync); EXPORT_SYMBOL(dsl_dataset_create_sync);

View File

@ -598,8 +598,8 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
ka.ds = ds; ka.ds = ds;
ka.tx = tx; ka.tx = tx;
VERIFY0(traverse_dataset(ds, VERIFY0(traverse_dataset(ds,
dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST, dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST |
kill_blkptr, &ka)); TRAVERSE_NO_DECRYPT, kill_blkptr, &ka));
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
dsl_dataset_phys(ds)->ds_unique_bytes == 0); dsl_dataset_phys(ds)->ds_unique_bytes == 0);
} }
@ -706,6 +706,11 @@ dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
for (t = 0; t < DD_USED_NUM; t++) for (t = 0; t < DD_USED_NUM; t++)
ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);
if (dd->dd_crypto_obj != 0) {
dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx);
(void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
}
VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx)); VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
@ -951,7 +956,8 @@ dsl_destroy_head(const char *name)
* remove the objects from open context so that the txg sync * remove the objects from open context so that the txg sync
* is not too long. * is not too long.
*/ */
error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE,
FTAG, &os);
if (error == 0) { if (error == 0) {
uint64_t obj; uint64_t obj;
uint64_t prev_snap_txg = uint64_t prev_snap_txg =
@ -963,7 +969,7 @@ dsl_destroy_head(const char *name)
(void) dmu_free_long_object(os, obj); (void) dmu_free_long_object(os, obj);
/* sync out all frees */ /* sync out all frees */
txg_wait_synced(dmu_objset_pool(os), 0); txg_wait_synced(dmu_objset_pool(os), 0);
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_FALSE, FTAG);
} }
} }

View File

@ -159,6 +159,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
{ {
dmu_buf_t *dbuf; dmu_buf_t *dbuf;
dsl_dir_t *dd; dsl_dir_t *dd;
dmu_object_info_t doi;
int err; int err;
ASSERT(dsl_pool_config_held(dp)); ASSERT(dsl_pool_config_held(dp));
@ -167,14 +168,11 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
if (err != 0) if (err != 0)
return (err); return (err);
dd = dmu_buf_get_user(dbuf); dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
{ dmu_object_info_from_db(dbuf, &doi);
dmu_object_info_t doi; ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
dmu_object_info_from_db(dbuf, &doi); ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
}
#endif
if (dd == NULL) { if (dd == NULL) {
dsl_dir_t *winner; dsl_dir_t *winner;
@ -182,6 +180,15 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_object = ddobj; dd->dd_object = ddobj;
dd->dd_dbuf = dbuf; dd->dd_dbuf = dbuf;
dd->dd_pool = dp; dd->dd_pool = dp;
if (dsl_dir_is_zapified(dd) &&
zap_contains(dp->dp_meta_objset, ddobj,
DD_FIELD_CRYPTO_KEY_OBJ) == 0) {
VERIFY0(zap_lookup(dp->dp_meta_objset,
ddobj, DD_FIELD_CRYPTO_KEY_OBJ,
sizeof (uint64_t), 1, &dd->dd_crypto_obj));
}
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
dsl_prop_init(dd); dsl_prop_init(dd);
@ -918,6 +925,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
dmu_buf_rele(dbuf, FTAG); dmu_buf_rele(dbuf, FTAG);
return (ddobj); return (ddobj);
@ -935,6 +943,8 @@ dsl_dir_is_clone(dsl_dir_t *dd)
void void
dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
{ {
uint64_t intval;
mutex_enter(&dd->dd_lock); mutex_enter(&dd->dd_lock);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
dsl_dir_phys(dd)->dd_used_bytes); dsl_dir_phys(dd)->dd_used_bytes);
@ -962,18 +972,17 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
mutex_exit(&dd->dd_lock); mutex_exit(&dd->dd_lock);
if (dsl_dir_is_zapified(dd)) { if (dsl_dir_is_zapified(dd)) {
uint64_t count;
objset_t *os = dd->dd_pool->dp_meta_objset; objset_t *os = dd->dd_pool->dp_meta_objset;
if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
sizeof (count), 1, &count) == 0) { sizeof (intval), 1, &intval) == 0) {
dsl_prop_nvlist_add_uint64(nv, dsl_prop_nvlist_add_uint64(nv,
ZFS_PROP_FILESYSTEM_COUNT, count); ZFS_PROP_FILESYSTEM_COUNT, intval);
} }
if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
sizeof (count), 1, &count) == 0) { sizeof (intval), 1, &intval) == 0) {
dsl_prop_nvlist_add_uint64(nv, dsl_prop_nvlist_add_uint64(nv,
ZFS_PROP_SNAPSHOT_COUNT, count); ZFS_PROP_SNAPSHOT_COUNT, intval);
} }
} }
@ -1814,6 +1823,14 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
} }
} }
/* check for encryption errors */
error = dsl_dir_rename_crypt_check(dd, newparent);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (SET_ERROR(EACCES));
}
/* no rename into our descendant */ /* no rename into our descendant */
if (closest_common_ancestor(dd, newparent) == dd) { if (closest_common_ancestor(dd, newparent) == dd) {
dsl_dir_rele(newparent, FTAG); dsl_dir_rele(newparent, FTAG);

View File

@ -359,7 +359,8 @@ dsl_pool_close(dsl_pool_t *dp)
} }
dsl_pool_t * dsl_pool_t *
dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
uint64_t txg)
{ {
int err; int err;
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
@ -373,6 +374,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
/* create and open the MOS (meta-objset) */ /* create and open the MOS (meta-objset) */
dp->dp_meta_objset = dmu_objset_create_impl(spa, dp->dp_meta_objset = dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
spa->spa_meta_objset = dp->dp_meta_objset;
/* create the pool directory */ /* create the pool directory */
err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@ -410,8 +412,19 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB)
dsl_pool_create_origin(dp, tx); dsl_pool_create_origin(dp, tx);
/*
* Some features may be needed when creating the root dataset, so we
* create the feature objects here.
*/
if (spa_version(spa) >= SPA_VERSION_FEATURES)
spa_feature_create_zap_objects(spa, tx);
if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF &&
dcp->cp_crypt != ZIO_CRYPT_INHERIT)
spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx);
/* create the root dataset */ /* create the root dataset */
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx);
/* create the root objset */ /* create the root objset */
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
@ -865,7 +878,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
/* create the origin dir, ds, & snap-ds */ /* create the origin dir, ds, & snap-ds */
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
NULL, 0, kcred, tx); NULL, 0, kcred, NULL, tx);
VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj,

View File

@ -963,7 +963,7 @@ typedef enum dsl_prop_getflags {
DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */ DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */
DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */ DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */
DSL_PROP_GET_LOCAL = 0x4, /* local properties */ DSL_PROP_GET_LOCAL = 0x4, /* local properties */
DSL_PROP_GET_RECEIVED = 0x8 /* received properties */ DSL_PROP_GET_RECEIVED = 0x8, /* received properties */
} dsl_prop_getflags_t; } dsl_prop_getflags_t;
static int static int
@ -1130,6 +1130,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
if (err) if (err)
break; break;
} }
out: out:
if (err) { if (err) {
nvlist_free(*nvp); nvlist_free(*nvp);

View File

@ -683,7 +683,7 @@ dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh)
zilog = zil_alloc(dp->dp_meta_objset, zh); zilog = zil_alloc(dp->dp_meta_objset, zh);
(void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa, (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa,
claim_txg); claim_txg, B_FALSE);
zil_free(zilog); zil_free(zilog);
} }
@ -695,6 +695,7 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
{ {
zbookmark_phys_t czb; zbookmark_phys_t czb;
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD;
if (zfs_no_scrub_prefetch) if (zfs_no_scrub_prefetch)
return; return;
@ -703,11 +704,16 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)) (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
return; return;
if (BP_IS_PROTECTED(bp)) {
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE);
ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
zio_flags |= ZIO_FLAG_RAW;
}
SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid); SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);
(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp, (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp,
NULL, NULL, ZIO_PRIORITY_ASYNC_READ, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &czb);
ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb);
} }
static boolean_t static boolean_t
@ -793,6 +799,11 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
arc_buf_t *buf; arc_buf_t *buf;
if (BP_IS_PROTECTED(bp)) {
ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
zio_flags |= ZIO_FLAG_RAW;
}
err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
if (err) { if (err) {

View File

@ -1169,6 +1169,8 @@ spa_activate(spa_t *spa, int mode)
spa_error_entry_compare, sizeof (spa_error_entry_t), spa_error_entry_compare, sizeof (spa_error_entry_t),
offsetof(spa_error_entry_t, se_avl)); offsetof(spa_error_entry_t, se_avl));
spa_keystore_init(&spa->spa_keystore);
/* /*
* This taskq is used to perform zvol-minor-related tasks * This taskq is used to perform zvol-minor-related tasks
* asynchronously. This has several advantages, including easy * asynchronously. This has several advantages, including easy
@ -1246,10 +1248,11 @@ spa_deactivate(spa_t *spa)
* still have errors left in the queues. Empty them just in case. * still have errors left in the queues. Empty them just in case.
*/ */
spa_errlog_drain(spa); spa_errlog_drain(spa);
avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_scrub);
avl_destroy(&spa->spa_errlist_last); avl_destroy(&spa->spa_errlist_last);
spa_keystore_fini(&spa->spa_keystore);
spa->spa_state = POOL_STATE_UNINITIALIZED; spa->spa_state = POOL_STATE_UNINITIALIZED;
mutex_enter(&spa->spa_proc_lock); mutex_enter(&spa->spa_proc_lock);
@ -2094,8 +2097,8 @@ spa_load_verify(spa_t *spa)
if (spa_load_verify_metadata) { if (spa_load_verify_metadata) {
error = traverse_pool(spa, spa->spa_verify_min_txg, error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
spa_load_verify_cb, rio); TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio);
} }
(void) zio_wait(rio); (void) zio_wait(rio);
@ -2301,7 +2304,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
spa->spa_loaded_ts.tv_nsec = 0; spa->spa_loaded_ts.tv_nsec = 0;
} }
if (error != EBADF) { if (error != EBADF) {
zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0);
} }
} }
spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
@ -3978,12 +3981,28 @@ spa_l2cache_drop(spa_t *spa)
} }
} }
/*
* Verify encryption parameters for spa creation. If we are encrypting, we must
* have the encryption feature flag enabled.
*/
static int
spa_create_check_encryption_params(dsl_crypto_params_t *dcp,
boolean_t has_encryption)
{
if (dcp->cp_crypt != ZIO_CRYPT_OFF &&
dcp->cp_crypt != ZIO_CRYPT_INHERIT &&
!has_encryption)
return (SET_ERROR(ENOTSUP));
return (dmu_objset_create_crypt_check(NULL, dcp));
}
/* /*
* Pool Creation * Pool Creation
*/ */
int int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t *zplprops) nvlist_t *zplprops, dsl_crypto_params_t *dcp)
{ {
spa_t *spa; spa_t *spa;
char *altroot = NULL; char *altroot = NULL;
@ -3994,8 +4013,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
uint64_t txg = TXG_INITIAL; uint64_t txg = TXG_INITIAL;
nvlist_t **spares, **l2cache; nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache; uint_t nspares, nl2cache;
uint64_t version, obj; uint64_t version, obj, root_dsobj = 0;
boolean_t has_features; boolean_t has_features;
boolean_t has_encryption;
spa_feature_t feat;
char *feat_name;
nvpair_t *elem; nvpair_t *elem;
int c, i; int c, i;
char *poolname; char *poolname;
@ -4038,10 +4060,28 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME;
has_features = B_FALSE; has_features = B_FALSE;
has_encryption = B_FALSE;
for (elem = nvlist_next_nvpair(props, NULL); for (elem = nvlist_next_nvpair(props, NULL);
elem != NULL; elem = nvlist_next_nvpair(props, elem)) { elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
if (zpool_prop_feature(nvpair_name(elem))) if (zpool_prop_feature(nvpair_name(elem))) {
has_features = B_TRUE; has_features = B_TRUE;
feat_name = strchr(nvpair_name(elem), '@') + 1;
VERIFY0(zfeature_lookup_name(feat_name, &feat));
if (feat == SPA_FEATURE_ENCRYPTION)
has_encryption = B_TRUE;
}
}
/* verify encryption params, if they were provided */
if (dcp != NULL) {
error = spa_create_check_encryption_params(dcp, has_encryption);
if (error != 0) {
spa_deactivate(spa);
spa_remove(spa);
mutex_exit(&spa_namespace_lock);
return (error);
}
} }
if (has_features || nvlist_lookup_uint64(props, if (has_features || nvlist_lookup_uint64(props,
@ -4131,8 +4171,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
} }
spa->spa_is_initializing = B_TRUE; spa->spa_is_initializing = B_TRUE;
spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg);
spa->spa_meta_objset = dp->dp_meta_objset;
spa->spa_is_initializing = B_FALSE; spa->spa_is_initializing = B_FALSE;
/* /*
@ -4157,9 +4196,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
cmn_err(CE_PANIC, "failed to add pool config"); cmn_err(CE_PANIC, "failed to add pool config");
} }
if (spa_version(spa) >= SPA_VERSION_FEATURES)
spa_feature_create_zap_objects(spa, tx);
if (zap_add(spa->spa_meta_objset, if (zap_add(spa->spa_meta_objset,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
sizeof (uint64_t), 1, &version, tx) != 0) { sizeof (uint64_t), 1, &version, tx) != 0) {
@ -4220,15 +4256,26 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
dmu_tx_commit(tx); dmu_tx_commit(tx);
spa->spa_sync_on = B_TRUE;
txg_sync_start(spa->spa_dsl_pool);
mmp_thread_start(spa);
/* /*
* We explicitly wait for the first transaction to complete so that our * If the root dataset is encrypted we will need to create key mappings
* bean counters are appropriately updated. * for the zio layer before we start to write any data to disk and hold
* them until after the first txg has been synced. Waiting for the first
* transaction to complete also ensures that our bean counters are
* appropriately updated.
*/ */
txg_wait_synced(spa->spa_dsl_pool, txg); if (dp->dp_root_dir->dd_crypto_obj != 0) {
root_dsobj = dsl_dir_phys(dp->dp_root_dir)->dd_head_dataset_obj;
VERIFY0(spa_keystore_create_mapping_impl(spa, root_dsobj,
dp->dp_root_dir, FTAG));
}
spa->spa_sync_on = B_TRUE;
txg_sync_start(dp);
mmp_thread_start(spa);
txg_wait_synced(dp, txg);
if (dp->dp_root_dir->dd_crypto_obj != 0)
VERIFY0(spa_keystore_remove_mapping(spa, root_dsobj, FTAG));
spa_config_sync(spa, B_FALSE, B_TRUE); spa_config_sync(spa, B_FALSE, B_TRUE);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE);

View File

@ -305,7 +305,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
*/ */
if (target->spa_ccw_fail_time == 0) { if (target->spa_ccw_fail_time == 0) {
zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
target, NULL, NULL, 0, 0); target, NULL, NULL, NULL, 0, 0);
} }
target->spa_ccw_fail_time = gethrtime(); target->spa_ccw_fail_time = gethrtime();
spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE); spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);

View File

@ -90,9 +90,8 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb)
* during spa_errlog_sync(). * during spa_errlog_sync().
*/ */
void void
spa_log_error(spa_t *spa, zio_t *zio) spa_log_error(spa_t *spa, const zbookmark_phys_t *zb)
{ {
zbookmark_phys_t *zb = &zio->io_logical->io_bookmark;
spa_error_entry_t search; spa_error_entry_t search;
spa_error_entry_t *new; spa_error_entry_t *new;
avl_tree_t *tree; avl_tree_t *tree;

View File

@ -385,11 +385,16 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
{ {
int err = 0; int err = 0;
dmu_tx_t *tx; dmu_tx_t *tx;
nvlist_t *nvarg; nvlist_t *nvarg, *in_nvl = NULL;
if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl);
if (err == 0) {
(void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS);
}
tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
err = dmu_tx_assign(tx, TXG_WAIT); err = dmu_tx_assign(tx, TXG_WAIT);
if (err) { if (err) {

View File

@ -1414,6 +1414,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
char type[256]; char type[256];
char *checksum = NULL; char *checksum = NULL;
char *compress = NULL; char *compress = NULL;
char *crypt_type = NULL;
if (bp != NULL) { if (bp != NULL) {
if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) { if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
@ -1427,6 +1428,15 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
(void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name, (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
sizeof (type)); sizeof (type));
} }
if (BP_IS_ENCRYPTED(bp)) {
crypt_type = "encrypted";
} else if (BP_IS_AUTHENTICATED(bp)) {
crypt_type = "authenticated";
} else if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {
crypt_type = "indirect-MAC";
} else {
crypt_type = "unencrypted";
}
if (!BP_IS_EMBEDDED(bp)) { if (!BP_IS_EMBEDDED(bp)) {
checksum = checksum =
zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
@ -1435,7 +1445,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
} }
SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum, SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum,
compress); crypt_type, compress);
} }
void void

View File

@ -1050,7 +1050,7 @@ vdev_probe_done(zio_t *zio)
} else { } else {
ASSERT(zio->io_error != 0); ASSERT(zio->io_error != 0);
zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
spa, vd, NULL, 0, 0); spa, vd, NULL, NULL, 0, 0);
zio->io_error = SET_ERROR(ENXIO); zio->io_error = SET_ERROR(ENXIO);
} }
@ -1397,7 +1397,7 @@ vdev_open(vdev_t *vd)
if (ashift > vd->vdev_top->vdev_ashift && if (ashift > vd->vdev_top->vdev_ashift &&
vd->vdev_ops->vdev_op_leaf) { vd->vdev_ops->vdev_op_leaf) {
zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT, zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
spa, vd, NULL, 0, 0); spa, vd, NULL, NULL, 0, 0);
} }
vd->vdev_max_asize = max_asize; vd->vdev_max_asize = max_asize;
@ -3590,7 +3590,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
class = FM_EREPORT_ZFS_DEVICE_UNKNOWN; class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
} }
zfs_ereport_post(class, spa, vd, NULL, save_state, 0); zfs_ereport_post(class, spa, vd, NULL, NULL,
save_state, 0);
} }
/* Erase any notion of persistent removed state */ /* Erase any notion of persistent removed state */
@ -3758,7 +3759,7 @@ vdev_deadman(vdev_t *vd)
fio->io_timestamp, delta, fio->io_timestamp, delta,
vq->vq_io_complete_ts); vq->vq_io_complete_ts);
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
spa, vd, fio, 0, 0); spa, vd, &fio->io_bookmark, fio, 0, 0);
} }
} }
mutex_exit(&vq->vq_lock); mutex_exit(&vq->vq_lock);

View File

@ -1766,9 +1766,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
zbc.zbc_has_cksum = 0; zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected; zbc.zbc_injected = rm->rm_ecksuminjected;
zfs_ereport_post_checksum(zio->io_spa, vd, zio, zfs_ereport_post_checksum(zio->io_spa, vd,
rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
&zbc); rc->rc_abd, bad_data, &zbc);
} }
} }
@ -2256,7 +2256,8 @@ vdev_raidz_io_done(zio_t *zio)
zfs_ereport_start_checksum( zfs_ereport_start_checksum(
zio->io_spa, zio->io_spa,
vd->vdev_child[rc->rc_devidx], vd->vdev_child[rc->rc_devidx],
zio, rc->rc_offset, rc->rc_size, &zio->io_bookmark, zio,
rc->rc_offset, rc->rc_size,
(void *)(uintptr_t)c, &zbc); (void *)(uintptr_t)c, &zbc);
} }
} }

View File

@ -424,8 +424,8 @@ spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
* We create feature flags ZAP objects in two instances: during pool * We create feature flags ZAP objects in two instances: during pool
* creation and during pool upgrade. * creation and during pool upgrade.
*/ */
ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on && ASSERT((!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL) ||
tx->tx_txg == TXG_INITIAL)); dsl_pool_sync_context(spa_get_dsl(spa)));
spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset, spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,

View File

@ -2204,7 +2204,7 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
* placed into the working_mode, giving the caller a mask of denied * placed into the working_mode, giving the caller a mask of denied
* accesses. Returns: * accesses. Returns:
* 0 if all AoI granted * 0 if all AoI granted
* EACCESS if the denied mask is non-zero * EACCES if the denied mask is non-zero
* other error if abnormal failure (e.g., IO error) * other error if abnormal failure (e.g., IO error)
* *
* A secondary usage of the function is to determine if any of the * A secondary usage of the function is to determine if any of the

View File

@ -142,8 +142,8 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
static void static void
zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, const char *subclass, spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb,
uint64_t stateoroffset, uint64_t size) zio_t *zio, uint64_t stateoroffset, uint64_t size)
{ {
nvlist_t *ereport, *detector; nvlist_t *ereport, *detector;
@ -413,24 +413,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
DATA_TYPE_UINT64, zio->io_size, NULL); DATA_TYPE_UINT64, zio->io_size, NULL);
} }
/*
* Payload for I/Os with corresponding logical information.
*/
if (zio->io_logical != NULL)
fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
DATA_TYPE_UINT64,
zio->io_logical->io_bookmark.zb_objset,
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
DATA_TYPE_UINT64,
zio->io_logical->io_bookmark.zb_object,
FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
DATA_TYPE_INT64,
zio->io_logical->io_bookmark.zb_level,
FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
DATA_TYPE_UINT64,
zio->io_logical->io_bookmark.zb_blkid, NULL);
} else if (vd != NULL) { } else if (vd != NULL) {
/* /*
* If we have a vdev but no zio, this is a device fault, and the * If we have a vdev but no zio, this is a device fault, and the
@ -442,6 +424,20 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
DATA_TYPE_UINT64, stateoroffset, NULL); DATA_TYPE_UINT64, stateoroffset, NULL);
} }
/*
* Payload for I/Os with corresponding logical information.
*/
if (zb != NULL && (zio == NULL || zio->io_logical != NULL))
fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
DATA_TYPE_UINT64, zb->zb_objset,
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
DATA_TYPE_UINT64, zb->zb_object,
FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
DATA_TYPE_INT64, zb->zb_level,
FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
DATA_TYPE_UINT64, zb->zb_blkid, NULL);
mutex_exit(&spa->spa_errlist_lock); mutex_exit(&spa->spa_errlist_lock);
*ereport_out = ereport; *ereport_out = ereport;
@ -771,8 +767,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
#endif #endif
void void
zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
uint64_t stateoroffset, uint64_t size) zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t size)
{ {
#ifdef _KERNEL #ifdef _KERNEL
nvlist_t *ereport = NULL; nvlist_t *ereport = NULL;
@ -781,8 +777,8 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
if (zfs_is_ratelimiting_event(subclass, vd)) if (zfs_is_ratelimiting_event(subclass, vd))
return; return;
zfs_ereport_start(&ereport, &detector, zfs_ereport_start(&ereport, &detector, subclass, spa, vd,
subclass, spa, vd, zio, stateoroffset, size); zb, zio, stateoroffset, size);
if (ereport == NULL) if (ereport == NULL)
return; return;
@ -793,7 +789,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
} }
void void
zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb,
struct zio *zio, uint64_t offset, uint64_t length, void *arg, struct zio *zio, uint64_t offset, uint64_t length, void *arg,
zio_bad_cksum_t *info) zio_bad_cksum_t *info)
{ {
@ -823,7 +819,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
#ifdef _KERNEL #ifdef _KERNEL
zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length);
if (report->zcr_ereport == NULL) { if (report->zcr_ereport == NULL) {
zfs_ereport_free_checksum(report); zfs_ereport_free_checksum(report);
@ -879,7 +875,7 @@ zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
void void
zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb,
struct zio *zio, uint64_t offset, uint64_t length, struct zio *zio, uint64_t offset, uint64_t length,
const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc) const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc)
{ {
@ -888,8 +884,8 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
nvlist_t *detector = NULL; nvlist_t *detector = NULL;
zfs_ecksum_info_t *info; zfs_ecksum_info_t *info;
zfs_ereport_start(&ereport, &detector, zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM,
FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); spa, vd, zb, zio, offset, length);
if (ereport == NULL) if (ereport == NULL)
return; return;

View File

@ -34,7 +34,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
* Copyright (c) 2017 Datto Inc. * Copyright (c) 2017 Datto Inc. All rights reserved.
* Copyright 2017 RackTop Systems. * Copyright 2017 RackTop Systems.
*/ */
@ -185,6 +185,7 @@
#include <sys/dsl_scan.h> #include <sys/dsl_scan.h>
#include <sharefs/share.h> #include <sharefs/share.h>
#include <sys/fm/util.h> #include <sys/fm/util.h>
#include <sys/dsl_crypt.h>
#include <sys/dmu_send.h> #include <sys/dmu_send.h>
#include <sys/dsl_destroy.h> #include <sys/dsl_destroy.h>
@ -565,12 +566,12 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
* Try to own the dataset; abort if there is any error, * Try to own the dataset; abort if there is any error,
* (e.g., already mounted, in use, or other error). * (e.g., already mounted, in use, or other error).
*/ */
error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
setsl_tag, &os); setsl_tag, &os);
if (error != 0) if (error != 0)
return (SET_ERROR(EPERM)); return (SET_ERROR(EPERM));
dmu_objset_disown(os, setsl_tag); dmu_objset_disown(os, B_TRUE, setsl_tag);
if (new_default) { if (new_default) {
needed_priv = PRIV_FILE_DOWNGRADE_SL; needed_priv = PRIV_FILE_DOWNGRADE_SL;
@ -1301,6 +1302,20 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
return (error); return (error);
} }
/*
 * Security policy check registered for the "load-key" and "unload-key"
 * ioctls. The decision is deferred entirely to zfs_secpolicy_write_perms()
 * for the dataset named in zc->zc_name, using the ZFS_DELEG_PERM_LOAD_KEY
 * permission; its result is returned unchanged. innvl is not consulted.
 */
/* ARGSUSED */
static int
zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	int error;

	error = zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_LOAD_KEY, cr);

	return (error);
}
/*
 * Security policy check registered for the "change-key" ioctl. The decision
 * is deferred entirely to zfs_secpolicy_write_perms() for the dataset named
 * in zc->zc_name, using the ZFS_DELEG_PERM_CHANGE_KEY permission; its result
 * is returned unchanged. innvl is not consulted.
 */
/* ARGSUSED */
static int
zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	int error;

	error = zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_CHANGE_KEY, cr);

	return (error);
}
/* /*
* Returns the nvlist as specified by the user in the zfs_cmd_t. * Returns the nvlist as specified by the user in the zfs_cmd_t.
*/ */
@ -1462,7 +1477,7 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
if (zfsvfs->z_sb) { if (zfsvfs->z_sb) {
deactivate_super(zfsvfs->z_sb); deactivate_super(zfsvfs->z_sb);
} else { } else {
dmu_objset_disown(zfsvfs->z_os, zfsvfs); dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
zfsvfs_free(zfsvfs); zfsvfs_free(zfsvfs);
} }
} }
@ -1474,6 +1489,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
nvlist_t *config, *props = NULL; nvlist_t *config, *props = NULL;
nvlist_t *rootprops = NULL; nvlist_t *rootprops = NULL;
nvlist_t *zplprops = NULL; nvlist_t *zplprops = NULL;
dsl_crypto_params_t *dcp = NULL;
if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
zc->zc_iflags, &config))) zc->zc_iflags, &config)))
@ -1488,6 +1504,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
if (props) { if (props) {
nvlist_t *nvl = NULL; nvlist_t *nvl = NULL;
nvlist_t *hidden_args = NULL;
uint64_t version = SPA_VERSION; uint64_t version = SPA_VERSION;
(void) nvlist_lookup_uint64(props, (void) nvlist_lookup_uint64(props,
@ -1506,6 +1523,18 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
} }
(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
} }
(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
&hidden_args);
error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
rootprops, hidden_args, &dcp);
if (error != 0) {
nvlist_free(config);
nvlist_free(props);
return (error);
}
(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
error = zfs_fill_zplprops_root(version, rootprops, error = zfs_fill_zplprops_root(version, rootprops,
zplprops, NULL); zplprops, NULL);
@ -1513,7 +1542,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
goto pool_props_bad; goto pool_props_bad;
} }
error = spa_create(zc->zc_name, config, props, zplprops); error = spa_create(zc->zc_name, config, props, zplprops, dcp);
/* /*
* Set the remaining root properties * Set the remaining root properties
@ -1527,6 +1556,7 @@ pool_props_bad:
nvlist_free(zplprops); nvlist_free(zplprops);
nvlist_free(config); nvlist_free(config);
nvlist_free(props); nvlist_free(props);
dsl_crypto_params_free(dcp, !!error);
return (error); return (error);
} }
@ -1802,15 +1832,16 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc)
int error; int error;
/* XXX reading from objset not owned */ /* XXX reading from objset not owned */
if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
FTAG, &os)) != 0)
return (error); return (error);
if (dmu_objset_type(os) != DMU_OST_ZFS) { if (dmu_objset_type(os) != DMU_OST_ZFS) {
dmu_objset_rele(os, FTAG); dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
sizeof (zc->zc_value)); sizeof (zc->zc_value));
dmu_objset_rele(os, FTAG); dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (error); return (error);
} }
@ -1831,15 +1862,16 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
int error; int error;
/* XXX reading from objset not owned */ /* XXX reading from objset not owned */
if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
FTAG, &os)) != 0)
return (error); return (error);
if (dmu_objset_type(os) != DMU_OST_ZFS) { if (dmu_objset_type(os) != DMU_OST_ZFS) {
dmu_objset_rele(os, FTAG); dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
sizeof (zc->zc_value)); sizeof (zc->zc_value));
dmu_objset_rele(os, FTAG); dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (error); return (error);
} }
@ -2385,7 +2417,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
{ {
const char *propname = nvpair_name(pair); const char *propname = nvpair_name(pair);
zfs_prop_t prop = zfs_name_to_prop(propname); zfs_prop_t prop = zfs_name_to_prop(propname);
uint64_t intval; uint64_t intval = 0;
char *strval = NULL;
int err = -1; int err = -1;
if (prop == ZPROP_INVAL) { if (prop == ZPROP_INVAL) {
@ -2401,10 +2434,12 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
&pair) == 0); &pair) == 0);
} }
if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) /* all special properties are numeric except for keylocation */
return (-1); if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
strval = fnvpair_value_string(pair);
VERIFY(0 == nvpair_value_uint64(pair, &intval)); } else {
intval = fnvpair_value_uint64(pair);
}
switch (prop) { switch (prop) {
case ZFS_PROP_QUOTA: case ZFS_PROP_QUOTA:
@ -2421,6 +2456,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
} else { } else {
err = dsl_dir_activate_fs_ss_limit(dsname); err = dsl_dir_activate_fs_ss_limit(dsname);
} }
/*
* Set err to -1 to force the zfs_set_prop_nvlist code down the
* default path to set the value in the nvlist.
*/
if (err == 0)
err = -1;
break;
case ZFS_PROP_KEYLOCATION:
err = dsl_crypto_can_set_keylocation(dsname, strval);
/* /*
* Set err to -1 to force the zfs_set_prop_nvlist code down the * Set err to -1 to force the zfs_set_prop_nvlist code down the
* default path to set the value in the nvlist. * default path to set the value in the nvlist.
@ -3156,6 +3201,8 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
* innvl: { * innvl: {
* "type" -> dmu_objset_type_t (int32) * "type" -> dmu_objset_type_t (int32)
* (optional) "props" -> { prop -> value } * (optional) "props" -> { prop -> value }
* (optional) "hidden_args" -> { "wkeydata" -> value }
* raw uint8_t array of encryption wrapping key data (32 bytes)
* } * }
* *
* outnvl: propname -> error code (int32) * outnvl: propname -> error code (int32)
@ -3166,15 +3213,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
int error = 0; int error = 0;
zfs_creat_t zct = { 0 }; zfs_creat_t zct = { 0 };
nvlist_t *nvprops = NULL; nvlist_t *nvprops = NULL;
nvlist_t *hidden_args = NULL;
void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
int32_t type32; int32_t type32;
dmu_objset_type_t type; dmu_objset_type_t type;
boolean_t is_insensitive = B_FALSE; boolean_t is_insensitive = B_FALSE;
dsl_crypto_params_t *dcp = NULL;
if (nvlist_lookup_int32(innvl, "type", &type32) != 0) if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
type = type32; type = type32;
(void) nvlist_lookup_nvlist(innvl, "props", &nvprops); (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
switch (type) { switch (type) {
case DMU_OST_ZFS: case DMU_OST_ZFS:
@ -3240,9 +3290,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
} }
} }
error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
hidden_args, &dcp);
if (error != 0) {
nvlist_free(zct.zct_zplprops);
return (error);
}
error = dmu_objset_create(fsname, type, error = dmu_objset_create(fsname, type,
is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
nvlist_free(zct.zct_zplprops); nvlist_free(zct.zct_zplprops);
dsl_crypto_params_free(dcp, !!error);
/* /*
* It would be nice to do this atomically. * It would be nice to do this atomically.
@ -3277,6 +3336,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
* innvl: { * innvl: {
* "origin" -> name of origin snapshot * "origin" -> name of origin snapshot
* (optional) "props" -> { prop -> value } * (optional) "props" -> { prop -> value }
* (optional) "hidden_args" -> { "wkeydata" -> value }
* raw uint8_t array of encryption wrapping key data (32 bytes)
* } * }
* *
* outputs: * outputs:
@ -3299,9 +3360,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (dataset_namecheck(origin_name, NULL, NULL) != 0) if (dataset_namecheck(origin_name, NULL, NULL) != 0)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
error = dmu_objset_clone(fsname, origin_name); error = dmu_objset_clone(fsname, origin_name);
if (error != 0)
return (error);
/* /*
* It would be nice to do this atomically. * It would be nice to do this atomically.
@ -4160,7 +4220,11 @@ extract_delay_props(nvlist_t *props)
{ {
nvlist_t *delayprops; nvlist_t *delayprops;
nvpair_t *nvp, *tmp; nvpair_t *nvp, *tmp;
static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 }; static const zfs_prop_t delayable[] = {
ZFS_PROP_REFQUOTA,
ZFS_PROP_KEYLOCATION,
0
};
int i; int i;
VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@ -4704,6 +4768,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
boolean_t embedok = (zc->zc_flags & 0x1); boolean_t embedok = (zc->zc_flags & 0x1);
boolean_t large_block_ok = (zc->zc_flags & 0x2); boolean_t large_block_ok = (zc->zc_flags & 0x2);
boolean_t compressok = (zc->zc_flags & 0x4); boolean_t compressok = (zc->zc_flags & 0x4);
boolean_t rawok = (zc->zc_flags & 0x8);
if (zc->zc_obj != 0) { if (zc->zc_obj != 0) {
dsl_pool_t *dp; dsl_pool_t *dp;
@ -4735,7 +4800,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
if (error != 0) if (error != 0)
return (error); return (error);
error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
FTAG, &tosnap);
if (error != 0) { if (error != 0) {
dsl_pool_rele(dp, FTAG); dsl_pool_rele(dp, FTAG);
return (error); return (error);
@ -4751,7 +4817,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
} }
} }
error = dmu_send_estimate(tosnap, fromsnap, compressok, error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
&zc->zc_objset_type); &zc->zc_objset_type);
if (fromsnap != NULL) if (fromsnap != NULL)
@ -4765,7 +4831,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
off = fp->f_offset; off = fp->f_offset;
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
zc->zc_fromobj, embedok, large_block_ok, compressok, zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
zc->zc_cookie, fp->f_vnode, &off); zc->zc_cookie, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
@ -5152,7 +5218,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
error = zfs_suspend_fs(zfsvfs); error = zfs_suspend_fs(zfsvfs);
if (error == 0) { if (error == 0) {
dmu_objset_refresh_ownership(zfsvfs->z_os, dmu_objset_refresh_ownership(zfsvfs->z_os,
zfsvfs); B_TRUE, zfsvfs);
error = zfs_resume_fs(zfsvfs, ds); error = zfs_resume_fs(zfsvfs, ds);
} }
} }
@ -5161,12 +5227,12 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
deactivate_super(zfsvfs->z_sb); deactivate_super(zfsvfs->z_sb);
} else { } else {
/* XXX kind of reading contents without owning */ /* XXX kind of reading contents without owning */
error = dmu_objset_hold(zc->zc_name, FTAG, &os); error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
if (error != 0) if (error != 0)
return (error); return (error);
error = dmu_objset_userspace_upgrade(os); error = dmu_objset_userspace_upgrade(os);
dmu_objset_rele(os, FTAG); dmu_objset_rele_flags(os, B_TRUE, FTAG);
} }
return (error); return (error);
@ -5185,7 +5251,7 @@ zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc)
objset_t *os; objset_t *os;
int error; int error;
error = dmu_objset_hold(zc->zc_name, FTAG, &os); error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
if (error != 0) if (error != 0)
return (error); return (error);
@ -5209,7 +5275,7 @@ zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc)
} }
dsl_dataset_long_rele(dmu_objset_ds(os), FTAG); dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
dsl_dataset_rele(dmu_objset_ds(os), FTAG); dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
return (error); return (error);
} }
@ -5745,6 +5811,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
* presence indicates DRR_WRITE_EMBEDDED records are permitted * presence indicates DRR_WRITE_EMBEDDED records are permitted
* (optional) "compressok" -> (value ignored) * (optional) "compressok" -> (value ignored)
* presence indicates compressed DRR_WRITE records are permitted * presence indicates compressed DRR_WRITE records are permitted
* (optional) "rawok" -> (value ignored)
* presence indicates raw encrypted records should be used.
* (optional) "resume_object" and "resume_offset" -> (uint64) * (optional) "resume_object" and "resume_offset" -> (uint64)
* if present, resume send stream from specified object and offset. * if present, resume send stream from specified object and offset.
* } * }
@ -5763,6 +5831,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
boolean_t largeblockok; boolean_t largeblockok;
boolean_t embedok; boolean_t embedok;
boolean_t compressok; boolean_t compressok;
boolean_t rawok;
uint64_t resumeobj = 0; uint64_t resumeobj = 0;
uint64_t resumeoff = 0; uint64_t resumeoff = 0;
@ -5775,6 +5844,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
largeblockok = nvlist_exists(innvl, "largeblockok"); largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok"); embedok = nvlist_exists(innvl, "embedok");
compressok = nvlist_exists(innvl, "compressok"); compressok = nvlist_exists(innvl, "compressok");
rawok = nvlist_exists(innvl, "rawok");
(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
@ -5784,7 +5854,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
off = fp->f_offset; off = fp->f_offset;
error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
fd, resumeobj, resumeoff, fp->f_vnode, &off); rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off; fp->f_offset = off;
@ -5824,6 +5894,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
/* LINTED E_FUNC_SET_NOT_USED */ /* LINTED E_FUNC_SET_NOT_USED */
boolean_t embedok; boolean_t embedok;
boolean_t compressok; boolean_t compressok;
boolean_t rawok;
uint64_t space; uint64_t space;
error = dsl_pool_hold(snapname, FTAG, &dp); error = dsl_pool_hold(snapname, FTAG, &dp);
@ -5839,6 +5910,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
largeblockok = nvlist_exists(innvl, "largeblockok"); largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok"); embedok = nvlist_exists(innvl, "embedok");
compressok = nvlist_exists(innvl, "compressok"); compressok = nvlist_exists(innvl, "compressok");
rawok = nvlist_exists(innvl, "rawok");
error = nvlist_lookup_string(innvl, "from", &fromname); error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) { if (error == 0) {
@ -5852,8 +5924,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0) if (error != 0)
goto out; goto out;
error = dmu_send_estimate(tosnap, fromsnap, compressok, error = dmu_send_estimate(tosnap, fromsnap,
&space); compressok || rawok, &space);
dsl_dataset_rele(fromsnap, FTAG); dsl_dataset_rele(fromsnap, FTAG);
} else if (strchr(fromname, '#') != NULL) { } else if (strchr(fromname, '#') != NULL) {
/* /*
@ -5868,7 +5940,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0) if (error != 0)
goto out; goto out;
error = dmu_send_estimate_from_txg(tosnap, error = dmu_send_estimate_from_txg(tosnap,
frombm.zbm_creation_txg, compressok, &space); frombm.zbm_creation_txg, compressok || rawok,
&space);
} else { } else {
/* /*
* from is not properly formatted as a snapshot or * from is not properly formatted as a snapshot or
@ -5879,7 +5952,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
} }
} else { } else {
// If estimating the size of a full send, use dmu_send_estimate // If estimating the size of a full send, use dmu_send_estimate
error = dmu_send_estimate(tosnap, NULL, compressok, &space); error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
&space);
} }
fnvlist_add_uint64(outnvl, "space", space); fnvlist_add_uint64(outnvl, "space", space);
@ -5928,6 +6002,124 @@ zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
return (err); return (err);
} }
/*
 * Load a user's wrapping key into the kernel.
 *
 * innvl: {
 *     "hidden_args" -> { "wkeydata" -> value }
 *         raw uint8_t array of encryption wrapping key data (32 bytes)
 *     (optional) "noop" -> (value ignored)
 *         presence indicates the key should only be verified, not loaded
 * }
 *
 * outnvl is unused.
 *
 * Returns 0 on success. Returns EINVAL if dsname contains '@' or '%', or if
 * "hidden_args" is missing from innvl. Otherwise returns the error from
 * dsl_crypto_params_create_nvlist() or spa_keystore_load_wkey().
 */
/* ARGSUSED */
static int
zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
{
	int ret;
	dsl_crypto_params_t *dcp = NULL;
	nvlist_t *hidden_args;
	boolean_t noop = nvlist_exists(innvl, "noop");
	/*
	 * Second argument for dsl_crypto_params_free(). Every failure path
	 * passes B_TRUE; a successful load passes the "noop" setting instead.
	 * NOTE(review): presumably this flag controls whether the wrapping
	 * key is released along with the params -- confirm against
	 * dsl_crypto_params_free().
	 */
	boolean_t free_flag = B_TRUE;

	/* Reject names containing '@' or '%'. */
	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
		ret = SET_ERROR(EINVAL);
		goto out;
	}

	/* The key material is mandatory; any lookup failure becomes EINVAL. */
	if (nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
	    &hidden_args) != 0) {
		ret = SET_ERROR(EINVAL);
		goto out;
	}

	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
	    hidden_args, &dcp);
	if (ret != 0)
		goto out;

	ret = spa_keystore_load_wkey(dsname, dcp, noop);
	if (ret == 0)
		free_flag = noop;

out:
	/* dcp is still NULL here if we failed before creating the params. */
	dsl_crypto_params_free(dcp, free_flag);
	return (ret);
}
/*
 * Unload a user's wrapping key from the kernel.
 * Both innvl and outnvl are unused.
 *
 * Returns EINVAL if dsname contains '@' or '%'; otherwise returns the
 * result of spa_keystore_unload_wkey().
 */
/* ARGSUSED */
static int
zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
{
	/* Reject names containing '@' or '%'. */
	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL)
		return (SET_ERROR(EINVAL));

	return (spa_keystore_unload_wkey(dsname));
}
/*
 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
 * here to change how the key is derived in userspace.
 *
 * innvl: {
 *     "hidden_args" (optional) -> { "wkeydata" -> value }
 *         raw uint8_t array of new encryption wrapping key data (32 bytes)
 *     "props" (optional) -> { prop -> value }
 *     "crypt_cmd" (optional) -> uint64 command value handed to
 *         dsl_crypto_params_create_nvlist(); DCP_CMD_NONE when absent
 * }
 *
 * outnvl is unused
 *
 * Returns 0 on success. Returns EINVAL if dsname contains '@' or '%'.
 * Otherwise returns the error from dsl_crypto_params_create_nvlist() or
 * spa_keystore_change_key().
 */
/* ARGSUSED */
static int
zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
{
	int ret;
	uint64_t cmd = DCP_CMD_NONE;
	dsl_crypto_params_t *dcp = NULL;
	nvlist_t *args = NULL, *hidden_args = NULL;

	/* Reject names containing '@' or '%'. */
	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
		ret = (SET_ERROR(EINVAL));
		goto out;
	}

	/*
	 * All three inputs are optional: a failed lookup simply leaves the
	 * corresponding local at its initial value.
	 */
	(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
	(void) nvlist_lookup_nvlist(innvl, "props", &args);
	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);

	ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
	if (ret != 0)
		goto out;

	ret = spa_keystore_change_key(dsname, dcp);

out:
	/*
	 * The params are freed with B_TRUE on any failure and B_FALSE on
	 * success. dcp is still NULL if we failed before creating them.
	 */
	dsl_crypto_params_free(dcp, (ret != 0) ? B_TRUE : B_FALSE);
	return (ret);
}
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
static void static void
@ -6099,6 +6291,16 @@ zfs_ioctl_init(void)
zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW, zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME, zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
zfs_ioc_load_key, zfs_secpolicy_load_key,
DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
zfs_ioc_unload_key, zfs_secpolicy_load_key,
DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
zfs_ioc_change_key, zfs_secpolicy_change_key,
DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
B_TRUE, B_TRUE);
zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC, zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME, zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,

View File

@ -1048,7 +1048,8 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
* We claim to always be readonly so we can open snapshots; * We claim to always be readonly so we can open snapshots;
* other ZPL code will prevent us from writing to snapshots. * other ZPL code will prevent us from writing to snapshots.
*/ */
error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE,
zfsvfs, &os);
if (error) { if (error) {
kmem_free(zfsvfs, sizeof (zfsvfs_t)); kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error); return (error);
@ -1080,7 +1081,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
error = zfsvfs_init(zfsvfs, os); error = zfsvfs_init(zfsvfs, os);
if (error != 0) { if (error != 0) {
dmu_objset_disown(os, zfsvfs); dmu_objset_disown(os, B_TRUE, zfsvfs);
*zfvp = NULL; *zfvp = NULL;
kmem_free(zfsvfs, sizeof (zfsvfs_t)); kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error); return (error);
@ -1669,7 +1670,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb); zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
out: out:
if (error) { if (error) {
dmu_objset_disown(zfsvfs->z_os, zfsvfs); dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
zfsvfs_free(zfsvfs); zfsvfs_free(zfsvfs);
/* /*
* make sure we don't have dangling sb->s_fs_info which * make sure we don't have dangling sb->s_fs_info which
@ -1729,7 +1730,8 @@ zfs_umount(struct super_block *sb)
zfsvfs_t *zfsvfs = sb->s_fs_info; zfsvfs_t *zfsvfs = sb->s_fs_info;
objset_t *os; objset_t *os;
arc_remove_prune_callback(zfsvfs->z_arc_prune); if (zfsvfs->z_arc_prune != NULL)
arc_remove_prune_callback(zfsvfs->z_arc_prune);
VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
os = zfsvfs->z_os; os = zfsvfs->z_os;
zpl_bdi_destroy(sb); zpl_bdi_destroy(sb);
@ -1749,7 +1751,7 @@ zfs_umount(struct super_block *sb)
/* /*
* Finally release the objset * Finally release the objset
*/ */
dmu_objset_disown(os, zfsvfs); dmu_objset_disown(os, B_TRUE, zfsvfs);
} }
zfsvfs_free(zfsvfs); zfsvfs_free(zfsvfs);

View File

@ -193,8 +193,8 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
* Read a log block and make sure it's valid. * Read a log block and make sure it's valid.
*/ */
static int static int
zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
char **end) blkptr_t *nbp, void *dst, char **end)
{ {
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t aflags = ARC_FLAG_WAIT; arc_flags_t aflags = ARC_FLAG_WAIT;
@ -208,11 +208,14 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID))
zio_flags |= ZIO_FLAG_SPECULATIVE; zio_flags |= ZIO_FLAG_SPECULATIVE;
if (!decrypt)
zio_flags |= ZIO_FLAG_RAW;
SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (error == 0) { if (error == 0) {
zio_cksum_t cksum = bp->blk_cksum; zio_cksum_t cksum = bp->blk_cksum;
@ -287,6 +290,14 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
if (zilog->zl_header->zh_claim_txg == 0) if (zilog->zl_header->zh_claim_txg == 0)
zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB;
/*
* If we are not using the resulting data, we are just checking that
* it hasn't been corrupted so we don't need to waste CPU time
* decompressing and decrypting it.
*/
if (wbuf == NULL)
zio_flags |= ZIO_FLAG_RAW;
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid,
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
@ -307,7 +318,8 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
*/ */
int int
zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
boolean_t decrypt)
{ {
const zil_header_t *zh = zilog->zl_header; const zil_header_t *zh = zilog->zl_header;
boolean_t claimed = !!zh->zh_claim_txg; boolean_t claimed = !!zh->zh_claim_txg;
@ -348,7 +360,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
if (blk_seq > claim_blk_seq) if (blk_seq > claim_blk_seq)
break; break;
if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0)
error = parse_blk_func(zilog, &blk, arg, txg);
if (error != 0)
break; break;
ASSERT3U(max_blk_seq, <, blk_seq); ASSERT3U(max_blk_seq, <, blk_seq);
max_blk_seq = blk_seq; max_blk_seq = blk_seq;
@ -357,7 +371,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq)
break; break;
error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
lrbuf, &end);
if (error != 0) if (error != 0)
break; break;
@ -367,7 +382,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
ASSERT3U(reclen, >=, sizeof (lr_t)); ASSERT3U(reclen, >=, sizeof (lr_t));
if (lr->lrc_seq > claim_lr_seq) if (lr->lrc_seq > claim_lr_seq)
goto done; goto done;
if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0)
error = parse_lr_func(zilog, lr, arg, txg);
if (error != 0)
goto done; goto done;
ASSERT3U(max_lr_seq, <, lr->lrc_seq); ASSERT3U(max_lr_seq, <, lr->lrc_seq);
max_lr_seq = lr->lrc_seq; max_lr_seq = lr->lrc_seq;
@ -382,7 +399,8 @@ done:
zilog->zl_parse_lr_count = lr_count; zilog->zl_parse_lr_count = lr_count;
ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) ||
(max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) ||
(decrypt && error == EIO));
zil_bp_tree_fini(zilog); zil_bp_tree_fini(zilog);
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
@ -423,9 +441,12 @@ zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg)
* waited for all writes to be stable first), so it is semantically * waited for all writes to be stable first), so it is semantically
* correct to declare this the end of the log. * correct to declare this the end of the log.
*/ */
if (lr->lr_blkptr.blk_birth >= first_txg && if (lr->lr_blkptr.blk_birth >= first_txg) {
(error = zil_read_log_data(zilog, lr, NULL)) != 0) error = zil_read_log_data(zilog, lr, NULL);
return (error); if (error != 0)
return (error);
}
return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg));
} }
@ -579,7 +600,7 @@ zil_create(zilog_t *zilog)
BP_ZERO(&blk); BP_ZERO(&blk);
} }
error = zio_alloc_zil(zilog->zl_spa, txg, &blk, error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
ZIL_MIN_BLKSZ, &slog); ZIL_MIN_BLKSZ, &slog);
fastwrite = TRUE; fastwrite = TRUE;
@ -673,7 +694,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
{ {
ASSERT(list_is_empty(&zilog->zl_lwb_list)); ASSERT(list_is_empty(&zilog->zl_lwb_list));
(void) zil_parse(zilog, zil_free_log_block, (void) zil_parse(zilog, zil_free_log_block,
zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE);
} }
int int
@ -687,7 +708,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
int error; int error;
error = dmu_objset_own_obj(dp, ds->ds_object, error = dmu_objset_own_obj(dp, ds->ds_object,
DMU_OST_ANY, B_FALSE, FTAG, &os); DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os);
if (error != 0) { if (error != 0) {
/* /*
* EBUSY indicates that the objset is inconsistent, in which * EBUSY indicates that the objset is inconsistent, in which
@ -708,8 +729,10 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
if (!BP_IS_HOLE(&zh->zh_log)) if (!BP_IS_HOLE(&zh->zh_log))
zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
BP_ZERO(&zh->zh_log); BP_ZERO(&zh->zh_log);
if (os->os_encrypted)
os->os_next_write_raw = B_TRUE;
dsl_dataset_dirty(dmu_objset_ds(os), tx); dsl_dataset_dirty(dmu_objset_ds(os), tx);
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_FALSE, FTAG);
return (0); return (0);
} }
@ -723,7 +746,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
ASSERT3U(zh->zh_claim_txg, <=, first_txg); ASSERT3U(zh->zh_claim_txg, <=, first_txg);
if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
(void) zil_parse(zilog, zil_claim_log_block, (void) zil_parse(zilog, zil_claim_log_block,
zil_claim_log_record, tx, first_txg); zil_claim_log_record, tx, first_txg, B_FALSE);
zh->zh_claim_txg = first_txg; zh->zh_claim_txg = first_txg;
zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq;
zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq;
@ -734,7 +757,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
} }
ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_FALSE, FTAG);
return (0); return (0);
} }
@ -792,7 +815,8 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
* which will update spa_max_claim_txg. See spa_load() for details. * which will update spa_max_claim_txg. See spa_load() for details.
*/ */
error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa),
B_FALSE);
return ((error == ECKSUM || error == ENOENT) ? 0 : error); return ((error == ECKSUM || error == ENOENT) ? 0 : error);
} }
@ -1060,7 +1084,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1); zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
BP_ZERO(bp); BP_ZERO(bp);
error = zio_alloc_zil(spa, txg, bp, zil_blksz, &slog); error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
if (slog) { if (slog) {
ZIL_STAT_BUMP(zil_itx_metaslab_slog_count); ZIL_STAT_BUMP(zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused); ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused);
@ -2269,7 +2293,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t replay_func[TX_MAX_TYPE])
zilog->zl_replay_time = ddi_get_lbolt(); zilog->zl_replay_time = ddi_get_lbolt();
ASSERT(zilog->zl_replay_blks == 0); ASSERT(zilog->zl_replay_blks == 0);
(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
zh->zh_claim_txg); zh->zh_claim_txg, B_TRUE);
vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE);
zil_destroy(zilog, B_FALSE); zil_destroy(zilog, B_FALSE);

View File

@ -43,6 +43,7 @@
#include <sys/time.h> #include <sys/time.h>
#include <sys/trace_zio.h> #include <sys/trace_zio.h>
#include <sys/abd.h> #include <sys/abd.h>
#include <sys/dsl_crypt.h>
/* /*
* ========================================================================== * ==========================================================================
@ -368,7 +369,7 @@ zio_pop_transforms(zio_t *zio)
/* /*
* ========================================================================== * ==========================================================================
* I/O transform callbacks for subblocks and decompression * I/O transform callbacks for subblocks, decompression, and decryption
* ========================================================================== * ==========================================================================
*/ */
static void static void
@ -394,6 +395,126 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
} }
} }
/*
 * Read-side I/O transform that authenticates and/or decrypts a block.
 *
 * zio  - the read zio; zio->io_abd holds the data as it was read and
 *        zio->io_bp describes the block.
 * data - abd that receives the resulting data.
 * size - number of bytes to process; must be non-zero.
 *
 * Three kinds of blocks are handled, in this order:
 *   1. bps carrying a checksum of lower-level MACs (indirect blocks): the
 *      checksum-of-MACs is verified and the data is copied through as is.
 *   2. authenticated-only blocks: the MAC is verified and the data is
 *      copied through as is.
 *   3. everything else: the data is decrypted from zio->io_abd into 'data'.
 *
 * Nothing is returned. A failure is reported through zio->io_error; an
 * ECKSUM result (authentication / decryption failure) is reported as EIO
 * and, for non-speculative zios, also posts an authentication ereport.
 * If the zio already carries an error the transform does nothing.
 */
static void
zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
{
	int ret;
	void *tmp;
	blkptr_t *bp = zio->io_bp;
	uint64_t lsize = BP_GET_LSIZE(bp);
	dmu_object_type_t ot = BP_GET_TYPE(bp);
	uint8_t salt[ZIO_DATA_SALT_LEN];
	uint8_t iv[ZIO_DATA_IV_LEN];
	uint8_t mac[ZIO_DATA_MAC_LEN];
	boolean_t no_crypt = B_FALSE;

	ASSERT(BP_USES_CRYPT(bp));
	ASSERT3U(size, !=, 0);

	if (zio->io_error != 0)
		return;

	/*
	 * Verify the cksum of MACs stored in an indirect bp. It will always
	 * be possible to verify this since it does not require an encryption
	 * key.
	 */
	if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {
		zio_crypt_decode_mac_bp(bp, mac);

		if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
			/*
			 * We haven't decompressed the data yet, but
			 * zio_crypt_do_indirect_mac_checksum() requires
			 * decompressed data to be able to parse out the MACs
			 * from the indirect block. We decompress it now and
			 * throw away the result after we are finished.
			 */
			tmp = zio_buf_alloc(lsize);
			ret = zio_decompress_data(BP_GET_COMPRESS(bp),
			    zio->io_abd, tmp, zio->io_size, lsize);
			if (ret != 0) {
				/* release the scratch buffer before bailing */
				zio_buf_free(tmp, lsize);
				ret = SET_ERROR(EIO);
				goto error;
			}
			ret = zio_crypt_do_indirect_mac_checksum(B_FALSE,
			    tmp, lsize, BP_SHOULD_BYTESWAP(bp), mac);
			zio_buf_free(tmp, lsize);
		} else {
			ret = zio_crypt_do_indirect_mac_checksum_abd(B_FALSE,
			    zio->io_abd, size, BP_SHOULD_BYTESWAP(bp), mac);
		}

		/* the block itself is not encrypted; pass it through */
		abd_copy(data, zio->io_abd, size);

		if (ret != 0)
			goto error;

		return;
	}

	/*
	 * If this is an authenticated block, just check the MAC. It would be
	 * nice to separate this out into its own flag, but for the moment
	 * enum zio_flag is out of bits.
	 */
	if (BP_IS_AUTHENTICATED(bp)) {
		if (ot == DMU_OT_OBJSET) {
			/* objset blocks do not keep their MAC in the bp */
			ret = spa_do_crypt_objset_mac_abd(B_FALSE, zio->io_spa,
			    zio->io_bookmark.zb_objset, zio->io_abd, size,
			    BP_SHOULD_BYTESWAP(bp));
		} else {
			zio_crypt_decode_mac_bp(bp, mac);
			ret = spa_do_crypt_mac_abd(B_FALSE, zio->io_spa,
			    zio->io_bookmark.zb_objset, zio->io_abd, size, mac);
		}

		/* authenticated data is stored in the clear; pass it through */
		abd_copy(data, zio->io_abd, size);

		if (ret != 0)
			goto error;

		return;
	}

	zio_crypt_decode_params_bp(bp, salt, iv);

	if (ot == DMU_OT_INTENT_LOG) {
		/* ZIL blocks keep their MAC in the zil_chain_t header */
		tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t));
		zio_crypt_decode_mac_zil(tmp, mac);
		abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t));
	} else {
		zio_crypt_decode_mac_bp(bp, mac);
	}

	ret = spa_do_crypt_abd(B_FALSE, zio->io_spa, zio->io_bookmark.zb_objset,
	    bp, bp->blk_birth, size, data, zio->io_abd, iv, mac, salt,
	    &no_crypt);

	/* nothing was transformed, so hand the data over unchanged */
	if (no_crypt)
		abd_copy(data, zio->io_abd, size);

	if (ret != 0)
		goto error;

	return;

error:
	/* assert that the key was found unless this was speculative */
	ASSERT(ret != ENOENT || (zio->io_flags & ZIO_FLAG_SPECULATIVE));

	/*
	 * If there was a decryption / authentication error return EIO as
	 * the io_error. If this was not a speculative zio, create an ereport.
	 */
	if (ret == ECKSUM) {
		zio->io_error = SET_ERROR(EIO);
		if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
			zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
			    zio->io_spa, NULL, &zio->io_bookmark, zio, 0, 0);
		}
	} else {
		zio->io_error = ret;
	}
}
/* /*
* ========================================================================== * ==========================================================================
* I/O parent/child relationships and pipeline interlocks * I/O parent/child relationships and pipeline interlocks
@ -606,7 +727,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
ASSERT(vd || stage == ZIO_STAGE_OPEN); ASSERT(vd || stage == ZIO_STAGE_OPEN);
IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0); IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0);
zio = kmem_cache_alloc(zio_cache, KM_SLEEP); zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
bzero(zio, sizeof (zio_t)); bzero(zio, sizeof (zio_t));
@ -844,9 +965,12 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
* Data can be NULL if we are going to call zio_write_override() to * Data can be NULL if we are going to call zio_write_override() to
* provide the already-allocated BP. But we may need the data to * provide the already-allocated BP. But we may need the data to
* verify a dedup hit (if requested). In this case, don't try to * verify a dedup hit (if requested). In this case, don't try to
* dedup (just take the already-allocated BP verbatim). * dedup (just take the already-allocated BP verbatim). Encrypted
* dedup blocks need data as well so we also disable dedup in this
* case.
*/ */
if (data == NULL && zio->io_prop.zp_dedup_verify) { if (data == NULL &&
(zio->io_prop.zp_dedup_verify || zio->io_prop.zp_encrypt)) {
zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE; zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE;
} }
@ -1186,16 +1310,23 @@ static int
zio_read_bp_init(zio_t *zio) zio_read_bp_init(zio_t *zio)
{ {
blkptr_t *bp = zio->io_bp; blkptr_t *bp = zio->io_bp;
uint64_t psize =
BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
zio->io_child_type == ZIO_CHILD_LOGICAL && zio->io_child_type == ZIO_CHILD_LOGICAL &&
!(zio->io_flags & ZIO_FLAG_RAW)) { !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
uint64_t psize =
BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize),
psize, psize, zio_decompress); psize, psize, zio_decompress);
} }
if (((BP_IS_PROTECTED(bp) && !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) ||
BP_HAS_INDIRECT_MAC_CKSUM(bp)) &&
zio->io_child_type == ZIO_CHILD_LOGICAL) {
zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize),
psize, psize, zio_decrypt);
}
if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
int psize = BPE_GET_PSIZE(bp); int psize = BPE_GET_PSIZE(bp);
void *data = abd_borrow_buf(zio->io_abd, psize); void *data = abd_borrow_buf(zio->io_abd, psize);
@ -1222,7 +1353,6 @@ zio_read_bp_init(zio_t *zio)
static int static int
zio_write_bp_init(zio_t *zio) zio_write_bp_init(zio_t *zio)
{ {
if (!IO_IS_ALLOCATING(zio)) if (!IO_IS_ALLOCATING(zio))
return (ZIO_PIPELINE_CONTINUE); return (ZIO_PIPELINE_CONTINUE);
@ -1261,7 +1391,8 @@ zio_write_bp_init(zio_t *zio)
ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { if (BP_GET_CHECKSUM(bp) == zp->zp_checksum &&
!zp->zp_encrypt) {
BP_SET_DEDUP(bp, 1); BP_SET_DEDUP(bp, 1);
zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
return (ZIO_PIPELINE_CONTINUE); return (ZIO_PIPELINE_CONTINUE);
@ -1290,8 +1421,6 @@ zio_write_compress(zio_t *zio)
uint64_t psize = zio->io_size; uint64_t psize = zio->io_size;
int pass = 1; int pass = 1;
EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0);
/* /*
* If our children haven't all reached the ready stage, * If our children haven't all reached the ready stage,
* wait for them and then repeat this pipeline stage. * wait for them and then repeat this pipeline stage.
@ -1341,13 +1470,15 @@ zio_write_compress(zio_t *zio)
} }
/* If it's a compressed write that is not raw, compress the buffer. */ /* If it's a compressed write that is not raw, compress the buffer. */
if (compress != ZIO_COMPRESS_OFF && psize == lsize) { if (compress != ZIO_COMPRESS_OFF &&
!(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
void *cbuf = zio_buf_alloc(lsize); void *cbuf = zio_buf_alloc(lsize);
psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
if (psize == 0 || psize == lsize) { if (psize == 0 || psize == lsize) {
compress = ZIO_COMPRESS_OFF; compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize); zio_buf_free(cbuf, lsize);
} else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE && } else if (!zp->zp_dedup && !zp->zp_encrypt &&
psize <= BPE_PAYLOAD_SIZE &&
zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) && zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) &&
spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) { spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) {
encode_embedded_bp_compressed(bp, encode_embedded_bp_compressed(bp,
@ -1445,6 +1576,8 @@ zio_write_compress(zio_t *zio)
if (zp->zp_dedup) { if (zp->zp_dedup) {
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
ASSERT(!zp->zp_encrypt ||
DMU_OT_IS_ENCRYPTED(zp->zp_type));
zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
} }
if (zp->zp_nopwrite) { if (zp->zp_nopwrite) {
@ -1868,7 +2001,8 @@ zio_suspend(spa_t *spa, zio_t *zio)
cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O " cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O "
"failure and has been suspended.\n", spa_name(spa)); "failure and has been suspended.\n", spa_name(spa));
zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
NULL, NULL, 0, 0);
mutex_enter(&spa->spa_suspend_lock); mutex_enter(&spa->spa_suspend_lock);
@ -2298,11 +2432,19 @@ zio_write_gang_block(zio_t *pio)
uint64_t resid = pio->io_size; uint64_t resid = pio->io_size;
uint64_t lsize; uint64_t lsize;
int copies = gio->io_prop.zp_copies; int copies = gio->io_prop.zp_copies;
int gbh_copies = MIN(copies + 1, spa_max_replication(spa)); int gbh_copies;
zio_prop_t zp; zio_prop_t zp;
int g, error; int g, error;
int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
/*
* encrypted blocks need DVA[2] free so encrypted gang headers can't
* have a third copy.
*/
gbh_copies = MIN(copies + 1, spa_max_replication(spa));
if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP)
gbh_copies = SPA_DVAS_PER_BP - 1;
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA)); ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
@ -2376,12 +2518,16 @@ zio_write_gang_block(zio_t *pio)
zp.zp_checksum = gio->io_prop.zp_checksum; zp.zp_checksum = gio->io_prop.zp_checksum;
zp.zp_compress = ZIO_COMPRESS_OFF; zp.zp_compress = ZIO_COMPRESS_OFF;
zp.zp_encrypt = gio->io_prop.zp_encrypt;
zp.zp_type = DMU_OT_NONE; zp.zp_type = DMU_OT_NONE;
zp.zp_level = 0; zp.zp_level = 0;
zp.zp_copies = gio->io_prop.zp_copies; zp.zp_copies = gio->io_prop.zp_copies;
zp.zp_dedup = B_FALSE; zp.zp_dedup = B_FALSE;
zp.zp_dedup_verify = B_FALSE; zp.zp_dedup_verify = B_FALSE;
zp.zp_nopwrite = B_FALSE; zp.zp_nopwrite = B_FALSE;
bzero(zp.zp_salt, ZIO_DATA_SALT_LEN);
bzero(zp.zp_iv, ZIO_DATA_IV_LEN);
bzero(zp.zp_mac, ZIO_DATA_MAC_LEN);
cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
abd_get_offset(pio->io_abd, pio->io_size - resid), lsize, abd_get_offset(pio->io_abd, pio->io_size - resid), lsize,
@ -2460,6 +2606,7 @@ zio_nop_write(zio_t *zio)
if (BP_IS_HOLE(bp_orig) || if (BP_IS_HOLE(bp_orig) ||
!(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags & !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags &
ZCHECKSUM_FLAG_NOPWRITE) || ZCHECKSUM_FLAG_NOPWRITE) ||
BP_IS_ENCRYPTED(bp) || BP_IS_ENCRYPTED(bp_orig) ||
BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) || BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) ||
BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) || BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) ||
BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) || BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) ||
@ -2609,7 +2756,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
* pushed the I/O transforms. That's an important optimization * pushed the I/O transforms. That's an important optimization
* because otherwise we'd compress/encrypt all dmu_sync() data twice. * because otherwise we'd compress/encrypt all dmu_sync() data twice.
* However, we should never get a raw, override zio so in these * However, we should never get a raw, override zio so in these
* cases we can compare the io_data directly. This is useful because * cases we can compare the io_abd directly. This is useful because
* it allows us to do dedup verification even if we don't have access * it allows us to do dedup verification even if we don't have access
* to the original data (for instance, if the encryption keys aren't * to the original data (for instance, if the encryption keys aren't
* loaded). * loaded).
@ -3097,8 +3244,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
* Try to allocate an intent log block. Return 0 on success, errno on failure. * Try to allocate an intent log block. Return 0 on success, errno on failure.
*/ */
int int
zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size, zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
boolean_t *slog) uint64_t size, boolean_t *slog)
{ {
int error = 1; int error = 1;
zio_alloc_list_t io_alloc_list; zio_alloc_list_t io_alloc_list;
@ -3130,6 +3277,23 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
BP_SET_LEVEL(new_bp, 0); BP_SET_LEVEL(new_bp, 0);
BP_SET_DEDUP(new_bp, 0); BP_SET_DEDUP(new_bp, 0);
BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER);
/*
* encrypted blocks will require an IV and salt. We generate
* these now since we will not be rewriting the bp at
* rewrite time.
*/
if (os->os_encrypted) {
uint8_t iv[ZIO_DATA_IV_LEN];
uint8_t salt[ZIO_DATA_SALT_LEN];
BP_SET_CRYPT(new_bp, B_TRUE);
VERIFY0(spa_crypt_get_salt(spa,
dmu_objset_id(os), salt));
VERIFY0(zio_crypt_generate_iv(iv));
zio_crypt_encode_params_bp(new_bp, salt, iv);
}
} }
return (error); return (error);
@ -3462,6 +3626,146 @@ zio_vdev_io_bypass(zio_t *zio)
zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1; zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1;
} }
/*
* ==========================================================================
* Encrypt and store encryption parameters
* ==========================================================================
*/
/*
 * This function is used for ZIO_STAGE_ENCRYPT. It is responsible for
 * managing the storage of encryption parameters and passing them to the
 * lower-level encryption functions.
 *
 * Every path returns ZIO_PIPELINE_CONTINUE; the crypto helpers are wrapped
 * in VERIFY0() so a failure there is treated as fatal rather than being
 * reported through the zio.
 *
 * The cases are evaluated in this order:
 *   - gang children and non-allocating, non-ZIL writes: nothing to do
 *   - neither zp_encrypt nor an encrypted bp: clear the crypt bit
 *   - raw encrypted writes: copy the caller-supplied parameters into the bp
 *   - indirect blocks (level > 0): store a checksum of the child MACs
 *   - objset blocks: compute the MACs via spa_do_crypt_objset_mac_abd()
 *   - other unencrypted object types: store a MAC only
 *   - everything else: encrypt into a new buffer and push it as a transform
 */
static int
zio_encrypt(zio_t *zio)
{
zio_prop_t *zp = &zio->io_prop;
spa_t *spa = zio->io_spa;
blkptr_t *bp = zio->io_bp;
uint64_t psize = BP_GET_PSIZE(bp);
dmu_object_type_t ot = BP_GET_TYPE(bp);
void *enc_buf = NULL;
abd_t *eabd = NULL;
uint8_t salt[ZIO_DATA_SALT_LEN];
uint8_t iv[ZIO_DATA_IV_LEN];
uint8_t mac[ZIO_DATA_MAC_LEN];
boolean_t no_crypt = B_FALSE;
/* the root zio already encrypted the data */
if (zio->io_child_type == ZIO_CHILD_GANG)
return (ZIO_PIPELINE_CONTINUE);
/* only ZIL blocks are re-encrypted on rewrite */
if (!IO_IS_ALLOCATING(zio) && ot != DMU_OT_INTENT_LOG)
return (ZIO_PIPELINE_CONTINUE);
/* not an encrypted write: make sure the bp is not marked as crypt */
if (!(zp->zp_encrypt || BP_IS_ENCRYPTED(bp))) {
BP_SET_CRYPT(bp, B_FALSE);
return (ZIO_PIPELINE_CONTINUE);
}
/* if we are doing raw encryption set the provided encryption params */
if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) {
BP_SET_CRYPT(bp, B_TRUE);
BP_SET_BYTEORDER(bp, zp->zp_byteorder);
/* objset blocks do not get a MAC encoded into the bp here */
if (ot != DMU_OT_OBJSET)
zio_crypt_encode_mac_bp(bp, zp->zp_mac);
/* salt and IV are only stored for encrypted object types */
if (DMU_OT_IS_ENCRYPTED(ot))
zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv);
return (ZIO_PIPELINE_CONTINUE);
}
/* indirect blocks only maintain a cksum of the lower level MACs */
if (BP_GET_LEVEL(bp) > 0) {
BP_SET_CRYPT(bp, B_TRUE);
/*
 * Note that this reads io_orig_abd with the logical size rather than
 * io_abd with psize -- presumably so the helper sees the data before
 * the compression transform was applied; confirm against the helper.
 */
VERIFY0(zio_crypt_do_indirect_mac_checksum_abd(B_TRUE,
zio->io_orig_abd, BP_GET_LSIZE(bp), BP_SHOULD_BYTESWAP(bp),
mac));
zio_crypt_encode_mac_bp(bp, mac);
return (ZIO_PIPELINE_CONTINUE);
}
/*
 * Objset blocks are a special case since they have 2 256-bit MACs
 * embedded within them. Nothing is encoded into the bp on this path.
 */
if (ot == DMU_OT_OBJSET) {
ASSERT0(DMU_OT_IS_ENCRYPTED(ot));
ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
BP_SET_CRYPT(bp, B_TRUE);
VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa,
zio->io_bookmark.zb_objset, zio->io_abd, psize,
BP_SHOULD_BYTESWAP(bp)));
return (ZIO_PIPELINE_CONTINUE);
}
/* unencrypted object types are only authenticated with a MAC */
if (!DMU_OT_IS_ENCRYPTED(ot)) {
BP_SET_CRYPT(bp, B_TRUE);
VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa,
zio->io_bookmark.zb_objset, zio->io_abd, psize, mac));
zio_crypt_encode_mac_bp(bp, mac);
return (ZIO_PIPELINE_CONTINUE);
}
/*
 * Later passes of sync-to-convergence may decide to rewrite data
 * in place to avoid more disk reallocations. This presents a problem
 * for encryption because this constitutes rewriting the new data with
 * the same encryption key and IV. However, this only applies to blocks
 * in the MOS (particularly the spacemaps) and we do not encrypt the
 * MOS. We assert that the zio is allocating or an intent log write
 * to enforce this.
 */
ASSERT(IO_IS_ALLOCATING(zio) || ot == DMU_OT_INTENT_LOG);
ASSERT(BP_GET_LEVEL(bp) == 0 || ot == DMU_OT_INTENT_LOG);
ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION));
ASSERT3U(psize, !=, 0);
/*
 * Destination buffer for the encrypted data, wrapped in an abd.
 * NOTE(review): abd_take_ownership_of_buf() presumably makes freeing
 * eabd also release enc_buf -- confirm against the abd implementation.
 */
enc_buf = zio_buf_alloc(psize);
eabd = abd_get_from_buf(enc_buf, psize);
abd_take_ownership_of_buf(eabd, B_TRUE);
/*
 * For an explanation of what encryption parameters are stored
 * where, see the block comment in zio_crypt.c.
 *
 * ZIL blocks reuse the salt and IV already present in the bp. For all
 * other blocks salt and iv are passed in uninitialized and encoded into
 * the bp afterwards, so spa_do_crypt_abd() is presumably expected to
 * fill them in -- confirm against its implementation.
 */
if (ot == DMU_OT_INTENT_LOG) {
zio_crypt_decode_params_bp(bp, salt, iv);
} else {
BP_SET_CRYPT(bp, B_TRUE);
}
/* Perform the encryption. This should not fail */
VERIFY0(spa_do_crypt_abd(B_TRUE, spa, zio->io_bookmark.zb_objset, bp,
zio->io_txg, psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt));
/* encode encryption metadata into the bp */
if (ot == DMU_OT_INTENT_LOG) {
/*
 * ZIL blocks store the MAC in the embedded checksum, so the
 * transform must always be applied.
 */
zio_crypt_encode_mac_zil(enc_buf, mac);
zio_push_transform(zio, eabd, psize, psize, NULL);
} else {
BP_SET_CRYPT(bp, B_TRUE);
zio_crypt_encode_params_bp(bp, salt, iv);
zio_crypt_encode_mac_bp(bp, mac);
/*
 * If nothing was encrypted the new buffer is discarded and the zio
 * keeps its current data; this is only expected for dnode blocks.
 * Otherwise the encrypted buffer is pushed with no transform callback.
 */
if (no_crypt) {
ASSERT3U(ot, ==, DMU_OT_DNODE);
abd_free(eabd);
} else {
zio_push_transform(zio, eabd, psize, psize, NULL);
}
}
return (ZIO_PIPELINE_CONTINUE);
}
/* /*
* ========================================================================== * ==========================================================================
* Generate and verify checksums * Generate and verify checksums
@ -3523,8 +3827,8 @@ zio_checksum_verify(zio_t *zio)
if (error == ECKSUM && if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
zfs_ereport_start_checksum(zio->io_spa, zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, zio, zio->io_offset, zio->io_vd, &zio->io_bookmark, zio,
zio->io_size, NULL, &info); zio->io_offset, zio->io_size, NULL, &info);
} }
} }
@ -3824,7 +4128,7 @@ zio_done(zio_t *zio)
if (zio->io_delay >= MSEC2NSEC(zio_delay_max)) { if (zio->io_delay >= MSEC2NSEC(zio_delay_max)) {
if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
zio->io_vd, zio, 0, 0); zio->io_vd, &zio->io_bookmark, zio, 0, 0);
} }
if (zio->io_error) { if (zio->io_error) {
@ -3837,7 +4141,7 @@ zio_done(zio_t *zio)
if (zio->io_error != ECKSUM && zio->io_vd != NULL && if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
!vdev_is_dead(zio->io_vd)) !vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa,
zio->io_vd, zio, 0, 0); zio->io_vd, &zio->io_bookmark, zio, 0, 0);
if ((zio->io_error == EIO || !(zio->io_flags & if ((zio->io_error == EIO || !(zio->io_flags &
(ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&
@ -3846,9 +4150,9 @@ zio_done(zio_t *zio)
* For logical I/O requests, tell the SPA to log the * For logical I/O requests, tell the SPA to log the
* error and generate a logical data ereport. * error and generate a logical data ereport.
*/ */
spa_log_error(zio->io_spa, zio); spa_log_error(zio->io_spa, &zio->io_bookmark);
zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa, zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa,
NULL, zio, 0, 0); NULL, &zio->io_bookmark, zio, 0, 0);
} }
} }
@ -4046,6 +4350,7 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_free_bp_init, zio_free_bp_init,
zio_issue_async, zio_issue_async,
zio_write_compress, zio_write_compress,
zio_encrypt,
zio_checksum_generate, zio_checksum_generate,
zio_nop_write, zio_nop_write,
zio_ddt_read_start, zio_ddt_read_start,

View File

@ -308,6 +308,25 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
mutex_exit(&spa->spa_cksum_tmpls_lock); mutex_exit(&spa->spa_cksum_tmpls_lock);
} }
/*
 * Convenience function to update a checksum to accommodate an encryption
 * MAC: the upper two words of 'cksum' are replaced with the upper two words
 * of 'saved' (presumably where the MAC is kept -- confirm against callers).
 *
 * cksum - checksum to adjust in place.
 * saved - source of the replacement values for words 2 and 3.
 * xor   - when set, fold words 2 and 3 into words 0 and 1 before they are
 *         overwritten.
 */
static void
zio_checksum_handle_crypt(zio_cksum_t *cksum, zio_cksum_t *saved, boolean_t xor)
{
	int lo;

	for (lo = 0; lo < 2; lo++) {
		int hi = lo + 2;

		/*
		 * Weak checksums do not have their entropy spread evenly
		 * across the bits of the checksum. Therefore, when truncating
		 * a weak checksum we XOR the first 2 words with the last 2 so
		 * that we don't "lose" any entropy unnecessarily.
		 */
		if (xor)
			cksum->zc_word[lo] ^= cksum->zc_word[hi];

		cksum->zc_word[hi] = saved->zc_word[hi];
	}
}
/* /*
* Generate the checksum. * Generate the checksum.
*/ */
@ -319,8 +338,9 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
blkptr_t *bp = zio->io_bp; blkptr_t *bp = zio->io_bp;
uint64_t offset = zio->io_offset; uint64_t offset = zio->io_offset;
zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_checksum_info_t *ci = &zio_checksum_table[checksum];
zio_cksum_t cksum; zio_cksum_t cksum, saved;
spa_t *spa = zio->io_spa; spa_t *spa = zio->io_spa;
boolean_t insecure = (ci->ci_flags & ZCHECKSUM_FLAG_DEDUP) == 0;
ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
ASSERT(ci->ci_func[0] != NULL); ASSERT(ci->ci_func[0] != NULL);
@ -331,6 +351,8 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
zio_eck_t eck; zio_eck_t eck;
size_t eck_offset; size_t eck_offset;
bzero(&saved, sizeof (zio_cksum_t));
if (checksum == ZIO_CHECKSUM_ZILOG2) { if (checksum == ZIO_CHECKSUM_ZILOG2) {
zil_chain_t zilc; zil_chain_t zilc;
abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t)); abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
@ -347,31 +369,36 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
if (checksum == ZIO_CHECKSUM_GANG_HEADER) { if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
zio_checksum_gang_verifier(&eck.zec_cksum, bp); zio_checksum_gang_verifier(&eck.zec_cksum, bp);
abd_copy_from_buf_off(abd, &eck.zec_cksum,
eck_offset + offsetof(zio_eck_t, zec_cksum),
sizeof (zio_cksum_t));
} else if (checksum == ZIO_CHECKSUM_LABEL) { } else if (checksum == ZIO_CHECKSUM_LABEL) {
zio_checksum_label_verifier(&eck.zec_cksum, offset); zio_checksum_label_verifier(&eck.zec_cksum, offset);
abd_copy_from_buf_off(abd, &eck.zec_cksum,
eck_offset + offsetof(zio_eck_t, zec_cksum),
sizeof (zio_cksum_t));
} else { } else {
bp->blk_cksum = eck.zec_cksum; saved = eck.zec_cksum;
eck.zec_cksum = bp->blk_cksum;
} }
abd_copy_from_buf_off(abd, &zec_magic, abd_copy_from_buf_off(abd, &zec_magic,
eck_offset + offsetof(zio_eck_t, zec_magic), eck_offset + offsetof(zio_eck_t, zec_magic),
sizeof (zec_magic)); sizeof (zec_magic));
abd_copy_from_buf_off(abd, &eck.zec_cksum,
eck_offset + offsetof(zio_eck_t, zec_cksum),
sizeof (zio_cksum_t));
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
&cksum); &cksum);
if (bp != NULL && BP_USES_CRYPT(bp) &&
BP_GET_TYPE(bp) != DMU_OT_OBJSET)
zio_checksum_handle_crypt(&cksum, &saved, insecure);
abd_copy_from_buf_off(abd, &cksum, abd_copy_from_buf_off(abd, &cksum,
eck_offset + offsetof(zio_eck_t, zec_cksum), eck_offset + offsetof(zio_eck_t, zec_cksum),
sizeof (zio_cksum_t)); sizeof (zio_cksum_t));
} else { } else {
saved = bp->blk_cksum;
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
&bp->blk_cksum); &cksum);
if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
zio_checksum_handle_crypt(&cksum, &saved, insecure);
bp->blk_cksum = cksum;
} }
} }
@ -458,6 +485,26 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
spa->spa_cksum_tmpls[checksum], &actual_cksum); spa->spa_cksum_tmpls[checksum], &actual_cksum);
} }
/*
* MAC checksums are a special case since half of this checksum will
* actually be the encryption MAC. This will be verified by the
* decryption process, so we just check the truncated checksum now.
* Objset blocks use embedded MACs so we don't truncate the checksum
* for them.
*/
if (bp != NULL && BP_USES_CRYPT(bp) &&
BP_GET_TYPE(bp) != DMU_OT_OBJSET) {
if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) {
actual_cksum.zc_word[0] ^= actual_cksum.zc_word[2];
actual_cksum.zc_word[1] ^= actual_cksum.zc_word[3];
}
actual_cksum.zc_word[2] = 0;
actual_cksum.zc_word[3] = 0;
expected_cksum.zc_word[2] = 0;
expected_cksum.zc_word[3] = 0;
}
if (info != NULL) { if (info != NULL) {
info->zbc_expected = expected_cksum; info->zbc_expected = expected_cksum;
info->zbc_actual = actual_cksum; info->zbc_actual = actual_cksum;

2037
module/zfs/zio_crypt.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -451,7 +451,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
if (zv == NULL || zv->zv_objset == NULL) { if (zv == NULL || zv->zv_objset == NULL) {
if (zv != NULL) if (zv != NULL)
rw_exit(&zv->zv_suspend_lock); rw_exit(&zv->zv_suspend_lock);
if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, B_TRUE,
FTAG, &os)) != 0) { FTAG, &os)) != 0) {
if (zv != NULL) if (zv != NULL)
mutex_exit(&zv->zv_state_lock); mutex_exit(&zv->zv_state_lock);
@ -478,7 +478,7 @@ out:
kmem_free(doi, sizeof (dmu_object_info_t)); kmem_free(doi, sizeof (dmu_object_info_t));
if (owned) { if (owned) {
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
if (zv != NULL) if (zv != NULL)
zv->zv_objset = NULL; zv->zv_objset = NULL;
} else { } else {
@ -1268,7 +1268,7 @@ zvol_first_open(zvol_state_t *zv)
} }
/* lie and say we're read-only */ /* lie and say we're read-only */
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zv, &os); error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, 1, zv, &os);
if (error) if (error)
goto out_mutex; goto out_mutex;
@ -1277,7 +1277,7 @@ zvol_first_open(zvol_state_t *zv)
error = zvol_setup_zv(zv); error = zvol_setup_zv(zv);
if (error) { if (error) {
dmu_objset_disown(os, zv); dmu_objset_disown(os, 1, zv);
zv->zv_objset = NULL; zv->zv_objset = NULL;
} }
@ -1295,7 +1295,7 @@ zvol_last_close(zvol_state_t *zv)
zvol_shutdown_zv(zv); zvol_shutdown_zv(zv);
dmu_objset_disown(zv->zv_objset, zv); dmu_objset_disown(zv->zv_objset, 1, zv);
zv->zv_objset = NULL; zv->zv_objset = NULL;
} }
@ -1756,7 +1756,7 @@ zvol_create_minor_impl(const char *name)
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
if (error) if (error)
goto out_doi; goto out_doi;
@ -1822,7 +1822,7 @@ zvol_create_minor_impl(const char *name)
zv->zv_objset = NULL; zv->zv_objset = NULL;
out_dmu_objset_disown: out_dmu_objset_disown:
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
out_doi: out_doi:
kmem_free(doi, sizeof (dmu_object_info_t)); kmem_free(doi, sizeof (dmu_object_info_t));
@ -1887,11 +1887,11 @@ zvol_prefetch_minors_impl(void *arg)
char *dsname = job->name; char *dsname = job->name;
objset_t *os = NULL; objset_t *os = NULL;
job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, FTAG, job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, B_TRUE,
&os); FTAG, &os);
if (job->error == 0) { if (job->error == 0) {
dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ); dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
dmu_objset_disown(os, FTAG); dmu_objset_disown(os, B_TRUE, FTAG);
} }
} }

View File

@ -210,14 +210,14 @@ zpios_dmu_setup(run_args_t *run_args)
t->start = zpios_timespec_now(); t->start = zpios_timespec_now();
(void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); (void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL); rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL);
if (rc) { if (rc) {
zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) " zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
"failed: %d\n", name, rc); "failed: %d\n", name, rc);
goto out; goto out;
} }
rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os); rc = dmu_objset_own(name, DMU_OST_OTHER, 0, 1, zpios_tag, &os);
if (rc) { if (rc) {
zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) " zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
"failed: %d\n", name, rc); "failed: %d\n", name, rc);
@ -429,7 +429,7 @@ zpios_remove_objset(run_args_t *run_args)
} }
} }
dmu_objset_disown(run_args->os, zpios_tag); dmu_objset_disown(run_args->os, 1, zpios_tag);
if (run_args->flags & DMU_REMOVE) { if (run_args->flags & DMU_REMOVE) {
rc = dsl_destroy_head(name); rc = dsl_destroy_head(name);

View File

@ -183,6 +183,7 @@ Requires: fio
Requires: acl Requires: acl
Requires: sudo Requires: sudo
Requires: sysstat Requires: sysstat
Requires: rng-tools
%description test %description test
This package contains test infrastructure and support scripts for This package contains test infrastructure and support scripts for

View File

@ -70,11 +70,16 @@ post =
[tests/functional/cli_root/zfs] [tests/functional/cli_root/zfs]
tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg'] tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg']
[tests/functional/cli_root/zfs_change-key]
tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format',
'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location',
'zfs_change-key_pbkdf2iters']
[tests/functional/cli_root/zfs_clone] [tests/functional/cli_root/zfs_clone]
tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos',
'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos',
'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg',
'zfs_clone_010_pos'] 'zfs_clone_010_pos', 'zfs_clone_encrypted']
[tests/functional/cli_root/zfs_copies] [tests/functional/cli_root/zfs_copies]
tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos',
@ -85,7 +90,8 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos',
'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg',
'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos',
'zfs_create_013_pos', 'zfs_create_014_pos'] 'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted',
'zfs_create_crypt_combos']
[tests/functional/cli_root/zfs_destroy] [tests/functional/cli_root/zfs_destroy]
tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos',
@ -103,17 +109,22 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos',
[tests/functional/cli_root/zfs_inherit] [tests/functional/cli_root/zfs_inherit]
tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos']
[tests/functional/cli_root/zfs_load-key]
tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file',
'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive']
# zfs_mount_006_pos - https://github.com/zfsonlinux/zfs/issues/4990 # zfs_mount_006_pos - https://github.com/zfsonlinux/zfs/issues/4990
[tests/functional/cli_root/zfs_mount] [tests/functional/cli_root/zfs_mount]
tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos',
'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg',
'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos'] 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos',
'zfs_mount_encrypted']
[tests/functional/cli_root/zfs_promote] [tests/functional/cli_root/zfs_promote]
tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos',
'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg',
'zfs_promote_007_neg', 'zfs_promote_008_pos'] 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot']
[tests/functional/cli_root/zfs_property] [tests/functional/cli_root/zfs_property]
tests = ['zfs_written_property_001_pos'] tests = ['zfs_written_property_001_pos']
@ -125,7 +136,9 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg',
'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos',
'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos', 'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos',
'receive-o-x_props_override'] 'receive-o-x_props_override', 'zfs_receive_from_encrypted',
'zfs_receive_to_encrypted', 'zfs_receive_raw',
'zfs_receive_raw_incremental']
# zfs_rename_006_pos - https://github.com/zfsonlinux/zfs/issues/5647 # zfs_rename_006_pos - https://github.com/zfsonlinux/zfs/issues/5647
# zfs_rename_009_neg - https://github.com/zfsonlinux/zfs/issues/5648 # zfs_rename_009_neg - https://github.com/zfsonlinux/zfs/issues/5648
@ -134,7 +147,8 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_004_neg', 'zfs_rename_005_neg',
'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_007_pos', 'zfs_rename_008_pos',
'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg',
'zfs_rename_013_pos'] 'zfs_rename_013_pos', 'zfs_rename_encrypted_child',
'zfs_rename_to_encrypted']
[tests/functional/cli_root/zfs_reservation] [tests/functional/cli_root/zfs_reservation]
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
@ -146,7 +160,7 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
[tests/functional/cli_root/zfs_send] [tests/functional/cli_root/zfs_send]
tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos',
'zfs_send_007_pos'] 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw']
[tests/functional/cli_root/zfs_set] [tests/functional/cli_root/zfs_set]
tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
@ -157,7 +171,7 @@ tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos', 'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos',
'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg',
'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos', 'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos',
'mountpoint_003_pos', 'ro_props_001_pos'] 'mountpoint_003_pos', 'ro_props_001_pos', 'zfs_set_keylocation']
[tests/functional/cli_root/zfs_share] [tests/functional/cli_root/zfs_share]
tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos',
@ -171,6 +185,9 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg',
'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg',
'zfs_snapshot_009_pos'] 'zfs_snapshot_009_pos']
[tests/functional/cli_root/zfs_unload-key]
tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive']
[tests/functional/cli_root/zfs_unmount] [tests/functional/cli_root/zfs_unmount]
tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos',
'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos', 'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos',
@ -213,6 +230,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos',
'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg',
'zpool_create_024_pos', 'zpool_create_024_pos',
'zpool_create_encrypted', 'zpool_create_crypt_combos',
'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
'zpool_create_features_005_pos', 'zpool_create_features_005_pos',
@ -251,7 +269,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
'zpool_import_features_001_pos', 'zpool_import_features_002_neg', 'zpool_import_features_001_pos', 'zpool_import_features_002_neg',
'zpool_import_features_003_pos','zpool_import_missing_001_pos', 'zpool_import_features_003_pos','zpool_import_missing_001_pos',
'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos', 'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos',
'zpool_import_rename_001_pos', 'zpool_import_all_001_pos'] 'zpool_import_rename_001_pos', 'zpool_import_all_001_pos',
'zpool_import_encrypted', 'zpool_import_encrypted_load']
[tests/functional/cli_root/zpool_labelclear] [tests/functional/cli_root/zpool_labelclear]
tests = ['zpool_labelclear_active', 'zpool_labelclear_exported'] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported']
@ -273,7 +292,8 @@ tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
[tests/functional/cli_root/zpool_scrub] [tests/functional/cli_root/zpool_scrub]
tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
'zpool_scrub_004_pos', 'zpool_scrub_005_pos'] 'zpool_scrub_004_pos', 'zpool_scrub_005_pos',
'zpool_scrub_encrypted_unloaded']
[tests/functional/cli_root/zpool_set] [tests/functional/cli_root/zpool_set]
tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg'] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg']
@ -500,7 +520,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD',
'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
'send-c_recv_dedup'] 'send-c_recv_dedup', 'send_encrypted_heirarchy']
[tests/functional/scrub_mirror] [tests/functional/scrub_mirror]
tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',

View File

@ -5,12 +5,14 @@ dist_pkgdata_SCRIPTS = \
SUBDIRS = \ SUBDIRS = \
zdb \ zdb \
zfs \ zfs \
zfs_change-key \
zfs_clone \ zfs_clone \
zfs_copies \ zfs_copies \
zfs_create \ zfs_create \
zfs_destroy \ zfs_destroy \
zfs_get \ zfs_get \
zfs_inherit \ zfs_inherit \
zfs_load-key \
zfs_mount \ zfs_mount \
zfs_promote \ zfs_promote \
zfs_property \ zfs_property \
@ -22,6 +24,7 @@ SUBDIRS = \
zfs_set \ zfs_set \
zfs_share \ zfs_share \
zfs_snapshot \ zfs_snapshot \
zfs_unload-key \
zfs_unmount \ zfs_unmount \
zfs_unshare \ zfs_unshare \
zfs_upgrade \ zfs_upgrade \

View File

@ -0,0 +1,11 @@
# Automake fragment for the zfs_change-key functional test group: points
# pkgdatadir at this group's directory under zfs-tests and lists the ksh
# scripts (setup, cleanup, and the individual test cases) that belong to it.
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_change-key
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
zfs_change-key.ksh \
zfs_change-key_child.ksh \
zfs_change-key_inherit.ksh \
zfs_change-key_format.ksh \
zfs_change-key_load.ksh \
zfs_change-key_location.ksh \
zfs_change-key_pbkdf2iters.ksh

View File

@ -0,0 +1,30 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# Source the shared test-suite helper library, then run its default cleanup.
# NOTE(review): default_cleanup is defined outside this script (presumably in
# libtest.shlib); its exact teardown steps are not visible here.
. $STF_SUITE/include/libtest.shlib
default_cleanup

Some files were not shown because too many files have changed in this diff Show More