Implement zfs_ioc_recv_new() for OpenZFS 2605

Adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy
ZFS_IOC_RECV user/kernel interface.  The new interface supports all
stream options but is currently only used for resumable streams.
This way updated user space utilities will interoperate with older
kernel modules.

ZFS_IOC_RECV_NEW is modeled after the existing ZFS_IOC_SEND_NEW
handler.  Non-Linux OpenZFS platforms have opted to change the
legacy interface in an incompatible fashion instead of adding a
new ioctl.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Brian Behlendorf
2016-06-09 17:04:12 -07:00
parent 8c62a0d0f3
commit 43e52eddb1
6 changed files with 519 additions and 266 deletions
+212 -82
View File
@@ -945,6 +945,13 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
ZFS_DELEG_PERM_CREATE, cr));
}
/* ARGSUSED */
static int
zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
return (zfs_secpolicy_recv(zc, innvl, cr));
}
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
@@ -4046,74 +4053,38 @@ static boolean_t zfs_ioc_recv_inject_err;
#endif
/*
* inputs:
* zc_name name of containing filesystem
* zc_nvlist_src{_size} nvlist of properties to apply
* zc_value name of snapshot to create
* zc_string name of clone origin (if DRR_FLAG_CLONE)
* zc_cookie file descriptor to recv from
* zc_begin_record the BEGIN record of the stream (not byteswapped)
* zc_guid force flag
* zc_cleanup_fd cleanup-on-exit file descriptor
* zc_action_handle handle for this guid/ds mapping (or zero on first call)
* zc_resumable if data is incomplete assume sender will resume
*
* outputs:
* zc_cookie number of bytes read
* zc_nvlist_dst{_size} error for each unapplied received property
* zc_obj zprop_errflags_t
* zc_action_handle handle for this guid/ds mapping
* On failure the 'errors' nvlist may be allocated and will contain a
* descriptions of the failures. It's the callers responsibilty to free.
*/
static int
zfs_ioc_recv(zfs_cmd_t *zc)
zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin,
nvlist_t *props, boolean_t force, boolean_t resumable, int input_fd,
dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes,
uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors)
{
file_t *fp;
dmu_recv_cookie_t drc;
boolean_t force = (boolean_t)zc->zc_guid;
int fd;
int error = 0;
int props_error = 0;
nvlist_t *errors;
offset_t off;
nvlist_t *props = NULL; /* sent properties */
nvlist_t *origprops = NULL; /* existing properties */
nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
char *origin = NULL;
char *tosnap;
char tofs[ZFS_MAXNAMELEN];
nvlist_t *origprops = NULL; /* existing properties */
boolean_t first_recvd_props = B_FALSE;
file_t *input_fp;
if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
strchr(zc->zc_value, '@') == NULL ||
strchr(zc->zc_value, '%'))
return (SET_ERROR(EINVAL));
(void) strcpy(tofs, zc->zc_value);
tosnap = strchr(tofs, '@');
*tosnap++ = '\0';
if (zc->zc_nvlist_src != 0 &&
(error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
zc->zc_iflags, &props)) != 0)
return (error);
fd = zc->zc_cookie;
fp = getf(fd);
if (fp == NULL) {
nvlist_free(props);
*errors = NULL;
input_fp = getf(input_fd);
if (input_fp == NULL)
return (SET_ERROR(EBADF));
}
errors = fnvlist_alloc();
if (zc->zc_string[0])
origin = zc->zc_string;
error = dmu_recv_begin(tofs, tosnap,
&zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
begin_record, force, resumable, origin, &drc);
if (error != 0)
goto out;
*read_bytes = 0;
*errflags = 0;
*errors = fnvlist_alloc();
/*
* Set properties before we receive the stream so that they are applied
* to the new data. Note that we must call dmu_recv_stream() if
@@ -4143,14 +4114,14 @@ zfs_ioc_recv(zfs_cmd_t *zc)
if (!first_recvd_props)
props_reduce(props, origprops);
if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
(void) nvlist_merge(errors, errlist, 0);
(void) nvlist_merge(*errors, errlist, 0);
nvlist_free(errlist);
if (clear_received_props(tofs, origprops,
first_recvd_props ? NULL : props) != 0)
zc->zc_obj |= ZPROP_ERR_NOCLEAR;
*errflags |= ZPROP_ERR_NOCLEAR;
} else {
zc->zc_obj |= ZPROP_ERR_NOCLEAR;
*errflags |= ZPROP_ERR_NOCLEAR;
}
}
@@ -4160,13 +4131,13 @@ zfs_ioc_recv(zfs_cmd_t *zc)
if (props_error == 0) {
delayprops = extract_delay_props(props);
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
props, errors);
props, *errors);
}
}
off = fp->f_offset;
error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
&zc->zc_action_handle);
off = input_fp->f_offset;
error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
action_handle);
if (error == 0) {
zfs_sb_t *zsb = NULL;
@@ -4192,7 +4163,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
/* Set delayed properties now, after we're done receiving. */
if (delayprops != NULL && error == 0) {
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
delayprops, errors);
delayprops, *errors);
}
}
@@ -4210,23 +4181,10 @@ zfs_ioc_recv(zfs_cmd_t *zc)
nvlist_free(delayprops);
}
/*
* Now that all props, initial and delayed, are set, report the prop
* errors to the caller.
*/
if (zc->zc_nvlist_dst_size != 0 &&
(nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
put_nvlist(zc, errors) != 0)) {
/*
* Caller made zc->zc_nvlist_dst less than the minimum expected
* size or supplied an invalid address.
*/
props_error = SET_ERROR(EINVAL);
}
zc->zc_cookie = off - fp->f_offset;
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
*read_bytes = off - input_fp->f_offset;
if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
input_fp->f_offset = off;
#ifdef DEBUG
if (zfs_ioc_recv_inject_err) {
@@ -4245,14 +4203,14 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* Since we may have left a $recvd value on the
* system, we can't clear the $hasrecvd flag.
*/
zc->zc_obj |= ZPROP_ERR_NORESTORE;
*errflags |= ZPROP_ERR_NORESTORE;
} else if (first_recvd_props) {
dsl_prop_unset_hasrecvd(tofs);
}
if (origprops == NULL && !drc.drc_newfs) {
/* We failed to stash the original properties. */
zc->zc_obj |= ZPROP_ERR_NORESTORE;
*errflags |= ZPROP_ERR_NORESTORE;
}
/*
@@ -4269,14 +4227,12 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* We stashed the original properties but failed to
* restore them.
*/
zc->zc_obj |= ZPROP_ERR_NORESTORE;
*errflags |= ZPROP_ERR_NORESTORE;
}
}
out:
nvlist_free(props);
releasef(input_fd);
nvlist_free(origprops);
nvlist_free(errors);
releasef(fd);
if (error == 0)
error = props_error;
@@ -4284,6 +4240,176 @@ out:
return (error);
}
/*
* inputs:
* zc_name name of containing filesystem (unused)
* zc_nvlist_src{_size} nvlist of properties to apply
* zc_value name of snapshot to create
* zc_string name of clone origin (if DRR_FLAG_CLONE)
* zc_cookie file descriptor to recv from
* zc_begin_record the BEGIN record of the stream (not byteswapped)
* zc_guid force flag
* zc_cleanup_fd cleanup-on-exit file descriptor
* zc_action_handle handle for this guid/ds mapping (or zero on first call)
*
* outputs:
* zc_cookie number of bytes read
* zc_obj zprop_errflags_t
* zc_action_handle handle for this guid/ds mapping
* zc_nvlist_dst{_size} error for each unapplied received property
*/
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
dmu_replay_record_t begin_record;
nvlist_t *errors = NULL;
nvlist_t *props = NULL;
char *origin = NULL;
char *tosnap;
char tofs[ZFS_MAXNAMELEN];
int error = 0;
if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
strchr(zc->zc_value, '@') == NULL ||
strchr(zc->zc_value, '%'))
return (SET_ERROR(EINVAL));
(void) strcpy(tofs, zc->zc_value);
tosnap = strchr(tofs, '@');
*tosnap++ = '\0';
if (zc->zc_nvlist_src != 0 &&
(error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
zc->zc_iflags, &props)) != 0)
return (error);
if (zc->zc_string[0])
origin = zc->zc_string;
begin_record.drr_type = DRR_BEGIN;
begin_record.drr_payloadlen = 0;
begin_record.drr_u.drr_begin = zc->zc_begin_record;
error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, zc->zc_guid,
B_FALSE, zc->zc_cookie, &begin_record, zc->zc_cleanup_fd,
&zc->zc_cookie, &zc->zc_obj, &zc->zc_action_handle, &errors);
nvlist_free(props);
/*
* Now that all props, initial and delayed, are set, report the prop
* errors to the caller.
*/
if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
(nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
put_nvlist(zc, errors) != 0)) {
/*
* Caller made zc->zc_nvlist_dst less than the minimum expected
* size or supplied an invalid address.
*/
error = SET_ERROR(EINVAL);
}
nvlist_free(errors);
return (error);
}
/*
* innvl: {
* "snapname" -> full name of the snapshot to create
* (optional) "props" -> properties to set (nvlist)
* (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
* "begin_record" -> non-byteswapped dmu_replay_record_t
* "input_fd" -> file descriptor to read stream from (int32)
* (optional) "force" -> force flag (value ignored)
* (optional) "resumable" -> resumable flag (value ignored)
* (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
* (optional) "action_handle" -> handle for this guid/ds mapping
* }
*
* outnvl: {
* "read_bytes" -> number of bytes read
* "error_flags" -> zprop_errflags_t
* "action_handle" -> handle for this guid/ds mapping
* "errors" -> error for each unapplied received property (nvlist)
* }
*/
static int
zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
{
dmu_replay_record_t *begin_record;
uint_t begin_record_size;
nvlist_t *errors = NULL;
nvlist_t *props = NULL;
char *snapname = NULL;
char *origin = NULL;
char *tosnap;
char tofs[ZFS_MAXNAMELEN];
boolean_t force;
boolean_t resumable;
uint64_t action_handle = 0;
uint64_t read_bytes = 0;
uint64_t errflags = 0;
int input_fd = -1;
int cleanup_fd = -1;
int error;
error = nvlist_lookup_string(innvl, "snapname", &snapname);
if (error != 0)
return (SET_ERROR(EINVAL));
if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
strchr(snapname, '@') == NULL ||
strchr(snapname, '%'))
return (SET_ERROR(EINVAL));
(void) strcpy(tofs, snapname);
tosnap = strchr(tofs, '@');
*tosnap++ = '\0';
error = nvlist_lookup_string(innvl, "origin", &origin);
if (error && error != ENOENT)
return (error);
error = nvlist_lookup_byte_array(innvl, "begin_record",
(uchar_t **) &begin_record, &begin_record_size);
if (error != 0 || begin_record_size != sizeof (*begin_record))
return (SET_ERROR(EINVAL));
error = nvlist_lookup_int32(innvl, "input_fd", &input_fd);
if (error != 0)
return (SET_ERROR(EINVAL));
force = nvlist_exists(innvl, "force");
resumable = nvlist_exists(innvl, "resumable");
error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
if (error && error != ENOENT)
return (error);
error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
if (error && error != ENOENT)
return (error);
error = nvlist_lookup_nvlist(innvl, "props", &props);
if (error && error != ENOENT)
return (error);
error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, force,
resumable, input_fd, begin_record, cleanup_fd, &read_bytes,
&errflags, &action_handle, &errors);
fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
fnvlist_add_uint64(outnvl, "error_flags", errflags);
fnvlist_add_uint64(outnvl, "action_handle", action_handle);
fnvlist_add_nvlist(outnvl, "errors", errors);
nvlist_free(errors);
nvlist_free(props);
return (error);
}
/*
* inputs:
* zc_name name of snapshot to send
@@ -5555,6 +5681,10 @@ zfs_ioctl_init(void)
POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,