mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-26 11:19:32 +03:00
30af21b025
Redacted send/receive allows users to send subsets of their data to a target system. One possible use case for this feature is to not transmit sensitive information to a data warehousing, test/dev, or analytics environment. Another is to save space by not replicating unimportant data within a given dataset, for example in backup tools like zrepl. Redacted send/receive is a three-stage process. First, a clone (or clones) is made of the snapshot to be sent to the target. In this clone (or clones), all unnecessary or unwanted data is removed or modified. This clone is then snapshotted to create the "redaction snapshot" (or snapshots). Second, the new zfs redact command is used to create a redaction bookmark. The redaction bookmark stores the list of blocks in a snapshot that were modified by the redaction snapshot(s). Finally, the redaction bookmark is passed as a parameter to zfs send. When sending to the snapshot that was redacted, the redaction bookmark is used to filter out blocks that contain sensitive or unwanted information, and those blocks are not included in the send stream. When sending from the redaction bookmark, the blocks it contains are considered as candidate blocks in addition to those blocks in the destination snapshot that were modified since the creation_txg of the redaction bookmark. This step is necessary to allow the target to rehydrate data in the case where some blocks are accidentally or unnecessarily modified in the redaction snapshot. The changes to bookmarks to enable fast space estimation involve adding deadlists to bookmarks. There is also logic to manage the life cycles of these deadlists. The new size estimation process operates in cases where previously an accurate estimate could not be provided. In those cases, a send is performed where no data blocks are read, reducing the runtime significantly and providing a byte-accurate size estimate. 
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com> Reviewed-by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: Prashanth Sreenivasa <pks@delphix.com> Reviewed-by: John Kennedy <john.kennedy@delphix.com> Reviewed-by: George Wilson <george.wilson@delphix.com> Reviewed-by: Chris Williamson <chris.williamson@delphix.com> Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com> Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com> Reviewed-by: Prakash Surya <prakash.surya@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <pcd@delphix.com> Closes #7958
432 lines
9.7 KiB
C
432 lines
9.7 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
* Use is subject to license terms.
|
|
*/
|
|
/*
|
|
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
|
*/
|
|
|
|
/*
|
|
* Common name validation routines for ZFS. These routines are shared by the
|
|
* userland code as well as the ioctl() layer to ensure that we don't
|
|
* inadvertently expose a hole through direct ioctl()s that never gets tested.
|
|
* In userland, however, we want significantly more information about _why_ the
|
|
* name is invalid. In the kernel, we only care whether it's valid or not.
|
|
* Each routine therefore takes a 'namecheck_err_t' which describes exactly why
|
|
* the name failed to validate.
|
|
*/
|
|
|
|
#if !defined(_KERNEL)
|
|
#include <string.h>
|
|
#endif
|
|
|
|
#include <sys/dsl_dir.h>
|
|
#include <sys/param.h>
|
|
#include <sys/nvpair.h>
|
|
#include "zfs_namecheck.h"
|
|
#include "zfs_deleg.h"
|
|
|
|
/*
 * Deeply nested datasets can overflow the stack, so we put a limit
 * on the amount of nesting a path can have. zfs_max_dataset_nesting
 * can be tuned temporarily to fix existing datasets that exceed our
 * predefined limit.
 *
 * dataset_nestcheck() rejects any path whose depth, as computed by
 * get_dataset_depth(), is greater than or equal to this value.
 */
int zfs_max_dataset_nesting = 50;
|
|
|
|
/*
 * Returns 1 if 'c' may appear in a name component, 0 otherwise.
 * Accepted characters are ASCII letters, ASCII digits and the
 * punctuation characters '-', '_', '.', ':' and ' ' (space).
 */
static int
valid_char(char c)
{
	if (c >= 'a' && c <= 'z')
		return (1);
	if (c >= 'A' && c <= 'Z')
		return (1);
	if (c >= '0' && c <= '9')
		return (1);

	switch (c) {
	case '-':
	case '_':
	case '.':
	case ':':
	case ' ':
		return (1);
	default:
		return (0);
	}
}
|
|
|
|
/*
 * Returns the nesting level (depth) of a path: the number of '/'
 * characters that occur before the end of the string or before the
 * first snapshot ('@') or bookmark ('#') separator, whichever comes
 * first.
 */
int
get_dataset_depth(const char *path)
{
	int depth = 0;

	/*
	 * Stop counting at the snapshot/bookmark separator; anything
	 * after it does not contribute to the nesting level.
	 */
	for (const char *p = path; *p != '\0'; p++) {
		if (*p == '@' || *p == '#')
			break;
		if (*p == '/')
			depth++;
	}

	return (depth);
}
|
|
|
|
/*
|
|
* Snapshot names must be made up of alphanumeric characters plus the following
|
|
* characters:
|
|
*
|
|
* [-_.: ]
|
|
*
|
|
* Returns 0 on success, -1 on error.
|
|
*/
|
|
int
|
|
zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what)
|
|
{
|
|
const char *loc;
|
|
|
|
if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
|
|
if (why)
|
|
*why = NAME_ERR_TOOLONG;
|
|
return (-1);
|
|
}
|
|
|
|
if (path[0] == '\0') {
|
|
if (why)
|
|
*why = NAME_ERR_EMPTY_COMPONENT;
|
|
return (-1);
|
|
}
|
|
|
|
for (loc = path; *loc; loc++) {
|
|
if (!valid_char(*loc)) {
|
|
if (why) {
|
|
*why = NAME_ERR_INVALCHAR;
|
|
*what = *loc;
|
|
}
|
|
return (-1);
|
|
}
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
|
|
/*
|
|
* Permissions set name must start with the letter '@' followed by the
|
|
* same character restrictions as snapshot names, except that the name
|
|
* cannot exceed 64 characters.
|
|
*
|
|
* Returns 0 on success, -1 on error.
|
|
*/
|
|
int
|
|
permset_namecheck(const char *path, namecheck_err_t *why, char *what)
|
|
{
|
|
if (strlen(path) >= ZFS_PERMSET_MAXLEN) {
|
|
if (why)
|
|
*why = NAME_ERR_TOOLONG;
|
|
return (-1);
|
|
}
|
|
|
|
if (path[0] != '@') {
|
|
if (why) {
|
|
*why = NAME_ERR_NO_AT;
|
|
*what = path[0];
|
|
}
|
|
return (-1);
|
|
}
|
|
|
|
return (zfs_component_namecheck(&path[1], why, what));
|
|
}
|
|
|
|
/*
|
|
* Dataset paths should not be deeper than zfs_max_dataset_nesting
|
|
* in terms of nesting.
|
|
*
|
|
* Returns 0 on success, -1 on error.
|
|
*/
|
|
int
|
|
dataset_nestcheck(const char *path)
|
|
{
|
|
return ((get_dataset_depth(path) < zfs_max_dataset_nesting) ? 0 : -1);
|
|
}
|
|
|
|
/*
|
|
* Entity names must be of the following form:
|
|
*
|
|
* [component/]*[component][(@|#)component]?
|
|
*
|
|
* Where each component is made up of alphanumeric characters plus the following
|
|
* characters:
|
|
*
|
|
* [-_.:%]
|
|
*
|
|
* We allow '%' here as we use that character internally to create unique
|
|
* names for temporary clones (for online recv).
|
|
*
|
|
* Returns 0 on success, -1 on error.
|
|
*/
|
|
int
|
|
entity_namecheck(const char *path, namecheck_err_t *why, char *what)
|
|
{
|
|
const char *end;
|
|
|
|
/*
|
|
* Make sure the name is not too long.
|
|
*/
|
|
if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
|
|
if (why)
|
|
*why = NAME_ERR_TOOLONG;
|
|
return (-1);
|
|
}
|
|
|
|
/* Explicitly check for a leading slash. */
|
|
if (path[0] == '/') {
|
|
if (why)
|
|
*why = NAME_ERR_LEADING_SLASH;
|
|
return (-1);
|
|
}
|
|
|
|
if (path[0] == '\0') {
|
|
if (why)
|
|
*why = NAME_ERR_EMPTY_COMPONENT;
|
|
return (-1);
|
|
}
|
|
|
|
const char *start = path;
|
|
boolean_t found_delim = B_FALSE;
|
|
for (;;) {
|
|
/* Find the end of this component */
|
|
end = start;
|
|
while (*end != '/' && *end != '@' && *end != '#' &&
|
|
*end != '\0')
|
|
end++;
|
|
|
|
if (*end == '\0' && end[-1] == '/') {
|
|
/* trailing slashes are not allowed */
|
|
if (why)
|
|
*why = NAME_ERR_TRAILING_SLASH;
|
|
return (-1);
|
|
}
|
|
|
|
/* Validate the contents of this component */
|
|
for (const char *loc = start; loc != end; loc++) {
|
|
if (!valid_char(*loc) && *loc != '%') {
|
|
if (why) {
|
|
*why = NAME_ERR_INVALCHAR;
|
|
*what = *loc;
|
|
}
|
|
return (-1);
|
|
}
|
|
}
|
|
|
|
if (*end == '\0' || *end == '/') {
|
|
int component_length = end - start;
|
|
/* Validate the contents of this component is not '.' */
|
|
if (component_length == 1) {
|
|
if (start[0] == '.') {
|
|
if (why)
|
|
*why = NAME_ERR_SELF_REF;
|
|
return (-1);
|
|
}
|
|
}
|
|
|
|
/* Validate the content of this component is not '..' */
|
|
if (component_length == 2) {
|
|
if (start[0] == '.' && start[1] == '.') {
|
|
if (why)
|
|
*why = NAME_ERR_PARENT_REF;
|
|
return (-1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Snapshot or bookmark delimiter found */
|
|
if (*end == '@' || *end == '#') {
|
|
/* Multiple delimiters are not allowed */
|
|
if (found_delim != 0) {
|
|
if (why)
|
|
*why = NAME_ERR_MULTIPLE_DELIMITERS;
|
|
return (-1);
|
|
}
|
|
|
|
found_delim = B_TRUE;
|
|
}
|
|
|
|
/* Zero-length components are not allowed */
|
|
if (start == end) {
|
|
if (why)
|
|
*why = NAME_ERR_EMPTY_COMPONENT;
|
|
return (-1);
|
|
}
|
|
|
|
/* If we've reached the end of the string, we're OK */
|
|
if (*end == '\0')
|
|
return (0);
|
|
|
|
/*
|
|
* If there is a '/' in a snapshot or bookmark name
|
|
* then report an error
|
|
*/
|
|
if (*end == '/' && found_delim != 0) {
|
|
if (why)
|
|
*why = NAME_ERR_TRAILING_SLASH;
|
|
return (-1);
|
|
}
|
|
|
|
/* Update to the next component */
|
|
start = end + 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Dataset is any entity, except bookmark
|
|
*/
|
|
int
|
|
dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
|
|
{
|
|
int ret = entity_namecheck(path, why, what);
|
|
|
|
if (ret == 0 && strchr(path, '#') != NULL) {
|
|
if (why != NULL) {
|
|
*why = NAME_ERR_INVALCHAR;
|
|
*what = '#';
|
|
}
|
|
return (-1);
|
|
}
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* mountpoint names must be of the following form:
|
|
*
|
|
* /[component][/]*[component][/]
|
|
*
|
|
* Returns 0 on success, -1 on error.
|
|
*/
|
|
int
|
|
mountpoint_namecheck(const char *path, namecheck_err_t *why)
|
|
{
|
|
const char *start, *end;
|
|
|
|
/*
|
|
* Make sure none of the mountpoint component names are too long.
|
|
* If a component name is too long then the mkdir of the mountpoint
|
|
* will fail but then the mountpoint property will be set to a value
|
|
* that can never be mounted. Better to fail before setting the prop.
|
|
* Extra slashes are OK, they will be tossed by the mountpoint mkdir.
|
|
*/
|
|
|
|
if (path == NULL || *path != '/') {
|
|
if (why)
|
|
*why = NAME_ERR_LEADING_SLASH;
|
|
return (-1);
|
|
}
|
|
|
|
/* Skip leading slash */
|
|
start = &path[1];
|
|
do {
|
|
end = start;
|
|
while (*end != '/' && *end != '\0')
|
|
end++;
|
|
|
|
if (end - start >= ZFS_MAX_DATASET_NAME_LEN) {
|
|
if (why)
|
|
*why = NAME_ERR_TOOLONG;
|
|
return (-1);
|
|
}
|
|
start = end + 1;
|
|
|
|
} while (*end != '\0');
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* For pool names, we have the same set of valid characters as described in
|
|
* dataset names, with the additional restriction that the pool name must begin
|
|
* with a letter. The pool names 'raidz' and 'mirror' are also reserved names
|
|
* that cannot be used.
|
|
*
|
|
* Returns 0 on success, -1 on error.
|
|
*/
|
|
int
|
|
pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
|
|
{
|
|
const char *c;
|
|
|
|
/*
|
|
* Make sure the name is not too long.
|
|
* If we're creating a pool with version >= SPA_VERSION_DSL_SCRUB (v11)
|
|
* we need to account for additional space needed by the origin ds which
|
|
* will also be snapshotted: "poolname"+"/"+"$ORIGIN"+"@"+"$ORIGIN".
|
|
* Play it safe and enforce this limit even if the pool version is < 11
|
|
* so it can be upgraded without issues.
|
|
*/
|
|
if (strlen(pool) >= (ZFS_MAX_DATASET_NAME_LEN - 2 -
|
|
strlen(ORIGIN_DIR_NAME) * 2)) {
|
|
if (why)
|
|
*why = NAME_ERR_TOOLONG;
|
|
return (-1);
|
|
}
|
|
|
|
c = pool;
|
|
while (*c != '\0') {
|
|
if (!valid_char(*c)) {
|
|
if (why) {
|
|
*why = NAME_ERR_INVALCHAR;
|
|
*what = *c;
|
|
}
|
|
return (-1);
|
|
}
|
|
c++;
|
|
}
|
|
|
|
if (!(*pool >= 'a' && *pool <= 'z') &&
|
|
!(*pool >= 'A' && *pool <= 'Z')) {
|
|
if (why)
|
|
*why = NAME_ERR_NOLETTER;
|
|
return (-1);
|
|
}
|
|
|
|
if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) {
|
|
if (why)
|
|
*why = NAME_ERR_RESERVED;
|
|
return (-1);
|
|
}
|
|
|
|
if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) {
|
|
if (why)
|
|
*why = NAME_ERR_DISKLIKE;
|
|
return (-1);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
#if defined(_KERNEL)
|
|
EXPORT_SYMBOL(entity_namecheck);
|
|
EXPORT_SYMBOL(pool_namecheck);
|
|
EXPORT_SYMBOL(dataset_namecheck);
|
|
EXPORT_SYMBOL(zfs_component_namecheck);
|
|
EXPORT_SYMBOL(dataset_nestcheck);
|
|
EXPORT_SYMBOL(get_dataset_depth);
|
|
EXPORT_SYMBOL(zfs_max_dataset_nesting);
|
|
|
|
module_param(zfs_max_dataset_nesting, int, 0644);
|
|
MODULE_PARM_DESC(zfs_max_dataset_nesting, "Maximum depth of nested datasets");
|
|
#endif
|