2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* CDDL HEADER START
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
*
|
|
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
|
|
* See the License for the specific language governing permissions
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
|
*
|
|
|
|
* CDDL HEADER END
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2010-05-29 00:45:14 +04:00
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
2020-04-23 20:06:57 +03:00
|
|
|
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
|
2019-07-01 02:38:07 +03:00
|
|
|
* Copyright Joyent, Inc.
|
2013-05-25 06:06:23 +04:00
|
|
|
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
2016-03-14 19:04:21 +03:00
|
|
|
* Copyright (c) 2016, Intel Corporation.
|
2017-02-04 00:24:44 +03:00
|
|
|
* Copyright 2016 Nexenta Systems, Inc.
|
2017-10-26 22:26:09 +03:00
|
|
|
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
2019-03-12 23:13:22 +03:00
|
|
|
* Copyright (c) 2019 Datto Inc.
|
2021-02-18 08:30:45 +03:00
|
|
|
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
|
2008-11-20 23:01:55 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _LIBZFS_H
|
|
|
|
#define _LIBZFS_H
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <libnvpair.h>
|
2009-01-16 00:59:39 +03:00
|
|
|
#include <sys/mnttab.h>
|
2008-11-20 23:01:55 +03:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/varargs.h>
|
|
|
|
#include <sys/fs/zfs.h>
|
|
|
|
#include <sys/avl.h>
|
|
|
|
#include <ucred.h>
|
2014-06-06 01:19:08 +04:00
|
|
|
#include <libzfs_core.h>
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Miscellaneous ZFS constants
|
|
|
|
*/
|
|
|
|
#define ZFS_MAXPROPLEN MAXPATHLEN
|
|
|
|
#define ZPOOL_MAXPROPLEN MAXPATHLEN
|
|
|
|
|
|
|
|
/*
|
|
|
|
* libzfs errors
|
|
|
|
*/
|
2013-08-28 15:45:09 +04:00
|
|
|
typedef enum zfs_error {
|
|
|
|
EZFS_SUCCESS = 0, /* no error -- success */
|
2008-11-20 23:01:55 +03:00
|
|
|
EZFS_NOMEM = 2000, /* out of memory */
|
|
|
|
EZFS_BADPROP, /* invalid property value */
|
|
|
|
EZFS_PROPREADONLY, /* cannot set readonly property */
|
|
|
|
EZFS_PROPTYPE, /* property does not apply to dataset type */
|
|
|
|
EZFS_PROPNONINHERIT, /* property is not inheritable */
|
|
|
|
EZFS_PROPSPACE, /* bad quota or reservation */
|
|
|
|
EZFS_BADTYPE, /* dataset is not of appropriate type */
|
|
|
|
EZFS_BUSY, /* pool or dataset is busy */
|
|
|
|
EZFS_EXISTS, /* pool or dataset already exists */
|
|
|
|
EZFS_NOENT, /* no such pool or dataset */
|
|
|
|
EZFS_BADSTREAM, /* bad backup stream */
|
|
|
|
EZFS_DSREADONLY, /* dataset is readonly */
|
|
|
|
EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */
|
|
|
|
EZFS_INVALIDNAME, /* invalid dataset name */
|
|
|
|
EZFS_BADRESTORE, /* unable to restore to destination */
|
|
|
|
EZFS_BADBACKUP, /* backup failed */
|
|
|
|
EZFS_BADTARGET, /* bad attach/detach/replace target */
|
|
|
|
EZFS_NODEVICE, /* no such device in pool */
|
|
|
|
EZFS_BADDEV, /* invalid device to add */
|
|
|
|
EZFS_NOREPLICAS, /* no valid replicas */
|
2020-07-03 21:05:50 +03:00
|
|
|
EZFS_RESILVERING, /* resilvering (healing reconstruction) */
|
2008-11-20 23:01:55 +03:00
|
|
|
EZFS_BADVERSION, /* unsupported version */
|
|
|
|
EZFS_POOLUNAVAIL, /* pool is currently unavailable */
|
|
|
|
EZFS_DEVOVERFLOW, /* too many devices in one vdev */
|
|
|
|
EZFS_BADPATH, /* must be an absolute path */
|
|
|
|
EZFS_CROSSTARGET, /* rename or clone across pool or dataset */
|
|
|
|
EZFS_ZONED, /* used improperly in local zone */
|
|
|
|
EZFS_MOUNTFAILED, /* failed to mount dataset */
|
|
|
|
EZFS_UMOUNTFAILED, /* failed to unmount dataset */
|
2020-10-30 18:55:59 +03:00
|
|
|
EZFS_UNSHARENFSFAILED, /* failed to unshare over nfs */
|
|
|
|
EZFS_SHARENFSFAILED, /* failed to share over nfs */
|
2008-11-20 23:01:55 +03:00
|
|
|
EZFS_PERM, /* permission denied */
|
|
|
|
EZFS_NOSPC, /* out of space */
|
2010-05-29 00:45:14 +04:00
|
|
|
EZFS_FAULT, /* bad address */
|
2008-11-20 23:01:55 +03:00
|
|
|
EZFS_IO, /* I/O error */
|
|
|
|
EZFS_INTR, /* signal received */
|
|
|
|
EZFS_ISSPARE, /* device is a hot spare */
|
|
|
|
EZFS_INVALCONFIG, /* invalid vdev configuration */
|
|
|
|
EZFS_RECURSIVE, /* recursive dependency */
|
|
|
|
EZFS_NOHISTORY, /* no history object */
|
|
|
|
EZFS_POOLPROPS, /* couldn't retrieve pool props */
|
|
|
|
EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */
|
|
|
|
EZFS_POOL_INVALARG, /* invalid argument for this pool operation */
|
|
|
|
EZFS_NAMETOOLONG, /* dataset name is too long */
|
|
|
|
EZFS_OPENFAILED, /* open of device failed */
|
|
|
|
EZFS_NOCAP, /* couldn't get capacity */
|
|
|
|
EZFS_LABELFAILED, /* write of label failed */
|
|
|
|
EZFS_BADWHO, /* invalid permission who */
|
|
|
|
EZFS_BADPERM, /* invalid permission */
|
|
|
|
EZFS_BADPERMSET, /* invalid permission set name */
|
|
|
|
EZFS_NODELEGATION, /* delegated administration is disabled */
|
|
|
|
EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */
|
|
|
|
EZFS_SHARESMBFAILED, /* failed to share over smb */
|
|
|
|
EZFS_BADCACHE, /* bad cache file */
|
|
|
|
EZFS_ISL2CACHE, /* device is for the level 2 ARC */
|
|
|
|
EZFS_VDEVNOTSUP, /* unsupported vdev type */
|
2008-12-03 23:09:06 +03:00
|
|
|
EZFS_NOTSUP, /* ops not supported on this dataset */
|
|
|
|
EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */
|
2009-07-03 02:44:48 +04:00
|
|
|
EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */
|
2009-08-18 22:43:27 +04:00
|
|
|
EZFS_REFTAG_RELE, /* snapshot release: tag not found */
|
|
|
|
EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */
|
2010-05-29 00:45:14 +04:00
|
|
|
EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */
|
|
|
|
EZFS_PIPEFAILED, /* pipe create failed */
|
|
|
|
EZFS_THREADCREATEFAILED, /* thread create failed */
|
|
|
|
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
|
|
|
|
EZFS_SCRUBBING, /* currently scrubbing */
|
|
|
|
EZFS_NO_SCRUB, /* no active scrub */
|
2010-08-27 01:24:34 +04:00
|
|
|
EZFS_DIFF, /* general failure of zfs diff */
|
|
|
|
EZFS_DIFFDATA, /* bad zfs diff data */
|
|
|
|
EZFS_POOLREADONLY, /* pool is in read-only mode */
|
2017-07-07 08:16:13 +03:00
|
|
|
EZFS_SCRUB_PAUSED, /* scrub currently paused */
|
Multi-modifier protection (MMP)
Add multihost=on|off pool property to control MMP. When enabled
a new thread writes uberblocks to the last slot in each label, at a
set frequency, to indicate to other hosts the pool is actively imported.
These uberblocks are the last synced uberblock with an updated
timestamp. Property defaults to off.
During tryimport, find the "best" uberblock (newest txg and timestamp)
repeatedly, checking for change in the found uberblock. Include the
results of the activity test in the config returned by tryimport.
These results are reported to user in "zpool import".
Allow the user to control the period between MMP writes, and the
duration of the activity test on import, via a new module parameter
zfs_multihost_interval. The period is specified in milliseconds. The
activity test duration is calculated from this value, and from the
mmp_delay in the "best" uberblock found initially.
Add a kstat interface to export statistics about Multiple Modifier
Protection (MMP) updates. Include the last synced txg number, the
timestamp, the delay since the last MMP update, the VDEV GUID, the VDEV
label that received the last MMP update, and the VDEV path. Abbreviated
output below.
$ cat /proc/spl/kstat/zfs/mypool/multihost
31 0 0x01 10 880 105092382393521 105144180101111
txg timestamp mmp_delay vdev_guid vdev_label vdev_path
20468 261337 250274925 68396651780 3 /dev/sda
20468 261339 252023374 6267402363293 1 /dev/sdc
20468 261340 252000858 6698080955233 1 /dev/sdx
20468 261341 251980635 783892869810 2 /dev/sdy
20468 261342 253385953 8923255792467 3 /dev/sdd
20468 261344 253336622 042125143176 0 /dev/sdab
20468 261345 253310522 1200778101278 2 /dev/sde
20468 261346 253286429 0950576198362 2 /dev/sdt
20468 261347 253261545 96209817917 3 /dev/sds
20468 261349 253238188 8555725937673 3 /dev/sdb
Add a new tunable zfs_multihost_history to specify the number of MMP
updates to store history for. By default it is set to zero meaning that
no MMP statistics are stored.
When using ztest to generate activity, for automated tests of the MMP
function, some test functions interfere with the test. For example, the
pool is exported to run zdb and then imported again. Add a new ztest
function, "-M", to alter ztest behavior to prevent this.
Add new tests to verify the new functionality. Tests provided by
Giuseppe Di Natale.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Ned Bass <bass6@llnl.gov>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #745
Closes #6279
2017-07-08 06:20:35 +03:00
|
|
|
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
EZFS_CRYPTOFAILED, /* failed to setup encryption */
|
OpenZFS 7614, 9064 - zfs device evacuation/removal
OpenZFS 7614 - zfs device evacuation/removal
OpenZFS 9064 - remove_mirror should wait for device removal to complete
This project allows top-level vdevs to be removed from the storage pool
with "zpool remove", reducing the total amount of storage in the pool.
This operation copies all allocated regions of the device to be removed
onto other devices, recording the mapping from old to new location.
After the removal is complete, read and free operations to the removed
(now "indirect") vdev must be remapped and performed at the new location
on disk. The indirect mapping table is kept in memory whenever the pool
is loaded, so there is minimal performance overhead when doing operations
on the indirect vdev.
The size of the in-memory mapping table will be reduced when its entries
become "obsolete" because they are no longer used by any block pointers
in the pool. An entry becomes obsolete when all the blocks that use
it are freed. An entry can also become obsolete when all the snapshots
that reference it are deleted, and the block pointers that reference it
have been "remapped" in all filesystems/zvols (and clones). Whenever an
indirect block is written, all the block pointers in it will be "remapped"
to their new (concrete) locations if possible. This process can be
accelerated by using the "zfs remap" command to proactively rewrite all
indirect blocks that reference indirect (removed) vdevs.
Note that when a device is removed, we do not verify the checksum of
the data that is copied. This makes the process much faster, but if it
were used on redundant vdevs (i.e. mirror or raidz vdevs), it would be
possible to copy the wrong data, when we have the correct data on e.g.
the other side of the mirror.
At the moment, only mirrors and simple top-level vdevs can be removed
and no removal is allowed if any of the top-level vdevs are raidz.
Porting Notes:
* Avoid zero-sized kmem_alloc() in vdev_compact_children().
The device evacuation code adds a dependency that
vdev_compact_children() be able to properly empty the vdev_child
array by setting it to NULL and zeroing vdev_children. Under Linux,
kmem_alloc() and related functions return a sentinel pointer rather
than NULL for zero-sized allocations.
* Remove comment regarding "mpt" driver where zfs_remove_max_segment
is initialized to SPA_MAXBLOCKSIZE.
Change zfs_condense_indirect_commit_entry_delay_ticks to
zfs_condense_indirect_commit_entry_delay_ms for consistency with
most other tunables in which delays are specified in ms.
* ZTS changes:
Use set_tunable rather than mdb
Use zpool sync as appropriate
Use sync_pool instead of sync
Kill jobs during test_removal_with_operation to allow unmount/export
Don't add non-disk names such as "mirror" or "raidz" to $DISKS
Use $TEST_BASE_DIR instead of /tmp
Increase HZ from 100 to 1000 which is more common on Linux
removal_multiple_indirection.ksh
Reduce iterations in order to not time out on the code
coverage builders.
removal_resume_export:
Functionally, the test case is correct but there exists a race
where the kernel thread hasn't been fully started yet and is
not visible. Wait for up to 1 second for the removal thread
to be started before giving up on it. Also, increase the
amount of data copied in order that the removal not finish
before the export has a chance to fail.
* MMP compatibility, the concept of concrete versus non-concrete devices
has slightly changed the semantics of vdev_writeable(). Update
mmp_random_leaf_impl() accordingly.
* Updated dbuf_remap() to handle the org.zfsonlinux:large_dnode pool
feature which is not supported by OpenZFS.
* Added support for new vdev removal tracepoints.
* Test cases removal_with_zdb and removal_condense_export have been
intentionally disabled. When run manually they pass as intended,
but when running in the automated test environment they produce
unreliable results on the latest Fedora release.
They may work better once the upstream pool import refactoring is
merged into ZoL at which point they will be re-enabled.
Authored by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Alex Reece <alex@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Richard Laager <rlaager@wiktel.com>
Reviewed by: Tim Chase <tim@chase2k.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Garrett D'Amore <garrett@damore.org>
Ported-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Tim Chase <tim@chase2k.com>
OpenZFS-issue: https://www.illumos.org/issues/7614
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/f539f1eb
Closes #6900
2016-09-22 19:30:13 +03:00
|
|
|
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
|
2016-12-17 01:11:29 +03:00
|
|
|
EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */
|
|
|
|
EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */
|
|
|
|
EZFS_NO_CHECKPOINT, /* pool has no checkpoint */
|
|
|
|
EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */
|
|
|
|
EZFS_VDEV_TOO_BIG, /* a device is too big to be used */
|
Add basic zfs ioc input nvpair validation
We want newer versions of libzfs_core to run against an existing
zfs kernel module (i.e. a deferred reboot or module reload after
an update).
Programmatically document, via a zfs_ioc_key_t, the valid arguments
for the ioc commands that rely on nvpair input arguments (i.e. non
legacy commands from libzfs_core). Automatically verify the expected
pairs before dispatching a command.
This initial phase focuses on the non-legacy ioctls. A follow-on
change can address the legacy ioctl input from the zfs_cmd_t.
The zfs_ioc_key_t for zfs_keys_channel_program looks like:
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
{"program", DATA_TYPE_STRING, 0},
{"arg", DATA_TYPE_UNKNOWN, 0},
{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
Introduce four input errors to identify specific input failures
(in addition to generic argument value errors like EINVAL, ERANGE,
EBADF, and E2BIG).
ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #7780
2018-09-02 22:14:01 +03:00
|
|
|
EZFS_IOC_NOTSUPPORTED, /* operation not supported by zfs module */
|
OpenZFS 9102 - zfs should be able to initialize storage devices
PROBLEM
========
The first access to a block incurs a performance penalty on some platforms
(e.g. AWS's EBS, VMware VMDKs). Therefore we recommend that volumes are
"thick provisioned", where supported by the platform (VMware). This can
create a large delay in getting a new virtual machines up and running (or
adding storage to an existing Engine). If the thick provision step is
omitted, write performance will be suboptimal until all blocks on the LUN
have been written.
SOLUTION
=========
This feature introduces a way to 'initialize' the disks at install or in the
background to make sure we don't incur this first read penalty.
When an entire LUN is added to ZFS, we make all space available immediately,
and allow ZFS to find unallocated space and zero it out. This works with
concurrent writes to arbitrary offsets, ensuring that we don't zero out
something that has been (or is in the middle of being) written. This scheme
can also be applied to existing pools (affecting only free regions on the
vdev). Detailed design:
- new subcommand:zpool initialize [-cs] <pool> [<vdev> ...]
- start, suspend, or cancel initialization
- Creates new open-context thread for each vdev
- Thread iterates through all metaslabs in this vdev
- Each metaslab:
- select a metaslab
- load the metaslab
- mark the metaslab as being zeroed
- walk all free ranges within that metaslab and translate
them to ranges on the leaf vdev
- issue a "zeroing" I/O on the leaf vdev that corresponds to
a free range on the metaslab we're working on
- continue until all free ranges for this metaslab have been
"zeroed"
- reset/unmark the metaslab being zeroed
- if more metaslabs exist, then repeat above tasks.
- if no more metaslabs, then we're done.
- progress for the initialization is stored on-disk in the vdev’s
leaf zap object. The following information is stored:
- the last offset that has been initialized
- the state of the initialization process (i.e. active,
suspended, or canceled)
- the start time for the initialization
- progress is reported via the zpool status command and shows
information for each of the vdevs that are initializing
Porting notes:
- Added zfs_initialize_value module parameter to set the pattern
written by "zpool initialize".
- Added zfs_vdev_{initializing,removal}_{min,max}_active module options.
Authored by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Wren Kennedy <john.kennedy@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: loli10K <ezomori.nozomu@gmail.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Richard Lowe <richlowe@richlowe.net>
Signed-off-by: Tim Chase <tim@chase2k.com>
Ported-by: Tim Chase <tim@chase2k.com>
OpenZFS-issue: https://www.illumos.org/issues/9102
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/c3963210eb
Closes #8230
2018-12-19 17:54:59 +03:00
|
|
|
EZFS_TOOMANY, /* argument list too long */
|
|
|
|
EZFS_INITIALIZING, /* currently initializing */
|
|
|
|
EZFS_NO_INITIALIZE, /* no active initialize */
|
2019-02-09 02:44:15 +03:00
|
|
|
EZFS_WRONG_PARENT, /* invalid parent dataset (e.g. ZVOL) */
|
2019-03-29 19:13:20 +03:00
|
|
|
EZFS_TRIMMING, /* currently trimming */
|
|
|
|
EZFS_NO_TRIM, /* no active trim */
|
|
|
|
EZFS_TRIM_NOTSUP, /* device does not support trim */
|
2019-05-03 02:42:31 +03:00
|
|
|
EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */
|
2019-07-18 23:02:33 +03:00
|
|
|
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
|
2020-07-03 21:05:50 +03:00
|
|
|
EZFS_REBUILDING, /* resilvering (sequential reconstruction) */
|
2008-11-20 23:01:55 +03:00
|
|
|
EZFS_UNKNOWN
|
2013-08-28 15:45:09 +04:00
|
|
|
} zfs_error_t;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The following data structures are all part
|
|
|
|
* of the zfs_allow_t data structure which is
|
|
|
|
* used for printing 'allow' permissions.
|
|
|
|
* It is a linked list of zfs_allow_t's which
|
|
|
|
* then contain AVL trees for user/group/sets/...
|
|
|
|
* and each one of the entries in those trees has
|
|
|
|
* AVL trees for the permissions they belong to and
|
|
|
|
* whether they are local, descendent or local+descendent
|
|
|
|
* permissions. The AVL trees are used primarily for
|
|
|
|
* sorting purposes, but also so that we can quickly find
|
|
|
|
* a given user and/or permission.
|
|
|
|
*/
|
|
|
|
/*
* One named entry that can be linked into an AVL tree.
* NOTE(review): presumably a single permission name stored in one of the
* per-user/group permission trees of a zfs_allow_node_t -- confirm
* against the code that populates these trees.
*/
typedef struct zfs_perm_node {
|
|
|
|
avl_node_t z_node; /* embedded AVL tree linkage */
|
|
|
|
char z_pname[MAXPATHLEN]; /* name buffer; presumably the permission name */
|
|
|
|
} zfs_perm_node_t;
|
|
|
|
|
|
|
|
/*
* One keyed entry (for example a user name) that can be linked into an
* AVL tree and that owns three AVL trees of its own, one per permission
* scope: local+descendent, local only, and descendent only.
*/
typedef struct zfs_allow_node {
|
|
|
|
avl_node_t z_node; /* embedded AVL tree linkage */
|
|
|
|
char z_key[MAXPATHLEN]; /* name, such as joe */
|
|
|
|
avl_tree_t z_localdescend; /* local+descendent perms */
|
|
|
|
avl_tree_t z_local; /* local permissions */
|
|
|
|
avl_tree_t z_descend; /* descendent permissions */
|
|
|
|
} zfs_allow_node_t;
|
|
|
|
|
|
|
|
/*
* One element of the singly linked list used for printing 'allow'
* permissions. Each element carries a name buffer plus five AVL trees.
* NOTE(review): the per-field roles below are inferred from the field
* names -- confirm against the code that fills in this structure.
*/
typedef struct zfs_allow {
|
|
|
|
struct zfs_allow *z_next; /* next element in the linked list */
|
|
|
|
char z_setpoint[MAXPATHLEN]; /* presumably the dataset the permissions are set on */
|
|
|
|
avl_tree_t z_sets; /* presumably permission sets */
|
|
|
|
avl_tree_t z_crperms; /* presumably create-time permissions */
|
|
|
|
avl_tree_t z_user; /* presumably per-user entries */
|
|
|
|
avl_tree_t z_group; /* presumably per-group entries */
|
|
|
|
avl_tree_t z_everyone; /* presumably entries applying to everyone */
|
|
|
|
} zfs_allow_t;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Basic handle types
|
|
|
|
*/
|
|
|
|
typedef struct zfs_handle zfs_handle_t;
|
|
|
|
typedef struct zpool_handle zpool_handle_t;
|
|
|
|
typedef struct libzfs_handle libzfs_handle_t;
|
|
|
|
|
Add subcommand to wait for background zfs activity to complete
Currently the best way to wait for the completion of a long-running
operation in a pool, like a scrub or device removal, is to poll 'zpool
status' and parse its output, which is neither efficient nor convenient.
This change adds a 'wait' subcommand to the zpool command. When invoked,
'zpool wait' will block until a specified type of background activity
completes. Currently, this subcommand can wait for any of the following:
- Scrubs or resilvers to complete
- Devices to be initialized
- Devices to be replaced
- Devices to be removed
- Checkpoints to be discarded
- Background freeing to complete
For example, a scrub that is in progress could be waited for by running
zpool wait -t scrub <pool>
This also adds a -w flag to the attach, checkpoint, initialize, replace,
remove, and scrub subcommands. When used, this flag makes the operations
kicked off by these subcommands synchronous instead of asynchronous.
This functionality is implemented using a new ioctl. The type of
activity to wait for is provided as input to the ioctl, and the ioctl
blocks until all activity of that type has completed. An ioctl was used
over other methods of kernel-userspace communication primarily for the
sake of portability.
Porting Notes:
This is ported from Delphix OS change DLPX-44432. The following changes
were made while porting:
- Added ZoL-style ioctl input declaration.
- Reorganized error handling in zpool_initialize in libzfs to integrate
better with changes made for TRIM support.
- Fixed check for whether a checkpoint discard is in progress.
Previously it also waited if the pool had a checkpoint, instead of
just if a checkpoint was being discarded.
- Exposed zfs_initialize_chunk_size as a ZoL-style tunable.
- Updated more existing tests to make use of new 'zpool wait'
functionality, tests that don't exist in Delphix OS.
- Used existing ZoL tunable zfs_scan_suspend_progress, together with
zinject, in place of a new tunable zfs_scan_max_blks_per_txg.
- Added support for a non-integral interval argument to zpool wait.
Future work:
ZoL has support for trimming devices, which Delphix OS does not. In the
future, 'zpool wait' could be extended to add the ability to wait for
trim operations to complete.
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: John Gallagher <john.gallagher@delphix.com>
Closes #9162
2019-09-14 04:09:06 +03:00
|
|
|
extern int zpool_wait(zpool_handle_t *, zpool_wait_activity_t);
|
|
|
|
extern int zpool_wait_status(zpool_handle_t *, zpool_wait_activity_t,
|
|
|
|
boolean_t *, boolean_t *);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Library initialization
|
|
|
|
*/
|
|
|
|
extern libzfs_handle_t *libzfs_init(void);
|
|
|
|
extern void libzfs_fini(libzfs_handle_t *);
|
|
|
|
|
|
|
|
extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
|
|
|
|
extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
|
|
|
|
|
|
|
|
extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
|
|
|
|
|
2013-08-28 15:45:09 +04:00
|
|
|
extern void zfs_save_arguments(int argc, char **, char *, int);
|
|
|
|
extern int zpool_log_history(libzfs_handle_t *, const char *);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int libzfs_errno(libzfs_handle_t *);
|
2015-05-21 00:39:52 +03:00
|
|
|
extern const char *libzfs_error_init(int);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern const char *libzfs_error_action(libzfs_handle_t *);
|
|
|
|
extern const char *libzfs_error_description(libzfs_handle_t *);
|
2013-12-12 02:33:41 +04:00
|
|
|
extern int zfs_standard_error(libzfs_handle_t *, int, const char *);
|
2009-01-16 00:59:39 +03:00
|
|
|
extern void libzfs_mnttab_init(libzfs_handle_t *);
|
|
|
|
extern void libzfs_mnttab_fini(libzfs_handle_t *);
|
2009-07-03 02:44:48 +04:00
|
|
|
extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t);
|
2009-01-16 00:59:39 +03:00
|
|
|
extern int libzfs_mnttab_find(libzfs_handle_t *, const char *,
|
|
|
|
struct mnttab *);
|
|
|
|
extern void libzfs_mnttab_add(libzfs_handle_t *, const char *,
|
|
|
|
const char *, const char *);
|
|
|
|
extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Basic handle functions
|
|
|
|
*/
|
|
|
|
extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
|
|
|
|
extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
|
|
|
|
extern void zpool_close(zpool_handle_t *);
|
|
|
|
extern const char *zpool_get_name(zpool_handle_t *);
|
|
|
|
extern int zpool_get_state(zpool_handle_t *);
|
2018-03-10 00:47:32 +03:00
|
|
|
extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t);
|
2013-07-05 15:01:44 +04:00
|
|
|
extern const char *zpool_pool_state_to_name(pool_state_t);
|
2008-12-03 23:09:06 +03:00
|
|
|
extern void zpool_free_handles(libzfs_handle_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Iterate over all active pools in the system.
|
|
|
|
*/
|
|
|
|
typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
|
|
|
|
extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
|
2017-02-04 00:24:44 +03:00
|
|
|
extern boolean_t zpool_skip_pool(const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Functions to create and destroy pools
|
|
|
|
*/
|
|
|
|
extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
|
2008-12-03 23:09:06 +03:00
|
|
|
nvlist_t *, nvlist_t *);
|
2013-08-28 15:45:09 +04:00
|
|
|
extern int zpool_destroy(zpool_handle_t *, const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zpool_add(zpool_handle_t *, nvlist_t *);
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
/*
* Option flags describing how a pool split should be carried out.
*
* NOTE(review): dryrun and import are 1-bit bit-fields declared with
* plain int. Whether such a bit-field is signed is implementation-defined
* in C, so a set flag may read back as -1 rather than 1. Test these
* fields for non-zero instead of comparing against 1.
*/
typedef struct splitflags {
|
|
|
|
/* do not split, but return the config that would be split off */
|
|
|
|
int dryrun : 1;
|
|
|
|
|
|
|
|
/* after splitting, import the pool */
|
|
|
|
int import : 1;
|
2013-12-29 22:40:46 +04:00
|
|
|
/* NOTE(review): meaning not visible here; presumably vdev name display flags -- confirm */
int name_flags;
|
2010-05-29 00:45:14 +04:00
|
|
|
} splitflags_t;
|
|
|
|
|
2019-03-29 19:13:20 +03:00
|
|
|
typedef struct trimflags {
|
|
|
|
/* requested vdevs are for the entire pool */
|
|
|
|
boolean_t fullpool;
|
|
|
|
|
|
|
|
/* request a secure trim, requires support from device */
|
|
|
|
boolean_t secure;
|
|
|
|
|
2020-03-05 02:07:11 +03:00
|
|
|
/* after starting trim, block until trim completes */
|
|
|
|
boolean_t wait;
|
|
|
|
|
2019-03-29 19:13:20 +03:00
|
|
|
/* trim at the requested rate in bytes/second */
|
|
|
|
uint64_t rate;
|
|
|
|
} trimflags_t;
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Functions to manipulate pool and vdev state
|
|
|
|
*/
|
2017-07-07 08:16:13 +03:00
|
|
|
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
|
OpenZFS 9102 - zfs should be able to initialize storage devices
PROBLEM
========
The first access to a block incurs a performance penalty on some platforms
(e.g. AWS's EBS, VMware VMDKs). Therefore we recommend that volumes are
"thick provisioned", where supported by the platform (VMware). This can
create a large delay in getting a new virtual machine up and running (or
adding storage to an existing Engine). If the thick provision step is
omitted, write performance will be suboptimal until all blocks on the LUN
have been written.
SOLUTION
=========
This feature introduces a way to 'initialize' the disks at install or in the
background to make sure we don't incur this first read penalty.
When an entire LUN is added to ZFS, we make all space available immediately,
and allow ZFS to find unallocated space and zero it out. This works with
concurrent writes to arbitrary offsets, ensuring that we don't zero out
something that has been (or is in the middle of being) written. This scheme
can also be applied to existing pools (affecting only free regions on the
vdev). Detailed design:
- new subcommand: zpool initialize [-cs] <pool> [<vdev> ...]
- start, suspend, or cancel initialization
- Creates new open-context thread for each vdev
- Thread iterates through all metaslabs in this vdev
- Each metaslab:
- select a metaslab
- load the metaslab
- mark the metaslab as being zeroed
- walk all free ranges within that metaslab and translate
them to ranges on the leaf vdev
- issue a "zeroing" I/O on the leaf vdev that corresponds to
a free range on the metaslab we're working on
- continue until all free ranges for this metaslab have been
"zeroed"
- reset/unmark the metaslab being zeroed
- if more metaslabs exist, then repeat above tasks.
- if no more metaslabs, then we're done.
- progress for the initialization is stored on-disk in the vdev’s
leaf zap object. The following information is stored:
- the last offset that has been initialized
- the state of the initialization process (i.e. active,
suspended, or canceled)
- the start time for the initialization
- progress is reported via the zpool status command and shows
information for each of the vdevs that are initializing
Porting notes:
- Added zfs_initialize_value module parameter to set the pattern
written by "zpool initialize".
- Added zfs_vdev_{initializing,removal}_{min,max}_active module options.
Authored by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Wren Kennedy <john.kennedy@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: loli10K <ezomori.nozomu@gmail.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Richard Lowe <richlowe@richlowe.net>
Signed-off-by: Tim Chase <tim@chase2k.com>
Ported-by: Tim Chase <tim@chase2k.com>
OpenZFS-issue: https://www.illumos.org/issues/9102
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/c3963210eb
Closes #8230
2018-12-19 17:54:59 +03:00
|
|
|
extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
|
|
|
|
nvlist_t *);
|
Add subcommand to wait for background zfs activity to complete
Currently the best way to wait for the completion of a long-running
operation in a pool, like a scrub or device removal, is to poll 'zpool
status' and parse its output, which is neither efficient nor convenient.
This change adds a 'wait' subcommand to the zpool command. When invoked,
'zpool wait' will block until a specified type of background activity
completes. Currently, this subcommand can wait for any of the following:
- Scrubs or resilvers to complete
- Devices to be initialized
- Devices to be replaced
- Devices to be removed
- Checkpoints to be discarded
- Background freeing to complete
For example, a scrub that is in progress could be waited for by running
zpool wait -t scrub <pool>
This also adds a -w flag to the attach, checkpoint, initialize, replace,
remove, and scrub subcommands. When used, this flag makes the operations
kicked off by these subcommands synchronous instead of asynchronous.
This functionality is implemented using a new ioctl. The type of
activity to wait for is provided as input to the ioctl, and the ioctl
blocks until all activity of that type has completed. An ioctl was used
over other methods of kernel-userspace communication primarily for the
sake of portability.
Porting Notes:
This is ported from Delphix OS change DLPX-44432. The following changes
were made while porting:
- Added ZoL-style ioctl input declaration.
- Reorganized error handling in zpool_initialize in libzfs to integrate
better with changes made for TRIM support.
- Fixed check for whether a checkpoint discard is in progress.
Previously it also waited if the pool had a checkpoint, instead of
just if a checkpoint was being discarded.
- Exposed zfs_initialize_chunk_size as a ZoL-style tunable.
- Updated more existing tests to make use of new 'zpool wait'
functionality, tests that don't exist in Delphix OS.
- Used existing ZoL tunable zfs_scan_suspend_progress, together with
zinject, in place of a new tunable zfs_scan_max_blks_per_txg.
- Added support for a non-integral interval argument to zpool wait.
Future work:
ZoL has support for trimming devices, which Delphix OS does not. In the
future, 'zpool wait' could be extended to add the ability to wait for
trim operations to complete.
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: John Gallagher <john.gallagher@delphix.com>
Closes #9162
2019-09-14 04:09:06 +03:00
|
|
|
extern int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t,
|
|
|
|
nvlist_t *);
|
2019-03-29 19:13:20 +03:00
|
|
|
extern int zpool_trim(zpool_handle_t *, pool_trim_func_t, nvlist_t *,
|
|
|
|
trimflags_t *);
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
|
2011-11-12 02:07:54 +04:00
|
|
|
extern int zpool_reguid(zpool_handle_t *);
|
2017-10-26 22:26:09 +03:00
|
|
|
extern int zpool_reopen_one(zpool_handle_t *, void *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2017-05-19 22:33:11 +03:00
|
|
|
extern int zpool_sync_one(zpool_handle_t *, void *);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
|
|
|
|
vdev_state_t *);
|
|
|
|
extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
|
|
|
|
extern int zpool_vdev_attach(zpool_handle_t *, const char *,
|
2020-07-03 21:05:50 +03:00
|
|
|
const char *, nvlist_t *, int, boolean_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
|
|
|
|
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
|
OpenZFS 7614, 9064 - zfs device evacuation/removal
OpenZFS 7614 - zfs device evacuation/removal
OpenZFS 9064 - remove_mirror should wait for device removal to complete
This project allows top-level vdevs to be removed from the storage pool
with "zpool remove", reducing the total amount of storage in the pool.
This operation copies all allocated regions of the device to be removed
onto other devices, recording the mapping from old to new location.
After the removal is complete, read and free operations to the removed
(now "indirect") vdev must be remapped and performed at the new location
on disk. The indirect mapping table is kept in memory whenever the pool
is loaded, so there is minimal performance overhead when doing operations
on the indirect vdev.
The size of the in-memory mapping table will be reduced when its entries
become "obsolete" because they are no longer used by any block pointers
in the pool. An entry becomes obsolete when all the blocks that use
it are freed. An entry can also become obsolete when all the snapshots
that reference it are deleted, and the block pointers that reference it
have been "remapped" in all filesystems/zvols (and clones). Whenever an
indirect block is written, all the block pointers in it will be "remapped"
to their new (concrete) locations if possible. This process can be
accelerated by using the "zfs remap" command to proactively rewrite all
indirect blocks that reference indirect (removed) vdevs.
Note that when a device is removed, we do not verify the checksum of
the data that is copied. This makes the process much faster, but if it
were used on redundant vdevs (i.e. mirror or raidz vdevs), it would be
possible to copy the wrong data, when we have the correct data on e.g.
the other side of the mirror.
At the moment, only mirrors and simple top-level vdevs can be removed
and no removal is allowed if any of the top-level vdevs are raidz.
Porting Notes:
* Avoid zero-sized kmem_alloc() in vdev_compact_children().
The device evacuation code adds a dependency that
vdev_compact_children() be able to properly empty the vdev_child
array by setting it to NULL and zeroing vdev_children. Under Linux,
kmem_alloc() and related functions return a sentinel pointer rather
than NULL for zero-sized allocations.
* Remove comment regarding "mpt" driver where zfs_remove_max_segment
is initialized to SPA_MAXBLOCKSIZE.
Change zfs_condense_indirect_commit_entry_delay_ticks to
zfs_condense_indirect_commit_entry_delay_ms for consistency with
most other tunables in which delays are specified in ms.
* ZTS changes:
Use set_tunable rather than mdb
Use zpool sync as appropriate
Use sync_pool instead of sync
Kill jobs during test_removal_with_operation to allow unmount/export
Don't add non-disk names such as "mirror" or "raidz" to $DISKS
Use $TEST_BASE_DIR instead of /tmp
Increase HZ from 100 to 1000 which is more common on Linux
removal_multiple_indirection.ksh
Reduce iterations in order to not time out on the code
coverage builders.
removal_resume_export:
Functionally, the test case is correct but there exists a race
where the kernel thread hasn't been fully started yet and is
not visible. Wait for up to 1 second for the removal thread
to be started before giving up on it. Also, increase the
amount of data copied in order that the removal not finish
before the export has a chance to fail.
* MMP compatibility, the concept of concrete versus non-concrete devices
has slightly changed the semantics of vdev_writeable(). Update
mmp_random_leaf_impl() accordingly.
* Updated dbuf_remap() to handle the org.zfsonlinux:large_dnode pool
feature which is not supported by OpenZFS.
* Added support for new vdev removal tracepoints.
* Test cases removal_with_zdb and removal_condense_export have been
intentionally disabled. When run manually they pass as intended,
but when running in the automated test environment they produce
unreliable results on the latest Fedora release.
They may work better once the upstream pool import refactoring is
merged into ZoL at which point they will be re-enabled.
Authored by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Alex Reece <alex@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Richard Laager <rlaager@wiktel.com>
Reviewed by: Tim Chase <tim@chase2k.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Garrett D'Amore <garrett@damore.org>
Ported-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Tim Chase <tim@chase2k.com>
OpenZFS-issue: https://www.illumos.org/issues/7614
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/f539f1eb
Closes #6900
2016-09-22 19:30:13 +03:00
|
|
|
extern int zpool_vdev_remove_cancel(zpool_handle_t *);
|
|
|
|
extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
|
|
|
|
splitflags_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
|
|
|
|
extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
|
|
|
|
|
|
|
|
extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
|
2008-12-03 23:09:06 +03:00
|
|
|
boolean_t *, boolean_t *);
|
2009-07-03 02:44:48 +04:00
|
|
|
extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
|
|
|
|
boolean_t *, boolean_t *, boolean_t *);
|
2020-06-06 22:54:04 +03:00
|
|
|
extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *);
|
2017-05-19 22:30:16 +03:00
|
|
|
extern uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2018-06-06 19:33:54 +03:00
|
|
|
/*
 * NOTE(review): judging by the name, this returns a printable string for
 * the pool's current state -- confirm against the definition.  Declared
 * "extern" like the other prototypes in this section.
 */
extern const char *zpool_get_state_str(zpool_handle_t *);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Functions to manage pool properties
|
|
|
|
*/
|
|
|
|
extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
|
|
|
|
extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *,
|
2013-10-23 12:33:33 +04:00
|
|
|
size_t proplen, zprop_source_t *, boolean_t literal);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
|
|
|
|
zprop_source_t *);
|
Add subcommand to wait for background zfs activity to complete
Currently the best way to wait for the completion of a long-running
operation in a pool, like a scrub or device removal, is to poll 'zpool
status' and parse its output, which is neither efficient nor convenient.
This change adds a 'wait' subcommand to the zpool command. When invoked,
'zpool wait' will block until a specified type of background activity
completes. Currently, this subcommand can wait for any of the following:
- Scrubs or resilvers to complete
- Devices to be initialized
- Devices to be replaced
- Devices to be removed
- Checkpoints to be discarded
- Background freeing to complete
For example, a scrub that is in progress could be waited for by running
zpool wait -t scrub <pool>
This also adds a -w flag to the attach, checkpoint, initialize, replace,
remove, and scrub subcommands. When used, this flag makes the operations
kicked off by these subcommands synchronous instead of asynchronous.
This functionality is implemented using a new ioctl. The type of
activity to wait for is provided as input to the ioctl, and the ioctl
blocks until all activity of that type has completed. An ioctl was used
over other methods of kernel-userspace communication primarily for the
sake of portability.
Porting Notes:
This is ported from Delphix OS change DLPX-44432. The following changes
were made while porting:
- Added ZoL-style ioctl input declaration.
- Reorganized error handling in zpool_initialize in libzfs to integrate
better with changes made for TRIM support.
- Fixed check for whether a checkpoint discard is in progress.
Previously it also waited if the pool had a checkpoint, instead of
just if a checkpoint was being discarded.
- Exposed zfs_initialize_chunk_size as a ZoL-style tunable.
- Updated more existing tests to make use of new 'zpool wait'
functionality, tests that don't exist in Delphix OS.
- Used existing ZoL tunable zfs_scan_suspend_progress, together with
zinject, in place of a new tunable zfs_scan_max_blks_per_txg.
- Added support for a non-integral interval argument to zpool wait.
Future work:
ZoL has support for trimming devices, which Delphix OS does not. In the
future, 'zpool wait' could be extended to add the ability to wait for
trim operations to complete.
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: John Gallagher <john.gallagher@delphix.com>
Closes #9162
2019-09-14 04:09:06 +03:00
|
|
|
extern int zpool_props_refresh(zpool_handle_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
extern const char *zpool_prop_to_name(zpool_prop_t);
|
|
|
|
extern const char *zpool_prop_values(zpool_prop_t);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Pool health statistics.
|
|
|
|
*/
|
|
|
|
typedef enum {
|
|
|
|
/*
|
|
|
|
* The following correspond to faults as defined in the (fault.fs.zfs.*)
|
|
|
|
* event namespace. Each is associated with a corresponding message ID.
|
2019-01-03 23:15:46 +03:00
|
|
|
* This must be kept in sync with the zfs_msgid_table in
|
|
|
|
* lib/libzfs/libzfs_status.c.
|
2008-11-20 23:01:55 +03:00
|
|
|
*/
|
|
|
|
ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */
|
|
|
|
ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */
|
|
|
|
ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */
|
|
|
|
ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */
|
|
|
|
ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */
|
|
|
|
ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */
|
|
|
|
ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */
|
|
|
|
ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */
|
|
|
|
ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */
|
|
|
|
ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */
|
|
|
|
ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */
|
Multi-modifier protection (MMP)
Add multihost=on|off pool property to control MMP. When enabled
a new thread writes uberblocks to the last slot in each label, at a
set frequency, to indicate to other hosts the pool is actively imported.
These uberblocks are the last synced uberblock with an updated
timestamp. Property defaults to off.
During tryimport, find the "best" uberblock (newest txg and timestamp)
repeatedly, checking for change in the found uberblock. Include the
results of the activity test in the config returned by tryimport.
These results are reported to user in "zpool import".
Allow the user to control the period between MMP writes, and the
duration of the activity test on import, via a new module parameter
zfs_multihost_interval. The period is specified in milliseconds. The
activity test duration is calculated from this value, and from the
mmp_delay in the "best" uberblock found initially.
Add a kstat interface to export statistics about Multiple Modifier
Protection (MMP) updates. Include the last synced txg number, the
timestamp, the delay since the last MMP update, the VDEV GUID, the VDEV
label that received the last MMP update, and the VDEV path. Abbreviated
output below.
$ cat /proc/spl/kstat/zfs/mypool/multihost
31 0 0x01 10 880 105092382393521 105144180101111
txg timestamp mmp_delay vdev_guid vdev_label vdev_path
20468 261337 250274925 68396651780 3 /dev/sda
20468 261339 252023374 6267402363293 1 /dev/sdc
20468 261340 252000858 6698080955233 1 /dev/sdx
20468 261341 251980635 783892869810 2 /dev/sdy
20468 261342 253385953 8923255792467 3 /dev/sdd
20468 261344 253336622 042125143176 0 /dev/sdab
20468 261345 253310522 1200778101278 2 /dev/sde
20468 261346 253286429 0950576198362 2 /dev/sdt
20468 261347 253261545 96209817917 3 /dev/sds
20468 261349 253238188 8555725937673 3 /dev/sdb
Add a new tunable zfs_multihost_history to specify the number of MMP
updates to store history for. By default it is set to zero meaning that
no MMP statistics are stored.
When using ztest to generate activity, for automated tests of the MMP
function, some test functions interfere with the test. For example, the
pool is exported to run zdb and then imported again. Add a new ztest
function, "-M", to alter ztest behavior to prevent this.
Add new tests to verify the new functionality. Tests provided by
Giuseppe Di Natale.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Ned Bass <bass6@llnl.gov>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #745
Closes #6279
2017-07-08 06:20:35 +03:00
|
|
|
ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */
|
|
|
|
ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */
|
2008-12-03 23:09:06 +03:00
|
|
|
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
|
|
|
|
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
|
2018-03-15 20:56:55 +03:00
|
|
|
ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */
|
2009-02-18 23:51:31 +03:00
|
|
|
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
|
2014-02-21 07:57:17 +04:00
|
|
|
ZPOOL_STATUS_ERRATA, /* informational errata available */
|
2009-02-18 23:51:31 +03:00
|
|
|
|
2012-12-14 03:24:15 +04:00
|
|
|
/*
|
|
|
|
* If the pool has unsupported features but can still be opened in
|
|
|
|
* read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the
|
|
|
|
* pool has unsupported features but cannot be opened at all, its
|
|
|
|
* status is ZPOOL_STATUS_UNSUP_FEAT_READ.
|
|
|
|
*/
|
|
|
|
ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */
|
|
|
|
ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */
|
|
|
|
|
2009-02-18 23:51:31 +03:00
|
|
|
/*
|
|
|
|
* These faults have no corresponding message ID. At the time we are
|
|
|
|
* checking the status, the original reason for the FMA fault (I/O or
|
|
|
|
* checksum errors) has been lost.
|
|
|
|
*/
|
2008-11-20 23:01:55 +03:00
|
|
|
ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */
|
|
|
|
ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following are not faults per se, but still an error possibly
|
|
|
|
* requiring administrative attention. There is no corresponding
|
|
|
|
* message ID.
|
|
|
|
*/
|
2012-12-15 03:00:45 +04:00
|
|
|
ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */
|
|
|
|
ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */
|
2008-11-20 23:01:55 +03:00
|
|
|
ZPOOL_STATUS_RESILVERING, /* device being resilvered */
|
2016-05-05 19:30:05 +03:00
|
|
|
ZPOOL_STATUS_OFFLINE_DEV, /* device offline */
|
2009-08-18 22:43:27 +04:00
|
|
|
ZPOOL_STATUS_REMOVED_DEV, /* removed device */
|
2020-07-03 21:05:50 +03:00
|
|
|
ZPOOL_STATUS_REBUILDING, /* device being rebuilt */
|
|
|
|
ZPOOL_STATUS_REBUILD_SCRUB, /* recommend scrubbing the pool */
|
2020-08-21 22:53:17 +03:00
|
|
|
ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */
|
2021-02-18 08:30:45 +03:00
|
|
|
ZPOOL_STATUS_COMPATIBILITY_ERR, /* bad 'compatibility' property */
|
2021-04-12 19:08:56 +03:00
|
|
|
ZPOOL_STATUS_INCOMPATIBLE_FEAT, /* feature set outside compatibility */
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Finally, the following indicates a healthy pool.
|
|
|
|
*/
|
|
|
|
ZPOOL_STATUS_OK
|
|
|
|
} zpool_status_t;
|
|
|
|
|
2014-02-21 07:57:17 +04:00
|
|
|
extern zpool_status_t zpool_get_status(zpool_handle_t *, char **,
|
|
|
|
zpool_errata_t *);
|
|
|
|
extern zpool_status_t zpool_import_status(nvlist_t *, char **,
|
|
|
|
zpool_errata_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Statistics and configuration functions.
|
|
|
|
*/
|
|
|
|
extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
|
2012-12-14 03:24:15 +04:00
|
|
|
extern nvlist_t *zpool_get_features(zpool_handle_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
|
|
|
|
extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Import and export functions
|
|
|
|
*/
|
2013-08-28 15:45:09 +04:00
|
|
|
extern int zpool_export(zpool_handle_t *, boolean_t, const char *);
|
|
|
|
extern int zpool_export_force(zpool_handle_t *, const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
|
|
|
|
char *altroot);
|
|
|
|
extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
|
2010-08-27 01:24:34 +04:00
|
|
|
nvlist_t *, int);
|
2012-12-14 03:24:15 +04:00
|
|
|
extern void zpool_print_unsup_feat(nvlist_t *config);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Miscellaneous pool functions
|
|
|
|
*/
|
|
|
|
struct zfs_cmd;
|
|
|
|
|
2013-08-28 15:45:09 +04:00
|
|
|
extern const char *zfs_history_event_names[];
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2013-12-29 22:40:46 +04:00
|
|
|
/*
 * Single-bit flag values: every enumerator occupies its own bit, so any
 * combination can be OR-ed into one integer.
 * NOTE(review): presumably these are the values accepted by the
 * "name_flags" argument of zpool_vdev_name() -- confirm.
 */
typedef enum {
	VDEV_NAME_PATH		= 0x01,	/* bit 0 */
	VDEV_NAME_GUID		= 0x02,	/* bit 1 */
	VDEV_NAME_FOLLOW_LINKS	= 0x04,	/* bit 2 */
	VDEV_NAME_TYPE_ID	= 0x08,	/* bit 3 */
} vdev_name_t;
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
|
2013-12-29 22:40:46 +04:00
|
|
|
int name_flags);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zpool_upgrade(zpool_handle_t *, uint64_t);
|
2019-10-28 19:49:44 +03:00
|
|
|
extern int zpool_get_history(zpool_handle_t *, nvlist_t **, uint64_t *,
|
|
|
|
boolean_t *);
|
2014-02-12 22:30:18 +04:00
|
|
|
extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, unsigned,
|
|
|
|
int);
|
2010-08-26 22:42:43 +04:00
|
|
|
extern int zpool_events_clear(libzfs_handle_t *, int *);
|
2013-11-23 02:52:16 +04:00
|
|
|
extern int zpool_events_seek(libzfs_handle_t *, uint64_t, int);
|
2020-05-20 20:05:33 +03:00
|
|
|
extern void zpool_obj_to_path_ds(zpool_handle_t *, uint64_t, uint64_t, char *,
|
|
|
|
size_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
|
2020-05-20 20:05:33 +03:00
|
|
|
size_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
|
2009-07-03 02:44:48 +04:00
|
|
|
extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
|
|
|
|
nvlist_t *);
|
2016-12-17 01:11:29 +03:00
|
|
|
extern int zpool_checkpoint(zpool_handle_t *);
|
|
|
|
extern int zpool_discard_checkpoint(zpool_handle_t *);
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-14 00:51:51 +03:00
|
|
|
extern boolean_t zpool_is_draid_spare(const char *);
|
2009-07-03 02:44:48 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Basic handle manipulations. These functions do not create or destroy the
|
|
|
|
* underlying datasets, only the references to them.
|
|
|
|
*/
|
|
|
|
extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
|
2011-11-17 22:14:36 +04:00
|
|
|
extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern void zfs_close(zfs_handle_t *);
|
|
|
|
extern zfs_type_t zfs_get_type(const zfs_handle_t *);
|
|
|
|
extern const char *zfs_get_name(const zfs_handle_t *);
|
2008-12-03 23:09:06 +03:00
|
|
|
extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *);
|
2017-02-04 00:24:44 +03:00
|
|
|
extern const char *zfs_get_pool_name(const zfs_handle_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Property management functions. Some functions are shared with the kernel,
|
|
|
|
* and are found in sys/fs/zfs.h.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* zfs dataset property management
|
|
|
|
*/
|
|
|
|
extern const char *zfs_prop_default_string(zfs_prop_t);
|
|
|
|
extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
|
|
|
|
extern const char *zfs_prop_column_name(zfs_prop_t);
|
|
|
|
extern boolean_t zfs_prop_align_right(zfs_prop_t);
|
|
|
|
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *,
|
|
|
|
uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *);
|
2008-12-03 23:09:06 +03:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
extern const char *zfs_prop_to_name(zfs_prop_t);
|
|
|
|
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
|
2015-07-06 02:11:09 +03:00
|
|
|
extern int zfs_prop_set_list(zfs_handle_t *, nvlist_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
|
|
|
|
zprop_source_t *, char *, size_t, boolean_t);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t,
|
|
|
|
boolean_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
|
|
|
|
zprop_source_t *, char *, size_t);
|
2009-07-03 02:44:48 +04:00
|
|
|
extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
|
|
|
|
uint64_t *propvalue);
|
|
|
|
extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
|
|
|
|
char *propbuf, int proplen, boolean_t literal);
|
2011-11-17 22:14:36 +04:00
|
|
|
extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname,
|
|
|
|
uint64_t *propvalue);
|
|
|
|
extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname,
|
|
|
|
char *propbuf, int proplen, boolean_t literal);
|
2012-12-14 03:24:15 +04:00
|
|
|
extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname,
|
|
|
|
char *buf, size_t len);
|
2011-05-19 22:44:07 +04:00
|
|
|
extern uint64_t getprop_uint64(zfs_handle_t *, zfs_prop_t, char **);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern const char *zfs_prop_values(zfs_prop_t);
|
|
|
|
extern int zfs_prop_is_string(zfs_prop_t prop);
|
2017-10-19 21:18:42 +03:00
|
|
|
extern nvlist_t *zfs_get_all_props(zfs_handle_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
|
2011-11-17 22:14:36 +04:00
|
|
|
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2020-04-01 20:02:06 +03:00
|
|
|
extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t,
|
|
|
|
boolean_t *, boolean_t *);
|
|
|
|
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
/*
|
|
|
|
* zfs encryption management
|
|
|
|
*/
|
|
|
|
extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *);
|
|
|
|
extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *,
|
2017-10-13 20:09:04 +03:00
|
|
|
boolean_t stdin_available, uint8_t **, uint_t *);
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *,
|
|
|
|
nvlist_t *);
|
|
|
|
extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *);
|
|
|
|
extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *);
|
|
|
|
extern int zfs_crypto_unload_key(zfs_handle_t *);
|
|
|
|
extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
typedef struct zprop_list {
|
|
|
|
int pl_prop;
|
|
|
|
char *pl_user_prop;
|
|
|
|
struct zprop_list *pl_next;
|
|
|
|
boolean_t pl_all;
|
|
|
|
size_t pl_width;
|
2010-05-29 00:45:14 +04:00
|
|
|
size_t pl_recvd_width;
|
2008-11-20 23:01:55 +03:00
|
|
|
boolean_t pl_fixed;
|
|
|
|
} zprop_list_t;
|
|
|
|
|
2013-11-19 19:41:37 +04:00
|
|
|
extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t,
|
|
|
|
boolean_t);
|
2009-07-03 02:44:48 +04:00
|
|
|
extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
#define ZFS_MOUNTPOINT_NONE "none"
|
|
|
|
#define ZFS_MOUNTPOINT_LEGACY "legacy"
|
|
|
|
|
2012-12-14 03:24:15 +04:00
|
|
|
#define ZFS_FEATURE_DISABLED "disabled"
|
|
|
|
#define ZFS_FEATURE_ENABLED "enabled"
|
|
|
|
#define ZFS_FEATURE_ACTIVE "active"
|
|
|
|
|
|
|
|
#define ZFS_UNSUPPORTED_INACTIVE "inactive"
|
|
|
|
#define ZFS_UNSUPPORTED_READONLY "readonly"
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* zpool property management
|
|
|
|
*/
|
2020-11-14 01:38:29 +03:00
|
|
|
extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **, boolean_t);
|
2012-12-14 03:24:15 +04:00
|
|
|
extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *,
|
|
|
|
size_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern const char *zpool_prop_default_string(zpool_prop_t);
|
|
|
|
extern uint64_t zpool_prop_default_numeric(zpool_prop_t);
|
|
|
|
extern const char *zpool_prop_column_name(zpool_prop_t);
|
|
|
|
extern boolean_t zpool_prop_align_right(zpool_prop_t);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Functions shared by zfs and zpool property management.
|
|
|
|
*/
|
|
|
|
extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all,
|
|
|
|
boolean_t ordered, zfs_type_t type);
|
|
|
|
extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
|
|
|
|
zfs_type_t);
|
|
|
|
extern void zprop_free_list(zprop_list_t *);
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
#define ZFS_GET_NCOLS 5
|
|
|
|
|
|
|
|
/*
 * Identifiers for the columns of property output.  The enumerators are
 * numbered consecutively starting at zero; the values are spelled out so
 * the numbering is visible at a glance.
 */
typedef enum {
	GET_COL_NONE = 0,
	GET_COL_NAME = 1,
	GET_COL_PROPERTY = 2,
	GET_COL_VALUE = 3,
	GET_COL_RECVD = 4,
	GET_COL_SOURCE = 5
} zfs_get_column_t;
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Functions for printing zfs or zpool properties
|
|
|
|
*/
|
|
|
|
typedef struct zprop_get_cbdata {
|
|
|
|
int cb_sources;
|
2010-05-29 00:45:14 +04:00
|
|
|
zfs_get_column_t cb_columns[ZFS_GET_NCOLS];
|
|
|
|
int cb_colwidths[ZFS_GET_NCOLS + 1];
|
2008-11-20 23:01:55 +03:00
|
|
|
boolean_t cb_scripted;
|
|
|
|
boolean_t cb_literal;
|
|
|
|
boolean_t cb_first;
|
|
|
|
zprop_list_t *cb_proplist;
|
|
|
|
zfs_type_t cb_type;
|
|
|
|
} zprop_get_cbdata_t;
|
|
|
|
|
|
|
|
void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
|
2010-05-29 00:45:14 +04:00
|
|
|
const char *, const char *, zprop_source_t, const char *,
|
|
|
|
const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Iterator functions.
|
|
|
|
*/
|
|
|
|
typedef int (*zfs_iter_f)(zfs_handle_t *, void *);
|
|
|
|
extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *);
|
|
|
|
extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
|
|
|
|
extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
|
|
|
|
extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
|
2019-03-12 23:13:22 +03:00
|
|
|
extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *,
|
|
|
|
uint64_t, uint64_t);
|
|
|
|
extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *,
|
|
|
|
uint64_t, uint64_t);
|
2011-11-17 22:14:36 +04:00
|
|
|
extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *);
|
2013-12-12 02:33:41 +04:00
|
|
|
extern int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *);
|
2018-10-02 22:30:58 +03:00
|
|
|
extern int zfs_iter_mounted(zfs_handle_t *, zfs_iter_f, void *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-27 01:24:34 +04:00
|
|
|
typedef struct get_all_cb {
|
|
|
|
zfs_handle_t **cb_handles;
|
|
|
|
size_t cb_alloc;
|
|
|
|
size_t cb_used;
|
|
|
|
} get_all_cb_t;
|
|
|
|
|
2018-11-05 18:40:05 +03:00
|
|
|
void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t,
|
|
|
|
zfs_iter_f, void *, boolean_t);
|
2010-08-27 01:24:34 +04:00
|
|
|
void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Functions to create and destroy datasets.
|
|
|
|
*/
|
|
|
|
extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
|
|
|
|
nvlist_t *);
|
|
|
|
extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
|
2009-08-18 22:43:27 +04:00
|
|
|
extern int zfs_destroy(zfs_handle_t *, boolean_t);
|
|
|
|
extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
|
2013-09-04 16:00:57 +04:00
|
|
|
extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
|
2008-12-03 23:09:06 +03:00
|
|
|
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
|
2013-08-28 15:45:09 +04:00
|
|
|
extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps,
|
|
|
|
nvlist_t *props);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
|
2020-09-02 02:14:16 +03:00
|
|
|
|
|
|
|
/*
 * Flags passed by value to zfs_rename().
 *
 * Every flag is a one-bit field.  The fields are explicitly unsigned: the
 * signedness of a plain "int" bit-field is implementation-defined, and
 * where it is signed a one-bit field can only hold 0 and -1.  Storing 1
 * (or B_TRUE) would then read back as -1, so a test such as
 * (flags.recursive == B_TRUE) would never succeed.  With unsigned fields a
 * set flag always reads back as 1.  The storage layout is unchanged.
 */
typedef struct renameflags {
	/* recursive rename */
	unsigned int recursive : 1;

	/* don't unmount file systems */
	unsigned int nounmount : 1;

	/* force unmount file systems */
	unsigned int forceunmount : 1;
} renameflags_t;
|
|
|
|
|
|
|
|
extern int zfs_rename(zfs_handle_t *, const char *, renameflags_t);
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
typedef struct sendflags {
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 19:48:13 +03:00
|
|
|
/* Amount of extra information to print. */
|
|
|
|
int verbosity;
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
/* recursive send (ie, -R) */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t replicate;
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2021-04-11 22:05:35 +03:00
|
|
|
/* for recursive send, skip sending missing snapshots */
|
|
|
|
boolean_t skipmissing;
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
/* for incrementals, do all intermediate snapshots */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t doall;
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
/* if dataset is a clone, do incremental from its origin */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t fromorigin;
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2020-04-23 20:06:57 +03:00
|
|
|
/* field no longer used, maintained for backwards compatibility */
|
|
|
|
boolean_t pad;
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
/* send properties (ie, -p) */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t props;
|
|
|
|
|
|
|
|
/* do not send (no-op, ie. -n) */
|
|
|
|
boolean_t dryrun;
|
|
|
|
|
|
|
|
/* parsable verbose output (ie. -P) */
|
|
|
|
boolean_t parsable;
|
2012-05-10 02:05:14 +04:00
|
|
|
|
|
|
|
/* show progress (ie. -v) */
|
|
|
|
boolean_t progress;
|
2014-06-06 01:19:08 +04:00
|
|
|
|
2014-11-03 23:15:08 +03:00
|
|
|
/* large blocks (>128K) are permitted */
|
|
|
|
boolean_t largeblock;
|
|
|
|
|
2014-06-06 01:19:08 +04:00
|
|
|
/* WRITE_EMBEDDED records of type DATA are permitted */
|
|
|
|
boolean_t embed_data;
|
2016-07-11 20:45:52 +03:00
|
|
|
|
|
|
|
/* compressed WRITE records are permitted */
|
|
|
|
boolean_t compress;
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
|
|
|
|
/* raw encrypted records are permitted */
|
|
|
|
boolean_t raw;
|
2018-02-21 23:32:06 +03:00
|
|
|
|
|
|
|
/* only send received properties (ie. -b) */
|
|
|
|
boolean_t backup;
|
2019-02-15 23:41:38 +03:00
|
|
|
|
|
|
|
/* include snapshot holds in send stream */
|
|
|
|
boolean_t holds;
|
2020-01-10 21:16:58 +03:00
|
|
|
|
|
|
|
/* stream represents a partially received dataset */
|
|
|
|
boolean_t saved;
|
2010-05-29 00:45:14 +04:00
|
|
|
} sendflags_t;
|
|
|
|
|
|
|
|
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
|
|
|
|
|
2011-11-17 22:14:36 +04:00
|
|
|
extern int zfs_send(zfs_handle_t *, const char *, const char *,
|
|
|
|
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 19:48:13 +03:00
|
|
|
extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t *,
|
|
|
|
const char *);
|
|
|
|
extern int zfs_send_progress(zfs_handle_t *, int, uint64_t *, uint64_t *);
|
2016-01-07 00:22:48 +03:00
|
|
|
extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd,
|
|
|
|
const char *);
|
2020-01-10 21:16:58 +03:00
|
|
|
extern int zfs_send_saved(zfs_handle_t *, sendflags_t *, int, const char *);
|
2016-01-07 00:22:48 +03:00
|
|
|
extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl,
|
|
|
|
const char *token);
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_promote(zfs_handle_t *);
|
2013-09-04 16:00:57 +04:00
|
|
|
extern int zfs_hold(zfs_handle_t *, const char *, const char *,
|
2013-05-25 06:06:23 +04:00
|
|
|
boolean_t, int);
|
|
|
|
extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *);
|
2009-08-18 22:43:27 +04:00
|
|
|
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
|
2011-07-27 02:44:36 +04:00
|
|
|
extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
|
2019-07-01 02:38:07 +03:00
|
|
|
extern uint64_t zvol_volsize_to_reservation(zpool_handle_t *, uint64_t,
|
|
|
|
nvlist_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2009-07-03 02:44:48 +04:00
|
|
|
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
|
|
|
|
uid_t rid, uint64_t space);
|
|
|
|
|
2011-07-27 02:44:36 +04:00
|
|
|
extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t,
|
|
|
|
zfs_userspace_cb_t, void *);
|
|
|
|
|
|
|
|
extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **);
|
|
|
|
extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *);
|
2009-07-03 02:44:48 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
typedef struct recvflags {
|
|
|
|
/* print informational messages (ie, -v was specified) */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t verbose;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/* the destination is a prefix, not the exact fs (ie, -d) */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t isprefix;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
/*
|
|
|
|
* Only the tail of the sent snapshot path is appended to the
|
|
|
|
* destination to determine the received snapshot name (ie, -e).
|
|
|
|
*/
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t istail;
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/* do not actually do the recv, just check if it would work (ie, -n) */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t dryrun;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/* rollback/destroy filesystems as necessary (eg, -F) */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t force;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/* set "canmount=off" on all modified filesystems */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t canmountoff;
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2016-01-07 00:22:48 +03:00
|
|
|
/*
|
|
|
|
* Mark the file systems as "resumable" and do not destroy them if the
|
|
|
|
* receive is interrupted
|
|
|
|
*/
|
|
|
|
boolean_t resumable;
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/* byteswap flag is used internally; callers need not specify */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t byteswap;
|
2009-02-18 23:51:31 +03:00
|
|
|
|
|
|
|
/* do not mount file systems as they are extracted (private) */
|
2011-11-17 22:14:36 +04:00
|
|
|
boolean_t nomount;
|
2019-02-15 23:41:38 +03:00
|
|
|
|
|
|
|
/* Was holds flag set in the compound header? */
|
|
|
|
boolean_t holds;
|
|
|
|
|
|
|
|
/* skip receive of snapshot holds */
|
|
|
|
boolean_t skipholds;
|
2019-09-26 03:02:33 +03:00
|
|
|
|
|
|
|
/* mount the filesystem unless nomount is specified */
|
|
|
|
boolean_t domount;
|
2020-03-17 20:08:32 +03:00
|
|
|
|
|
|
|
/* force unmount while recv snapshot (private) */
|
|
|
|
boolean_t forceunmount;
|
2008-11-20 23:01:55 +03:00
|
|
|
} recvflags_t;
|
|
|
|
|
2015-12-22 04:31:57 +03:00
|
|
|
extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
|
|
|
|
recvflags_t *, int, avl_tree_t *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2010-08-27 01:24:34 +04:00
|
|
|
/*
 * Bit flags for the diff interface.  Each constant is a distinct power of
 * two, so any combination can be OR-ed together into one integer.
 */
typedef enum diff_flags {
	ZFS_DIFF_PARSEABLE = 0x01,
	ZFS_DIFF_TIMESTAMP = 0x02,
	ZFS_DIFF_CLASSIFY = 0x04,
	ZFS_DIFF_NO_MANGLE = 0x08
} diff_flags_t;
|
|
|
|
|
|
|
|
extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *,
|
|
|
|
int);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Miscellaneous functions.
|
|
|
|
*/
|
|
|
|
extern const char *zfs_type_to_name(zfs_type_t);
|
|
|
|
extern void zfs_refresh_properties(zfs_handle_t *);
|
|
|
|
extern int zfs_name_valid(const char *, zfs_type_t);
|
2020-07-22 21:14:20 +03:00
|
|
|
extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, const char *,
|
|
|
|
zfs_type_t);
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
extern int zfs_parent_name(zfs_handle_t *, char *, size_t);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
|
|
|
|
zfs_type_t);
|
|
|
|
extern int zfs_spa_version(zfs_handle_t *, int *);
|
2013-12-12 02:33:41 +04:00
|
|
|
extern boolean_t zfs_bookmark_exists(const char *path);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Mount support functions.
|
|
|
|
*/
|
|
|
|
extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
|
|
|
|
extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
|
|
|
|
extern int zfs_mount(zfs_handle_t *, const char *, int);
|
2020-01-14 19:49:54 +03:00
|
|
|
extern int zfs_mount_at(zfs_handle_t *, const char *, int, const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_unmount(zfs_handle_t *, const char *, int);
|
|
|
|
extern int zfs_unmountall(zfs_handle_t *, int);
|
|
|
|
|
2020-05-21 04:02:41 +03:00
|
|
|
#if defined(__linux__)
|
|
|
|
extern int zfs_parse_mount_options(char *mntopts, unsigned long *mntflags,
|
|
|
|
unsigned long *zfsflags, int sloppy, char *badopt, char *mtabopt);
|
|
|
|
extern void zfs_adjust_mount_options(zfs_handle_t *zhp, const char *mntpoint,
|
|
|
|
char *mntopts, char *mtabopt);
|
|
|
|
#endif
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Share support functions.
|
|
|
|
*/
|
|
|
|
extern boolean_t zfs_is_shared(zfs_handle_t *);
|
|
|
|
extern int zfs_share(zfs_handle_t *);
|
|
|
|
extern int zfs_unshare(zfs_handle_t *);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Protocol-specific share support functions.
|
|
|
|
*/
|
|
|
|
extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **);
|
|
|
|
extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **);
|
|
|
|
extern int zfs_share_nfs(zfs_handle_t *);
|
|
|
|
extern int zfs_share_smb(zfs_handle_t *);
|
|
|
|
extern int zfs_shareall(zfs_handle_t *);
|
|
|
|
extern int zfs_unshare_nfs(zfs_handle_t *, const char *);
|
|
|
|
extern int zfs_unshare_smb(zfs_handle_t *, const char *);
|
|
|
|
extern int zfs_unshareall_nfs(zfs_handle_t *);
|
|
|
|
extern int zfs_unshareall_smb(zfs_handle_t *);
|
|
|
|
extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
|
2016-11-29 22:22:38 +03:00
|
|
|
extern int zfs_unshareall_bytype(zfs_handle_t *, const char *, const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
extern int zfs_unshareall(zfs_handle_t *);
|
2009-07-03 02:44:48 +04:00
|
|
|
extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
|
2008-11-20 23:01:55 +03:00
|
|
|
void *, void *, int, zfs_share_op_t);
|
2020-07-13 19:19:18 +03:00
|
|
|
extern void zfs_commit_nfs_shares(void);
|
|
|
|
extern void zfs_commit_smb_shares(void);
|
|
|
|
extern void zfs_commit_all_shares(void);
|
|
|
|
extern void zfs_commit_shares(const char *);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
|
|
|
extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
|
|
|
|
|
2010-12-17 02:47:40 +03:00
|
|
|
/*
|
|
|
|
* Utility functions to run an external process.
|
|
|
|
*/
|
2011-03-07 21:10:20 +03:00
|
|
|
/*
 * Flag bits for the process-running helpers declared below; each macro
 * occupies a distinct bit.
 * NOTE(review): presumably OR-ed together into the helpers' int flags
 * argument -- confirm against the implementation.
 */
#define STDOUT_VERBOSE 0x01
|
|
|
|
#define STDERR_VERBOSE 0x02
|
2017-04-21 19:27:04 +03:00
|
|
|
#define NO_DEFAULT_PATH 0x04 /* Don't use $PATH to look up the command */
|
2011-03-07 21:10:20 +03:00
|
|
|
|
2020-01-09 04:50:05 +03:00
|
|
|
/*
 * Run an external process.
 * NOTE(review): the trailing int presumably carries the STDOUT_VERBOSE /
 * STDERR_VERBOSE / NO_DEFAULT_PATH flag bits defined above -- confirm.
 * Declared with an explicit extern, like most prototypes in this header.
 */
extern int libzfs_run_process(const char *, char **, int);
|
|
|
|
/*
 * NOTE(review): by name, runs an external process and captures its stdout.
 * The char **[] / int * pair presumably returns the captured lines and
 * their count -- confirm against the implementation.
 */
int libzfs_run_process_get_stdout(const char *, char *[], char *[],
|
|
|
|
char **[], int *);
|
|
|
|
/*
 * Same parameter list as libzfs_run_process_get_stdout().
 * NOTE(review): the "_nopath" suffix suggests the command is not looked up
 * via $PATH (compare NO_DEFAULT_PATH) -- confirm against the implementation.
 */
int libzfs_run_process_get_stdout_nopath(const char *, char *[], char *[],
|
|
|
|
char **[], int *);
|
2017-04-21 19:27:04 +03:00
|
|
|
|
2020-01-09 04:50:05 +03:00
|
|
|
/*
 * NOTE(review): by name, releases an array of strings; the int is
 * presumably the element count -- confirm against the implementation.
 * Declared with an explicit extern, like most prototypes in this header.
 */
extern void libzfs_free_str_array(char **, int);
|
2017-04-21 19:27:04 +03:00
|
|
|
|
2020-01-09 04:50:05 +03:00
|
|
|
/*
 * NOTE(review): by name, reports whether the named environment variable is
 * set; the exact return convention is not visible from this header.
 * Declared with an explicit extern, like most prototypes in this header.
 */
extern int libzfs_envvar_is_set(char *);
|
2010-12-17 02:47:40 +03:00
|
|
|
|
2019-04-10 10:43:28 +03:00
|
|
|
/*
|
|
|
|
* Utility functions for zfs version
|
|
|
|
*/
|
|
|
|
extern void zfs_version_userland(char *, int);
|
|
|
|
extern int zfs_version_kernel(char *, int);
|
|
|
|
extern int zfs_version_print(void);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Given a device or file, determine if it is part of a pool.
|
|
|
|
*/
|
|
|
|
extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
|
|
|
|
boolean_t *);
|
|
|
|
|
|
|
|
/*
|
2010-05-29 00:45:14 +04:00
|
|
|
* Label manipulation.
|
2008-11-20 23:01:55 +03:00
|
|
|
*/
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int zpool_clear_label(int);
|
2020-09-16 01:42:27 +03:00
|
|
|
extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *);
|
|
|
|
extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **);
|
2008-11-20 23:01:55 +03:00
|
|
|
|
2009-07-03 02:44:48 +04:00
|
|
|
/*
|
|
|
|
* Management interfaces for SMB ACL files
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * SMB ACL file management: "add" operation.
 * NOTE(review): the meaning of the three string arguments is not visible
 * from this header -- confirm against the implementation.
 */
extern int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *);
|
|
|
|
/*
 * SMB ACL file management: "remove" operation.
 * NOTE(review): the meaning of the three string arguments is not visible
 * from this header -- confirm against the implementation.
 */
extern int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *);
|
|
|
|
/*
 * SMB ACL file management: "purge" operation.
 * NOTE(review): the meaning of the two string arguments is not visible
 * from this header -- confirm against the implementation.
 */
extern int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *);
|
|
|
|
/*
 * SMB ACL file management: "rename" operation.
 * NOTE(review): the meaning of the four string arguments is not visible
 * from this header -- confirm against the implementation.
 */
extern int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *);
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
/*
|
|
|
|
* Enable and disable datasets within a pool by mounting/unmounting and
|
|
|
|
* sharing/unsharing them.
|
|
|
|
*/
|
|
|
|
extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
|
|
|
|
extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
|
|
|
|
|
2021-02-18 08:30:45 +03:00
|
|
|
/*
|
|
|
|
* Parse a features file for -o compatibility
|
|
|
|
*/
|
|
|
|
/*
 * Status type returned by zpool_load_compat(), declared below.
 * The enumerators carry no explicit values, so they are numbered from 0
 * in declaration order (ZPOOL_COMPATIBILITY_OK == 0).
 * NOTE(review): by name, WARNTOKEN / BADTOKEN presumably describe tokens
 * found in a features file, and BADFILE / NOFILES the files themselves --
 * confirm against the implementation.
 */
typedef enum {
|
|
|
|
ZPOOL_COMPATIBILITY_OK,
|
2021-04-12 19:08:56 +03:00
|
|
|
ZPOOL_COMPATIBILITY_WARNTOKEN,
|
|
|
|
ZPOOL_COMPATIBILITY_BADTOKEN,
|
2021-02-18 08:30:45 +03:00
|
|
|
ZPOOL_COMPATIBILITY_BADFILE,
|
|
|
|
ZPOOL_COMPATIBILITY_NOFILES
|
|
|
|
} zpool_compat_status_t;
|
|
|
|
|
|
|
|
extern zpool_compat_status_t zpool_load_compat(const char *,
|
2021-04-12 19:08:56 +03:00
|
|
|
boolean_t *, char *, size_t);
|
2021-02-18 08:30:45 +03:00
|
|
|
|
2020-07-06 21:57:24 +03:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Attach/detach the given filesystem to/from the given jail.
|
|
|
|
*/
|
|
|
|
extern int zfs_jail(zfs_handle_t *zhp, int jailid, int attach);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set loader options for next boot.
|
|
|
|
*/
|
|
|
|
extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *);
|
|
|
|
|
|
|
|
#endif /* __FreeBSD__ */
|
|
|
|
|
2008-11-20 23:01:55 +03:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* _LIBZFS_H */
|