2010-05-29 00:45:14 +04:00
|
|
|
/*
|
|
|
|
* CDDL HEADER START
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
*
|
|
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
2022-07-12 00:16:13 +03:00
|
|
|
* or https://opensource.org/licenses/CDDL-1.0.
|
2010-05-29 00:45:14 +04:00
|
|
|
* See the License for the specific language governing permissions
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
|
*
|
|
|
|
* CDDL HEADER END
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
|
2016-07-22 18:52:49 +03:00
|
|
|
* Copyright (c) 2016 by Delphix. All rights reserved.
|
2010-05-29 00:45:14 +04:00
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _SYS_DDT_H
|
|
|
|
#define _SYS_DDT_H
|
|
|
|
|
|
|
|
#include <sys/sysmacros.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/fs/zfs.h>
|
|
|
|
#include <sys/zio.h>
|
|
|
|
#include <sys/dmu.h>
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2016-07-22 18:52:49 +03:00
|
|
|
struct abd;
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
/*
 * On-disk DDT formats, in the desired search order (newest version first).
 *
 * DDT_TYPES is not a format; it is the number of formats and is used to
 * size per-type arrays such as ddt_object[DDT_TYPES][DDT_CLASSES].
 */
enum ddt_type {
	DDT_TYPE_ZAP = 0,
	DDT_TYPES
};
|
|
|
|
|
|
|
|
/*
 * DDT classes, in the desired search order (highest replication level first).
 *
 * DDT_CLASSES is not a class; it is the number of classes and is used to
 * size per-class arrays such as ddt_object[DDT_TYPES][DDT_CLASSES].
 */
enum ddt_class {
	DDT_CLASS_DITTO = 0,
	DDT_CLASS_DUPLICATE,
	DDT_CLASS_UNIQUE,
	DDT_CLASSES
};
|
|
|
|
|
|
|
|
/*
 * DDT type treated as current. The value 0 is DDT_TYPE_ZAP in enum ddt_type.
 * NOTE(review): presumably the type used for newly created entries -- confirm
 * against the callers.
 */
#define DDT_TYPE_CURRENT 0
|
|
|
|
|
|
|
|
/*
|
|
|
|
* On-disk ddt entry: key (name) and physical storage (value).
|
|
|
|
*/
|
|
|
|
typedef struct ddt_key {
|
|
|
|
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
|
2013-06-11 21:12:34 +04:00
|
|
|
/*
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
* Encoded with logical & physical size, encryption, and compression,
|
|
|
|
* as follows:
|
2013-06-11 21:12:34 +04:00
|
|
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 20:36:48 +03:00
|
|
|
* | 0 | 0 | 0 |X| comp| PSIZE | LSIZE |
|
2013-06-11 21:12:34 +04:00
|
|
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
|
|
|
*/
|
|
|
|
uint64_t ddk_prop;
|
2010-05-29 00:45:14 +04:00
|
|
|
} ddt_key_t;
|
|
|
|
|
|
|
|
/*
 * Accessors for the fields packed into ddt_key_t.ddk_prop.  Every macro
 * takes a pointer to a ddt_key_t; the setters modify ddk_prop in place.
 *
 * LSIZE and PSIZE are 16-bit fields at bit offsets 0 and 16.  They go
 * through the BF64_GET_SB/BF64_SET_SB variants with SPA_MINBLOCKSHIFT and 1
 * as the trailing arguments (presumably shift and bias -- see the
 * definition of those macros).
 */
#define	DDK_GET_LSIZE(ddk)	\
	BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_LSIZE(ddk, x)	\
	BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	DDK_GET_PSIZE(ddk)	\
	BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_PSIZE(ddk, x)	\
	BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)

/* Compression: 7-bit field starting at bit 32. */
#define	DDK_GET_COMPRESS(ddk)		BF64_GET((ddk)->ddk_prop, 32, 7)
#define	DDK_SET_COMPRESS(ddk, x)	BF64_SET((ddk)->ddk_prop, 32, 7, x)

/* Encryption: single-bit flag at bit 39. */
#define	DDK_GET_CRYPT(ddk)		BF64_GET((ddk)->ddk_prop, 39, 1)
#define	DDK_SET_CRYPT(ddk, x)		BF64_SET((ddk)->ddk_prop, 39, 1, x)
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
typedef struct ddt_phys {
|
|
|
|
dva_t ddp_dva[SPA_DVAS_PER_BP];
|
|
|
|
uint64_t ddp_refcnt;
|
|
|
|
uint64_t ddp_phys_birth;
|
|
|
|
} ddt_phys_t;
|
|
|
|
|
Remove dedupditto functionality
If dedup is in use, the `dedupditto` property can be set, causing ZFS to
keep an extra copy of data that is referenced many times (>100x). The
idea was that this data is more important than other data and thus we
want to be really sure that it is not lost if the disk experiences a
small amount of random corruption.
ZFS (and system administrators) rely on the pool-level redundancy to
protect their data (e.g. mirroring or RAIDZ). Since the user/sysadmin
doesn't have control over what data will be offered extra redundancy by
dedupditto, this extra redundancy is not very useful. The bulk of the
data is still vulnerable to loss based on the pool-level redundancy.
For example, if particle strikes corrupt 0.1% of blocks, you will either
be saved by mirror/raidz, or you will be sad. This is true even if
dedupditto saved another 0.01% of blocks from being corrupted.
Therefore, the dedupditto functionality is rarely enabled (i.e. the
property is rarely set), and it fulfills its promise of increased
redundancy even more rarely.
Additionally, this feature does not work as advertised (on existing
releases), because scrub/resilver did not repair the extra (dedupditto)
copy (see https://github.com/zfsonlinux/zfs/pull/8270).
In summary, this seldom-used feature doesn't work, and even if it did it
wouldn't provide useful data protection. It has a non-trivial
maintenance burden (again see https://github.com/zfsonlinux/zfs/pull/8270).
We should remove the dedupditto functionality. For backwards
compatibility with the existing CLI, "zpool set dedupditto" will still
"succeed" (exit code zero), but won't have any effect. For backwards
compatibility with existing pools that had dedupditto enabled at some
point, the code will still be able to understand dedupditto blocks and
free them when appropriate. However, ZFS won't write any new dedupditto
blocks.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Igor Kozhukhov <igor@dilos.org>
Reviewed-by: Alek Pinchuk <apinchuk@datto.com>
Issue #8270
Closes #8310
2019-06-20 00:54:02 +03:00
|
|
|
/*
 * Note, we no longer generate new DDT_PHYS_DITTO-type blocks.  However,
 * we maintain the ability to free existing dedup-ditto blocks.
 *
 * DDT_PHYS_TYPES is the number of types; it sizes the per-entry arrays
 * dde_phys[] and dde_lead_zio[] in struct ddt_entry.
 */
enum ddt_phys_type {
	DDT_PHYS_DITTO = 0,
	DDT_PHYS_SINGLE = 1,
	DDT_PHYS_DOUBLE = 2,
	DDT_PHYS_TRIPLE = 3,
	DDT_PHYS_TYPES
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In-core ddt entry
|
|
|
|
*/
|
|
|
|
struct ddt_entry {
|
2023-06-09 03:14:42 +03:00
|
|
|
/* key must be first for ddt_key_compare */
|
2010-05-29 00:45:14 +04:00
|
|
|
ddt_key_t dde_key;
|
|
|
|
ddt_phys_t dde_phys[DDT_PHYS_TYPES];
|
|
|
|
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
|
2016-07-22 18:52:49 +03:00
|
|
|
struct abd *dde_repair_abd;
|
2010-05-29 00:45:14 +04:00
|
|
|
enum ddt_type dde_type;
|
|
|
|
enum ddt_class dde_class;
|
|
|
|
uint8_t dde_loading;
|
|
|
|
uint8_t dde_loaded;
|
|
|
|
kcondvar_t dde_cv;
|
|
|
|
avl_node_t dde_node;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In-core ddt
|
|
|
|
*/
|
|
|
|
struct ddt {
|
|
|
|
kmutex_t ddt_lock;
|
|
|
|
avl_tree_t ddt_tree;
|
|
|
|
avl_tree_t ddt_repair_tree;
|
|
|
|
enum zio_checksum ddt_checksum;
|
|
|
|
spa_t *ddt_spa;
|
|
|
|
objset_t *ddt_os;
|
|
|
|
uint64_t ddt_stat_object;
|
|
|
|
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
|
|
|
|
ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
|
|
|
|
ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
|
|
|
|
ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
|
|
|
|
avl_node_t ddt_node;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * In-core and on-disk bookmark for DDT walks
 *
 * Four 64-bit words.  Because the structure is also stored on disk, the
 * member order and widths must not change.
 */
typedef struct ddt_bookmark {
	uint64_t	ddb_class;
	uint64_t	ddb_type;
	uint64_t	ddb_checksum;
	uint64_t	ddb_cursor;
} ddt_bookmark_t;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ops vector to access a specific DDT object type.
|
|
|
|
*/
|
|
|
|
typedef struct ddt_ops {
|
|
|
|
char ddt_op_name[32];
|
|
|
|
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
|
|
|
|
boolean_t prehash);
|
|
|
|
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
|
|
|
int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
|
|
|
|
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
|
|
|
|
ddt_entry_t *dde);
|
|
|
|
int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
|
|
|
|
dmu_tx_t *tx);
|
|
|
|
int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
|
|
|
|
dmu_tx_t *tx);
|
|
|
|
int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
|
|
|
|
uint64_t *walk);
|
2012-10-26 21:01:49 +04:00
|
|
|
int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
|
2010-05-29 00:45:14 +04:00
|
|
|
} ddt_ops_t;
|
|
|
|
|
2020-08-24 20:32:59 +03:00
|
|
|
/*
 * Length constant for DDT object names.
 * NOTE(review): presumably the size of the buffer passed as `name` to
 * ddt_object_name() -- confirm against the callers.
 */
#define DDT_NAMELEN 107
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
|
2020-06-06 22:54:04 +03:00
|
|
|
enum ddt_class clazz, char *name);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
|
2020-06-06 22:54:04 +03:00
|
|
|
enum ddt_class clazz, uint64_t *walk, ddt_entry_t *dde);
|
2012-10-26 21:01:49 +04:00
|
|
|
extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
|
2020-06-06 22:54:04 +03:00
|
|
|
enum ddt_class clazz, uint64_t *count);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
|
2020-06-06 22:54:04 +03:00
|
|
|
enum ddt_class clazz, dmu_object_info_t *);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
|
2020-06-06 22:54:04 +03:00
|
|
|
enum ddt_class clazz);
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
|
|
|
|
uint64_t txg);
|
|
|
|
extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
|
|
|
|
const ddt_phys_t *ddp, blkptr_t *bp);
|
|
|
|
|
|
|
|
extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
|
|
|
|
|
|
|
|
extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
|
|
|
|
extern void ddt_phys_clear(ddt_phys_t *ddp);
|
|
|
|
extern void ddt_phys_addref(ddt_phys_t *ddp);
|
|
|
|
extern void ddt_phys_decref(ddt_phys_t *ddp);
|
|
|
|
extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
|
|
|
|
uint64_t txg);
|
|
|
|
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
|
|
|
|
extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
|
|
|
|
|
2023-05-16 06:30:26 +03:00
|
|
|
extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
|
|
|
|
|
|
|
|
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
|
|
|
|
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
|
|
|
|
extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
|
|
|
|
extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
|
|
|
|
extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
|
|
|
|
extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
|
|
|
|
|
|
|
|
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
|
|
|
|
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
|
|
|
|
|
|
|
|
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
|
|
|
|
extern void ddt_enter(ddt_t *ddt);
|
|
|
|
extern void ddt_exit(ddt_t *ddt);
|
2013-11-20 01:34:46 +04:00
|
|
|
extern void ddt_init(void);
|
|
|
|
extern void ddt_fini(void);
|
2010-05-29 00:45:14 +04:00
|
|
|
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
|
|
|
|
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
|
|
|
|
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
|
|
|
|
|
|
|
|
extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
|
|
|
|
const blkptr_t *bp);
|
|
|
|
|
|
|
|
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
|
|
|
|
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
|
|
|
|
|
2023-06-09 03:14:42 +03:00
|
|
|
extern int ddt_key_compare(const void *x1, const void *x2);
|
2010-05-29 00:45:14 +04:00
|
|
|
|
|
|
|
extern void ddt_create(spa_t *spa);
|
|
|
|
extern int ddt_load(spa_t *spa);
|
|
|
|
extern void ddt_unload(spa_t *spa);
|
|
|
|
extern void ddt_sync(spa_t *spa, uint64_t txg);
|
|
|
|
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
|
|
|
|
extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
|
2020-06-06 22:54:04 +03:00
|
|
|
enum ddt_class clazz, ddt_entry_t *dde, dmu_tx_t *tx);
|
2010-05-29 00:45:14 +04:00
|
|
|
|
2023-03-10 22:59:53 +03:00
|
|
|
extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
|
|
|
|
|
2010-05-29 00:45:14 +04:00
|
|
|
extern const ddt_ops_t ddt_zap_ops;
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* _SYS_DDT_H */
|