mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
Remove dedupditto functionality
If dedup is in use, the `dedupditto` property can be set, causing ZFS to keep an extra copy of data that is referenced many times (>100x). The idea was that this data is more important than other data and thus we want to be really sure that it is not lost if the disk experiences a small amount of random corruption.

ZFS (and system administrators) rely on the pool-level redundancy to protect their data (e.g. mirroring or RAIDZ). Since the user/sysadmin doesn't have control over what data will be offered extra redundancy by dedupditto, this extra redundancy is not very useful. The bulk of the data is still vulnerable to loss based on the pool-level redundancy. For example, if particle strikes corrupt 0.1% of blocks, you will either be saved by mirror/raidz, or you will be sad. This is true even if dedupditto saved another 0.01% of blocks from being corrupted.

Therefore, the dedupditto functionality is rarely enabled (i.e. the property is rarely set), and it fulfills its promise of increased redundancy even more rarely.

Additionally, this feature does not work as advertised (on existing releases), because scrub/resilver did not repair the extra (dedupditto) copy (see https://github.com/zfsonlinux/zfs/pull/8270).

In summary, this seldom-used feature doesn't work, and even if it did it wouldn't provide useful data protection. It has a non-trivial maintenance burden (again see https://github.com/zfsonlinux/zfs/pull/8270).

We should remove the dedupditto functionality. For backwards compatibility with the existing CLI, "zpool set dedupditto" will still "succeed" (exit code zero), but won't have any effect. For backwards compatibility with existing pools that had dedupditto enabled at some point, the code will still be able to understand dedupditto blocks and free them when appropriate. However, ZFS won't write any new dedupditto blocks.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Igor Kozhukhov <igor@dilos.org>
Reviewed-by: Alek Pinchuk <apinchuk@datto.com>
Issue #8270
Closes #8310
This commit is contained in:
parent
fb0be12d7b
commit
050d720c43
@ -359,7 +359,6 @@ ztest_func_t ztest_dsl_prop_get_set;
|
|||||||
ztest_func_t ztest_spa_prop_get_set;
|
ztest_func_t ztest_spa_prop_get_set;
|
||||||
ztest_func_t ztest_spa_create_destroy;
|
ztest_func_t ztest_spa_create_destroy;
|
||||||
ztest_func_t ztest_fault_inject;
|
ztest_func_t ztest_fault_inject;
|
||||||
ztest_func_t ztest_ddt_repair;
|
|
||||||
ztest_func_t ztest_dmu_snapshot_hold;
|
ztest_func_t ztest_dmu_snapshot_hold;
|
||||||
ztest_func_t ztest_mmp_enable_disable;
|
ztest_func_t ztest_mmp_enable_disable;
|
||||||
ztest_func_t ztest_scrub;
|
ztest_func_t ztest_scrub;
|
||||||
@ -414,7 +413,6 @@ ztest_info_t ztest_info[] = {
|
|||||||
ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes),
|
ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes),
|
||||||
ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes),
|
ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes),
|
||||||
ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
|
ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
|
||||||
ZTI_INIT(ztest_ddt_repair, 1, &zopt_sometimes),
|
|
||||||
ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
|
ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
|
||||||
ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
|
ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
|
||||||
ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
|
ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
|
||||||
@ -1523,31 +1521,6 @@ ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
|
|||||||
return (txg);
|
return (txg);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
|
|
||||||
{
|
|
||||||
uint64_t *ip = buf;
|
|
||||||
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
|
|
||||||
|
|
||||||
while (ip < ip_end)
|
|
||||||
*ip++ = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
|
||||||
static boolean_t
|
|
||||||
ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
|
|
||||||
{
|
|
||||||
uint64_t *ip = buf;
|
|
||||||
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
|
|
||||||
uint64_t diff = 0;
|
|
||||||
|
|
||||||
while (ip < ip_end)
|
|
||||||
diff |= (value - *ip++);
|
|
||||||
|
|
||||||
return (diff == 0);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
|
ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
|
||||||
uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
|
uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
|
||||||
@ -5574,9 +5547,6 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
|
|||||||
|
|
||||||
(void) pthread_rwlock_rdlock(&ztest_name_lock);
|
(void) pthread_rwlock_rdlock(&ztest_name_lock);
|
||||||
|
|
||||||
(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
|
|
||||||
ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
|
|
||||||
|
|
||||||
(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_AUTOTRIM, ztest_random(2));
|
(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_AUTOTRIM, ztest_random(2));
|
||||||
|
|
||||||
VERIFY0(spa_prop_get(ztest_spa, &props));
|
VERIFY0(spa_prop_get(ztest_spa, &props));
|
||||||
@ -6017,136 +5987,6 @@ out:
|
|||||||
umem_free(pathrand, MAXPATHLEN);
|
umem_free(pathrand, MAXPATHLEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Verify that DDT repair works as expected.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
|
|
||||||
{
|
|
||||||
ztest_shared_t *zs = ztest_shared;
|
|
||||||
spa_t *spa = ztest_spa;
|
|
||||||
objset_t *os = zd->zd_os;
|
|
||||||
ztest_od_t *od;
|
|
||||||
uint64_t object, blocksize, txg, pattern;
|
|
||||||
enum zio_checksum checksum = spa_dedup_checksum(spa);
|
|
||||||
dmu_buf_t *db;
|
|
||||||
dmu_tx_t *tx;
|
|
||||||
|
|
||||||
od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
|
|
||||||
ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
|
|
||||||
|
|
||||||
if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
|
|
||||||
umem_free(od, sizeof (ztest_od_t));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Take the name lock as writer to prevent anyone else from changing
|
|
||||||
* the pool and dataset properties we need to maintain during this test.
|
|
||||||
*/
|
|
||||||
(void) pthread_rwlock_wrlock(&ztest_name_lock);
|
|
||||||
|
|
||||||
if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
|
|
||||||
B_FALSE) != 0 ||
|
|
||||||
ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
|
|
||||||
B_FALSE) != 0) {
|
|
||||||
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
|
||||||
umem_free(od, sizeof (ztest_od_t));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
dmu_objset_stats_t dds;
|
|
||||||
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
|
|
||||||
dmu_objset_fast_stat(os, &dds);
|
|
||||||
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
|
|
||||||
|
|
||||||
object = od[0].od_object;
|
|
||||||
blocksize = od[0].od_blocksize;
|
|
||||||
pattern = zs->zs_guid ^ dds.dds_guid;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The numbers of copies written must always be greater than or
|
|
||||||
* equal to the threshold set by the dedupditto property. This
|
|
||||||
* is initialized in ztest_run() and then randomly changed by
|
|
||||||
* ztest_spa_prop_get_set(), these function will never set it
|
|
||||||
* larger than 2 * ZIO_DEDUPDITTO_MIN.
|
|
||||||
*/
|
|
||||||
int copies = 2 * ZIO_DEDUPDITTO_MIN;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The block size is limited by DMU_MAX_ACCESS (64MB) which
|
|
||||||
* caps the maximum transaction size. A block size of up to
|
|
||||||
* SPA_OLD_MAXBLOCKSIZE is allowed which results in a maximum
|
|
||||||
* transaction size of: 128K * 200 (copies) = ~25MB
|
|
||||||
*
|
|
||||||
* The actual block size is checked here, rather than requested
|
|
||||||
* above, because the way ztest_od_init() is implemented it does
|
|
||||||
* not guarantee the block size requested will be used.
|
|
||||||
*/
|
|
||||||
if (blocksize > SPA_OLD_MAXBLOCKSIZE) {
|
|
||||||
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
|
||||||
umem_free(od, sizeof (ztest_od_t));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT(object != 0);
|
|
||||||
|
|
||||||
tx = dmu_tx_create(os);
|
|
||||||
dmu_tx_hold_write(tx, object, 0, copies * blocksize);
|
|
||||||
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
|
|
||||||
if (txg == 0) {
|
|
||||||
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
|
||||||
umem_free(od, sizeof (ztest_od_t));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Write all the copies of our block.
|
|
||||||
*/
|
|
||||||
for (int i = 0; i < copies; i++) {
|
|
||||||
uint64_t offset = i * blocksize;
|
|
||||||
int error = dmu_buf_hold(os, object, offset, FTAG, &db,
|
|
||||||
DMU_READ_NO_PREFETCH);
|
|
||||||
if (error != 0) {
|
|
||||||
fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
|
|
||||||
os, (long long)object, (long long) offset, error);
|
|
||||||
}
|
|
||||||
ASSERT(db->db_offset == offset);
|
|
||||||
ASSERT(db->db_size == blocksize);
|
|
||||||
ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
|
|
||||||
ztest_pattern_match(db->db_data, db->db_size, 0ULL));
|
|
||||||
dmu_buf_will_fill(db, tx);
|
|
||||||
ztest_pattern_set(db->db_data, db->db_size, pattern);
|
|
||||||
dmu_buf_rele(db, FTAG);
|
|
||||||
}
|
|
||||||
|
|
||||||
dmu_tx_commit(tx);
|
|
||||||
txg_wait_synced(spa_get_dsl(spa), txg);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Find out what block we got.
|
|
||||||
*/
|
|
||||||
VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
|
|
||||||
blkptr_t blk = *((dmu_buf_impl_t *)db)->db_blkptr;
|
|
||||||
dmu_buf_rele(db, FTAG);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Damage the block. Dedup-ditto will save us when we read it later.
|
|
||||||
*/
|
|
||||||
uint64_t psize = BP_GET_PSIZE(&blk);
|
|
||||||
abd_t *abd = abd_alloc_linear(psize, B_TRUE);
|
|
||||||
ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
|
|
||||||
|
|
||||||
(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
|
|
||||||
abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
|
|
||||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));
|
|
||||||
|
|
||||||
abd_free(abd);
|
|
||||||
|
|
||||||
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
|
||||||
umem_free(od, sizeof (ztest_od_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* By design ztest will never inject uncorrectable damage in to the pool.
|
* By design ztest will never inject uncorrectable damage in to the pool.
|
||||||
* Issue a scrub, wait for it to complete, and verify there is never any
|
* Issue a scrub, wait for it to complete, and verify there is never any
|
||||||
@ -7103,8 +6943,6 @@ ztest_run(ztest_shared_t *zs)
|
|||||||
zs->zs_guid = dds.dds_guid;
|
zs->zs_guid = dds.dds_guid;
|
||||||
dmu_objset_disown(os, B_TRUE, FTAG);
|
dmu_objset_disown(os, B_TRUE, FTAG);
|
||||||
|
|
||||||
spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a thread to periodically resume suspended I/O.
|
* Create a thread to periodically resume suspended I/O.
|
||||||
*/
|
*/
|
||||||
|
@ -103,6 +103,10 @@ typedef struct ddt_phys {
|
|||||||
uint64_t ddp_phys_birth;
|
uint64_t ddp_phys_birth;
|
||||||
} ddt_phys_t;
|
} ddt_phys_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However,
|
||||||
|
* we maintain the ability to free existing dedup-ditto blocks.
|
||||||
|
*/
|
||||||
enum ddt_phys_type {
|
enum ddt_phys_type {
|
||||||
DDT_PHYS_DITTO = 0,
|
DDT_PHYS_DITTO = 0,
|
||||||
DDT_PHYS_SINGLE = 1,
|
DDT_PHYS_SINGLE = 1,
|
||||||
@ -216,10 +220,6 @@ extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
|
|||||||
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
|
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
|
||||||
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
|
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
|
||||||
|
|
||||||
extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
|
|
||||||
ddt_phys_t *ddp_willref);
|
|
||||||
extern int ddt_ditto_copies_present(ddt_entry_t *dde);
|
|
||||||
|
|
||||||
extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
|
extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
|
||||||
extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
|
extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
|
||||||
|
|
||||||
|
@ -350,7 +350,6 @@ struct spa {
|
|||||||
ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
|
ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
|
||||||
uint64_t spa_ddt_stat_object; /* DDT statistics */
|
uint64_t spa_ddt_stat_object; /* DDT statistics */
|
||||||
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
|
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
|
||||||
uint64_t spa_dedup_ditto; /* dedup ditto threshold */
|
|
||||||
uint64_t spa_dedup_checksum; /* default dedup checksum */
|
uint64_t spa_dedup_checksum; /* default dedup checksum */
|
||||||
uint64_t spa_dspace; /* dspace in normal class */
|
uint64_t spa_dspace; /* dspace in normal class */
|
||||||
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
|
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
|
||||||
|
@ -102,7 +102,6 @@ enum zio_checksum {
|
|||||||
#define ZIO_CHECKSUM_VERIFY (1 << 8)
|
#define ZIO_CHECKSUM_VERIFY (1 << 8)
|
||||||
|
|
||||||
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
|
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
|
||||||
#define ZIO_DEDUPDITTO_MIN 100
|
|
||||||
|
|
||||||
/* supported encryption algorithms */
|
/* supported encryption algorithms */
|
||||||
enum zio_encrypt {
|
enum zio_encrypt {
|
||||||
|
@ -722,15 +722,8 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case ZPOOL_PROP_DEDUPDITTO:
|
case ZPOOL_PROP_DEDUPDITTO:
|
||||||
if (intval < ZIO_DEDUPDITTO_MIN && intval != 0) {
|
printf("Note: property '%s' no longer has "
|
||||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
"any effect\n", propname);
|
||||||
"property '%s' value %d is invalid; only "
|
|
||||||
"values of 0 or >= %" PRId32 " are allowed "
|
|
||||||
"for this property."),
|
|
||||||
propname, intval, ZIO_DEDUPDITTO_MIN);
|
|
||||||
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -806,17 +806,7 @@ such that it is available even if the pool becomes faulted.
|
|||||||
An administrator can provide additional information about a pool using this
|
An administrator can provide additional information about a pool using this
|
||||||
property.
|
property.
|
||||||
.It Sy dedupditto Ns = Ns Ar number
|
.It Sy dedupditto Ns = Ns Ar number
|
||||||
This property is deprecated. In a future release, it will no longer have any
|
This property is deprecated and no longer has any effect.
|
||||||
effect.
|
|
||||||
.Pp
|
|
||||||
Threshold for the number of block ditto copies.
|
|
||||||
If the reference count for a deduplicated block increases above this number, a
|
|
||||||
new ditto copy of this block is automatically stored.
|
|
||||||
The default setting is
|
|
||||||
.Sy 0
|
|
||||||
which causes no ditto copies to be created for deduplicated blocks.
|
|
||||||
The minimum legal nonzero setting is
|
|
||||||
.Sy 100 .
|
|
||||||
.It Sy delegation Ns = Ns Sy on Ns | Ns Sy off
|
.It Sy delegation Ns = Ns Sy on Ns | Ns Sy off
|
||||||
Controls whether a non-privileged user is granted access based on the dataset
|
Controls whether a non-privileged user is granted access based on the dataset
|
||||||
permissions defined on the dataset.
|
permissions defined on the dataset.
|
||||||
|
@ -104,8 +104,6 @@ zpool_prop_init(void)
|
|||||||
/* default number properties */
|
/* default number properties */
|
||||||
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
|
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
|
||||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
|
PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
|
||||||
zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
|
|
||||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
|
|
||||||
zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_DEFAULT,
|
zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_DEFAULT,
|
||||||
ZFS_TYPE_POOL, "<ashift, 9-16, or 0=default>", "ASHIFT");
|
ZFS_TYPE_POOL, "<ashift, 9-16, or 0=default>", "ASHIFT");
|
||||||
|
|
||||||
@ -143,6 +141,8 @@ zpool_prop_init(void)
|
|||||||
PROP_ONETIME, ZFS_TYPE_POOL, "TNAME");
|
PROP_ONETIME, ZFS_TYPE_POOL, "TNAME");
|
||||||
zprop_register_hidden(ZPOOL_PROP_MAXDNODESIZE, "maxdnodesize",
|
zprop_register_hidden(ZPOOL_PROP_MAXDNODESIZE, "maxdnodesize",
|
||||||
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXDNODESIZE");
|
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXDNODESIZE");
|
||||||
|
zprop_register_hidden(ZPOOL_PROP_DEDUPDITTO, "dedupditto",
|
||||||
|
PROP_TYPE_NUMBER, PROP_DEFAULT, ZFS_TYPE_POOL, "DEDUPDITTO");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -552,65 +552,6 @@ ddt_get_pool_dedup_ratio(spa_t *spa)
|
|||||||
return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
|
return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
|
|
||||||
{
|
|
||||||
spa_t *spa = ddt->ddt_spa;
|
|
||||||
uint64_t total_refcnt = 0;
|
|
||||||
uint64_t ditto = spa->spa_dedup_ditto;
|
|
||||||
int total_copies = 0;
|
|
||||||
int desired_copies = 0;
|
|
||||||
int copies_needed = 0;
|
|
||||||
|
|
||||||
for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
|
|
||||||
ddt_phys_t *ddp = &dde->dde_phys[p];
|
|
||||||
zio_t *zio = dde->dde_lead_zio[p];
|
|
||||||
uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */
|
|
||||||
if (zio != NULL)
|
|
||||||
refcnt += zio->io_parent_count; /* pending refs */
|
|
||||||
if (ddp == ddp_willref)
|
|
||||||
refcnt++; /* caller's ref */
|
|
||||||
if (refcnt != 0) {
|
|
||||||
total_refcnt += refcnt;
|
|
||||||
total_copies += p;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ditto == 0 || ditto > UINT32_MAX)
|
|
||||||
ditto = UINT32_MAX;
|
|
||||||
|
|
||||||
if (total_refcnt >= 1)
|
|
||||||
desired_copies++;
|
|
||||||
if (total_refcnt >= ditto)
|
|
||||||
desired_copies++;
|
|
||||||
if (total_refcnt >= ditto * ditto)
|
|
||||||
desired_copies++;
|
|
||||||
|
|
||||||
copies_needed = MAX(desired_copies, total_copies) - total_copies;
|
|
||||||
|
|
||||||
/* encrypted blocks store their IV in DVA[2] */
|
|
||||||
if (DDK_GET_CRYPT(&dde->dde_key))
|
|
||||||
copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1);
|
|
||||||
|
|
||||||
return (copies_needed);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
ddt_ditto_copies_present(ddt_entry_t *dde)
|
|
||||||
{
|
|
||||||
ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO];
|
|
||||||
dva_t *dva = ddp->ddp_dva;
|
|
||||||
int copies = 0 - DVA_GET_GANG(dva);
|
|
||||||
|
|
||||||
for (int d = 0; d < DDE_GET_NDVAS(dde); d++, dva++)
|
|
||||||
if (DVA_IS_VALID(dva))
|
|
||||||
copies++;
|
|
||||||
|
|
||||||
ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP);
|
|
||||||
|
|
||||||
return (copies);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
|
ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
|
||||||
{
|
{
|
||||||
@ -1088,7 +1029,10 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (p == DDT_PHYS_DITTO) {
|
if (p == DDT_PHYS_DITTO) {
|
||||||
if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0)
|
/*
|
||||||
|
* Note, we no longer create DDT-DITTO blocks, but we
|
||||||
|
* don't want to leak any written by older software.
|
||||||
|
*/
|
||||||
ddt_phys_free(ddt, ddk, ddp, txg);
|
ddt_phys_free(ddt, ddk, ddp, txg);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1097,9 +1041,9 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
|
|||||||
total_refcnt += ddp->ddp_refcnt;
|
total_refcnt += ddp->ddp_refcnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0)
|
/* We do not create new DDT-DITTO blocks. */
|
||||||
nclass = DDT_CLASS_DITTO;
|
ASSERT0(dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth);
|
||||||
else if (total_refcnt > 1)
|
if (total_refcnt > 1)
|
||||||
nclass = DDT_CLASS_DUPLICATE;
|
nclass = DDT_CLASS_DUPLICATE;
|
||||||
else
|
else
|
||||||
nclass = DDT_CLASS_UNIQUE;
|
nclass = DDT_CLASS_UNIQUE;
|
||||||
|
@ -694,16 +694,6 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
|||||||
error = SET_ERROR(E2BIG);
|
error = SET_ERROR(E2BIG);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ZPOOL_PROP_DEDUPDITTO:
|
|
||||||
if (spa_version(spa) < SPA_VERSION_DEDUP)
|
|
||||||
error = SET_ERROR(ENOTSUP);
|
|
||||||
else
|
|
||||||
error = nvpair_value_uint64(elem, &intval);
|
|
||||||
if (error == 0 &&
|
|
||||||
intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
|
|
||||||
error = SET_ERROR(EINVAL);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -712,6 +702,9 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(void) nvlist_remove_all(props,
|
||||||
|
zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO));
|
||||||
|
|
||||||
if (!error && reset_bootfs) {
|
if (!error && reset_bootfs) {
|
||||||
error = nvlist_remove(props,
|
error = nvlist_remove(props,
|
||||||
zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
|
zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
|
||||||
@ -3604,8 +3597,6 @@ spa_ld_get_props(spa_t *spa)
|
|||||||
spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
|
spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
|
||||||
spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
|
spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
|
||||||
spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
|
spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
|
||||||
spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
|
|
||||||
&spa->spa_dedup_ditto);
|
|
||||||
spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim);
|
spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim);
|
||||||
spa->spa_autoreplace = (autoreplace != 0);
|
spa->spa_autoreplace = (autoreplace != 0);
|
||||||
}
|
}
|
||||||
@ -7983,9 +7974,6 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
|
|||||||
case ZPOOL_PROP_MULTIHOST:
|
case ZPOOL_PROP_MULTIHOST:
|
||||||
spa->spa_multihost = intval;
|
spa->spa_multihost = intval;
|
||||||
break;
|
break;
|
||||||
case ZPOOL_PROP_DEDUPDITTO:
|
|
||||||
spa->spa_dedup_ditto = intval;
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -3118,35 +3118,6 @@ zio_ddt_child_write_done(zio_t *zio)
|
|||||||
ddt_exit(ddt);
|
ddt_exit(ddt);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
zio_ddt_ditto_write_done(zio_t *zio)
|
|
||||||
{
|
|
||||||
int p = DDT_PHYS_DITTO;
|
|
||||||
ASSERTV(zio_prop_t *zp = &zio->io_prop);
|
|
||||||
blkptr_t *bp = zio->io_bp;
|
|
||||||
ddt_t *ddt = ddt_select(zio->io_spa, bp);
|
|
||||||
ddt_entry_t *dde = zio->io_private;
|
|
||||||
ddt_phys_t *ddp = &dde->dde_phys[p];
|
|
||||||
ddt_key_t *ddk = &dde->dde_key;
|
|
||||||
|
|
||||||
ddt_enter(ddt);
|
|
||||||
|
|
||||||
ASSERT(ddp->ddp_refcnt == 0);
|
|
||||||
ASSERT(dde->dde_lead_zio[p] == zio);
|
|
||||||
dde->dde_lead_zio[p] = NULL;
|
|
||||||
|
|
||||||
if (zio->io_error == 0) {
|
|
||||||
ASSERT(ZIO_CHECKSUM_EQUAL(bp->blk_cksum, ddk->ddk_cksum));
|
|
||||||
ASSERT(zp->zp_copies < SPA_DVAS_PER_BP);
|
|
||||||
ASSERT(zp->zp_copies == BP_GET_NDVAS(bp) - BP_IS_GANG(bp));
|
|
||||||
if (ddp->ddp_phys_birth != 0)
|
|
||||||
ddt_phys_free(ddt, ddk, ddp, zio->io_txg);
|
|
||||||
ddt_phys_fill(ddp, bp);
|
|
||||||
}
|
|
||||||
|
|
||||||
ddt_exit(ddt);
|
|
||||||
}
|
|
||||||
|
|
||||||
static zio_t *
|
static zio_t *
|
||||||
zio_ddt_write(zio_t *zio)
|
zio_ddt_write(zio_t *zio)
|
||||||
{
|
{
|
||||||
@ -3155,9 +3126,7 @@ zio_ddt_write(zio_t *zio)
|
|||||||
uint64_t txg = zio->io_txg;
|
uint64_t txg = zio->io_txg;
|
||||||
zio_prop_t *zp = &zio->io_prop;
|
zio_prop_t *zp = &zio->io_prop;
|
||||||
int p = zp->zp_copies;
|
int p = zp->zp_copies;
|
||||||
int ditto_copies;
|
|
||||||
zio_t *cio = NULL;
|
zio_t *cio = NULL;
|
||||||
zio_t *dio = NULL;
|
|
||||||
ddt_t *ddt = ddt_select(spa, bp);
|
ddt_t *ddt = ddt_select(spa, bp);
|
||||||
ddt_entry_t *dde;
|
ddt_entry_t *dde;
|
||||||
ddt_phys_t *ddp;
|
ddt_phys_t *ddp;
|
||||||
@ -3192,41 +3161,6 @@ zio_ddt_write(zio_t *zio)
|
|||||||
return (zio);
|
return (zio);
|
||||||
}
|
}
|
||||||
|
|
||||||
ditto_copies = ddt_ditto_copies_needed(ddt, dde, ddp);
|
|
||||||
ASSERT(ditto_copies < SPA_DVAS_PER_BP);
|
|
||||||
|
|
||||||
if (ditto_copies > ddt_ditto_copies_present(dde) &&
|
|
||||||
dde->dde_lead_zio[DDT_PHYS_DITTO] == NULL) {
|
|
||||||
zio_prop_t czp = *zp;
|
|
||||||
|
|
||||||
czp.zp_copies = ditto_copies;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we arrived here with an override bp, we won't have run
|
|
||||||
* the transform stack, so we won't have the data we need to
|
|
||||||
* generate a child i/o. So, toss the override bp and restart.
|
|
||||||
* This is safe, because using the override bp is just an
|
|
||||||
* optimization; and it's rare, so the cost doesn't matter.
|
|
||||||
*/
|
|
||||||
if (zio->io_bp_override) {
|
|
||||||
zio_pop_transforms(zio);
|
|
||||||
zio->io_stage = ZIO_STAGE_OPEN;
|
|
||||||
zio->io_pipeline = ZIO_WRITE_PIPELINE;
|
|
||||||
zio->io_bp_override = NULL;
|
|
||||||
BP_ZERO(bp);
|
|
||||||
ddt_exit(ddt);
|
|
||||||
return (zio);
|
|
||||||
}
|
|
||||||
|
|
||||||
dio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
|
|
||||||
zio->io_orig_size, zio->io_orig_size, &czp, NULL, NULL,
|
|
||||||
NULL, zio_ddt_ditto_write_done, dde, zio->io_priority,
|
|
||||||
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
|
|
||||||
|
|
||||||
zio_push_transform(dio, zio->io_abd, zio->io_size, 0, NULL);
|
|
||||||
dde->dde_lead_zio[DDT_PHYS_DITTO] = dio;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) {
|
if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) {
|
||||||
if (ddp->ddp_phys_birth != 0)
|
if (ddp->ddp_phys_birth != 0)
|
||||||
ddt_bp_fill(ddp, bp, txg);
|
ddt_bp_fill(ddp, bp, txg);
|
||||||
@ -3254,8 +3188,6 @@ zio_ddt_write(zio_t *zio)
|
|||||||
|
|
||||||
if (cio)
|
if (cio)
|
||||||
zio_nowait(cio);
|
zio_nowait(cio);
|
||||||
if (dio)
|
|
||||||
zio_nowait(dio);
|
|
||||||
|
|
||||||
return (zio);
|
return (zio);
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,6 @@ typeset -a properties=(
|
|||||||
"failmode"
|
"failmode"
|
||||||
"listsnapshots"
|
"listsnapshots"
|
||||||
"autoexpand"
|
"autoexpand"
|
||||||
"dedupditto"
|
|
||||||
"dedupratio"
|
"dedupratio"
|
||||||
"free"
|
"free"
|
||||||
"allocated"
|
"allocated"
|
||||||
|
@ -53,10 +53,10 @@ if [ -e $HOSTID_FILE ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
typeset good_props=('comment=text' 'ashift=12' 'multihost=on'
|
typeset good_props=('comment=text' 'ashift=12' 'multihost=on'
|
||||||
'listsnapshots=on' 'autoexpand=on' 'autoreplace=on' 'dedupditto=1234'
|
'listsnapshots=on' 'autoexpand=on' 'autoreplace=on'
|
||||||
'delegation=off' 'failmode=continue')
|
'delegation=off' 'failmode=continue')
|
||||||
typeset bad_props=("bootfs=$TESTPOOL2/bootfs" 'version=28' 'ashift=4'
|
typeset bad_props=("bootfs=$TESTPOOL2/bootfs" 'version=28' 'ashift=4'
|
||||||
'allocated=1234' 'capacity=5678' 'dedupditto=42' 'multihost=none'
|
'allocated=1234' 'capacity=5678' 'multihost=none'
|
||||||
'feature@async_destroy=disabled' 'feature@xxx_fake_xxx=enabled'
|
'feature@async_destroy=disabled' 'feature@xxx_fake_xxx=enabled'
|
||||||
'propname=propval' 'readonly=on')
|
'propname=propval' 'readonly=on')
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user