mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-11-17 10:01:01 +03:00
ztest: scrub ddt repair
The ztest_ddt_repair() test is designed to inflict damage to the ddt which can be repaired by a scrub. Unfortunately, this repair logic was broken at some point and it went undetected. This issue is not specific to ztest, but thankfully this extra redundancy is rarely enabled and even more rarely needed. The root cause was identified to be the ddt_bp_create() function called by dsl_scan_ddt_entry() which did not set the dedup bit of the generated block pointer. The consequence of this was that the ZIO_DDT_READ_PIPELINE was never enabled for the block pointer during the scrub, and the dedup ditto repair logic was never run. Note that for demand reads which don't rely on ddt_bp_create() the required pipeline stages would be enabled and the repair performed. This was resolved by unconditionally setting the dedup bit in ddt_bp_create(). This way all code paths which may need to perform a repair from a block pointer generated from the ddt entry will be able to. The only exception is that the dedup bit is cleared in ddt_phys_free() which is required to avoid leaking space. Reviewed by: Matt Ahrens <mahrens@delphix.com> Reviewed by: Tom Caputi <tcaputi@datto.com> Reviewed by: Serapheim Dimitropoulos <serapheim@delphix.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #8270
This commit is contained in:
parent
419ba59145
commit
52b684236d
@ -5993,20 +5993,13 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
|
|||||||
spa_t *spa = ztest_spa;
|
spa_t *spa = ztest_spa;
|
||||||
objset_t *os = zd->zd_os;
|
objset_t *os = zd->zd_os;
|
||||||
ztest_od_t *od;
|
ztest_od_t *od;
|
||||||
uint64_t object, blocksize, txg, pattern, psize;
|
uint64_t object, blocksize, txg, pattern;
|
||||||
enum zio_checksum checksum = spa_dedup_checksum(spa);
|
enum zio_checksum checksum = spa_dedup_checksum(spa);
|
||||||
dmu_buf_t *db;
|
dmu_buf_t *db;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
abd_t *abd;
|
|
||||||
blkptr_t blk;
|
|
||||||
int copies = 2 * ZIO_DEDUPDITTO_MIN;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
blocksize = ztest_random_blocksize();
|
|
||||||
blocksize = MIN(blocksize, 2048); /* because we write so many */
|
|
||||||
|
|
||||||
od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
|
od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
|
||||||
ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
|
ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
|
||||||
|
|
||||||
if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
|
if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
|
||||||
umem_free(od, sizeof (ztest_od_t));
|
umem_free(od, sizeof (ztest_od_t));
|
||||||
@ -6015,7 +6008,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Take the name lock as writer to prevent anyone else from changing
|
* Take the name lock as writer to prevent anyone else from changing
|
||||||
* the pool and dataset properies we need to maintain during this test.
|
* the pool and dataset properties we need to maintain during this test.
|
||||||
*/
|
*/
|
||||||
(void) pthread_rwlock_wrlock(&ztest_name_lock);
|
(void) pthread_rwlock_wrlock(&ztest_name_lock);
|
||||||
|
|
||||||
@ -6037,6 +6030,31 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
|
|||||||
blocksize = od[0].od_blocksize;
|
blocksize = od[0].od_blocksize;
|
||||||
pattern = zs->zs_guid ^ dds.dds_guid;
|
pattern = zs->zs_guid ^ dds.dds_guid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The number of copies written must always be greater than or
|
||||||
|
* equal to the threshold set by the dedupditto property. This
|
||||||
|
* is initialized in ztest_run() and then randomly changed by
|
||||||
|
* ztest_spa_prop_get_set(), these functions will never set it
|
||||||
|
* larger than 2 * ZIO_DEDUPDITTO_MIN.
|
||||||
|
*/
|
||||||
|
int copies = 2 * ZIO_DEDUPDITTO_MIN;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The block size is limited by DMU_MAX_ACCESS (64MB) which
|
||||||
|
* caps the maximum transaction size. A block size of up to
|
||||||
|
* SPA_OLD_MAXBLOCKSIZE is allowed which results in a maximum
|
||||||
|
* transaction size of: 128K * 200 (copies) = ~25MB
|
||||||
|
*
|
||||||
|
* The actual block size is checked here, rather than requested
|
||||||
|
* above, because the way ztest_od_init() is implemented it does
|
||||||
|
* not guarantee the block size requested will be used.
|
||||||
|
*/
|
||||||
|
if (blocksize > SPA_OLD_MAXBLOCKSIZE) {
|
||||||
|
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
||||||
|
umem_free(od, sizeof (ztest_od_t));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
ASSERT(object != 0);
|
ASSERT(object != 0);
|
||||||
|
|
||||||
tx = dmu_tx_create(os);
|
tx = dmu_tx_create(os);
|
||||||
@ -6051,7 +6069,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
|
|||||||
/*
|
/*
|
||||||
* Write all the copies of our block.
|
* Write all the copies of our block.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < copies; i++) {
|
for (int i = 0; i < copies; i++) {
|
||||||
uint64_t offset = i * blocksize;
|
uint64_t offset = i * blocksize;
|
||||||
int error = dmu_buf_hold(os, object, offset, FTAG, &db,
|
int error = dmu_buf_hold(os, object, offset, FTAG, &db,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH);
|
||||||
@ -6074,16 +6092,15 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
|
|||||||
/*
|
/*
|
||||||
* Find out what block we got.
|
* Find out what block we got.
|
||||||
*/
|
*/
|
||||||
VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db,
|
VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
|
||||||
DMU_READ_NO_PREFETCH));
|
blkptr_t blk = *((dmu_buf_impl_t *)db)->db_blkptr;
|
||||||
blk = *((dmu_buf_impl_t *)db)->db_blkptr;
|
|
||||||
dmu_buf_rele(db, FTAG);
|
dmu_buf_rele(db, FTAG);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Damage the block. Dedup-ditto will save us when we read it later.
|
* Damage the block. Dedup-ditto will save us when we read it later.
|
||||||
*/
|
*/
|
||||||
psize = BP_GET_PSIZE(&blk);
|
uint64_t psize = BP_GET_PSIZE(&blk);
|
||||||
abd = abd_alloc_linear(psize, B_TRUE);
|
abd_t *abd = abd_alloc_linear(psize, B_TRUE);
|
||||||
ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
|
ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
|
||||||
|
|
||||||
(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
|
(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
|
||||||
|
@ -291,7 +291,7 @@ ddt_bp_create(enum zio_checksum checksum,
|
|||||||
BP_SET_CHECKSUM(bp, checksum);
|
BP_SET_CHECKSUM(bp, checksum);
|
||||||
BP_SET_TYPE(bp, DMU_OT_DEDUP);
|
BP_SET_TYPE(bp, DMU_OT_DEDUP);
|
||||||
BP_SET_LEVEL(bp, 0);
|
BP_SET_LEVEL(bp, 0);
|
||||||
BP_SET_DEDUP(bp, 0);
|
BP_SET_DEDUP(bp, 1);
|
||||||
BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
|
BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -346,6 +346,13 @@ ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg)
|
|||||||
blkptr_t blk;
|
blkptr_t blk;
|
||||||
|
|
||||||
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
|
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We clear the dedup bit so that zio_free() will actually free the
|
||||||
|
* space, rather than just decrementing the refcount in the DDT.
|
||||||
|
*/
|
||||||
|
BP_SET_DEDUP(&blk, 0);
|
||||||
|
|
||||||
ddt_phys_clear(ddp);
|
ddt_phys_clear(ddp);
|
||||||
zio_free(ddt->ddt_spa, txg, &blk);
|
zio_free(ddt->ddt_spa, txg, &blk);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user