mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 19:50:25 +03:00
Illumos 6844 - dnode_next_offset can detect fictional holes
6844 dnode_next_offset can detect fictional holes Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> dnode_next_offset is used in a variety of places to iterate over the holes or allocated blocks in a dnode. It operates under the premise that it can iterate over the blockpointers of a dnode in open context while holding only the dn_struct_rwlock as reader. Unfortunately, this premise does not hold. When we create the zio for a dbuf, we pass in the actual block pointer in the indirect block above that dbuf. When we later zero the bp in zio_write_compress, we are directly modifying the bp. The state of the bp is now inconsistent from the perspective of dnode_next_offset: the bp will appear to be a hole until zio_dva_allocate finally finishes filling it in. In the meantime, dnode_next_offset can detect a hole in the dnode when none exists. I was able to experimentally demonstrate this behavior with the following setup: 1. Create a file with 1 million dbufs. 2. Create a thread that randomly dirties L2 blocks by writing to the first L0 block under them. 3. Observe dnode_next_offset, waiting for it to skip over a hole in the middle of a file. 4. Do dnode_next_offset in a loop until we skip over such a non-existent hole. The fix is to ensure that it is valid to iterate over the indirect blocks in a dnode while holding the dn_struct_rwlock by passing the zio a copy of the BP and updating the actual BP in dbuf_write_ready while holding the lock. References: https://www.illumos.org/issues/6844 https://github.com/openzfs/openzfs/pull/82 DLPX-35372 Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #4548
This commit is contained in:
parent
8a5fc74880
commit
463a8cfe2b
@ -121,6 +121,9 @@ typedef struct dbuf_dirty_record {
|
|||||||
/* How much space was changed to dsl_pool_dirty_space() for this? */
|
/* How much space was changed to dsl_pool_dirty_space() for this? */
|
||||||
unsigned int dr_accounted;
|
unsigned int dr_accounted;
|
||||||
|
|
||||||
|
/* A copy of the bp that points to us */
|
||||||
|
blkptr_t dr_bp_copy;
|
||||||
|
|
||||||
union dirty_types {
|
union dirty_types {
|
||||||
struct dirty_indirect {
|
struct dirty_indirect {
|
||||||
|
|
||||||
|
@ -3024,7 +3024,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||||||
uint64_t fill = 0;
|
uint64_t fill = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ASSERT3P(db->db_blkptr, ==, bp);
|
ASSERT3P(db->db_blkptr, !=, NULL);
|
||||||
|
ASSERT3P(&db->db_data_pending->dr_bp_copy, ==, bp);
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
dn = DB_DNODE(db);
|
dn = DB_DNODE(db);
|
||||||
@ -3046,7 +3047,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
if (db->db_blkid == DMU_SPILL_BLKID) {
|
if (db->db_blkid == DMU_SPILL_BLKID) {
|
||||||
ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
|
ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
|
||||||
ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
|
ASSERT(!(BP_IS_HOLE(bp)) &&
|
||||||
db->db_blkptr == &dn->dn_phys->dn_spill);
|
db->db_blkptr == &dn->dn_phys->dn_spill);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -3087,6 +3088,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||||||
bp->blk_fill = fill;
|
bp->blk_fill = fill;
|
||||||
|
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
|
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||||
|
*db->db_blkptr = *bp;
|
||||||
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3265,6 +3270,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
|||||||
zio_t *zio;
|
zio_t *zio;
|
||||||
int wp_flag = 0;
|
int wp_flag = 0;
|
||||||
|
|
||||||
|
ASSERT(dmu_tx_is_syncing(tx));
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
dn = DB_DNODE(db);
|
dn = DB_DNODE(db);
|
||||||
os = dn->dn_objset;
|
os = dn->dn_objset;
|
||||||
@ -3323,6 +3330,14 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
|||||||
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
|
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We copy the blkptr now (rather than when we instantiate the dirty
|
||||||
|
* record), because its value can change between open context and
|
||||||
|
* syncing context. We do not need to hold dn_struct_rwlock to read
|
||||||
|
* db_blkptr because we are in syncing context.
|
||||||
|
*/
|
||||||
|
dr->dr_bp_copy = *db->db_blkptr;
|
||||||
|
|
||||||
if (db->db_level == 0 &&
|
if (db->db_level == 0 &&
|
||||||
dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
|
dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
|
||||||
/*
|
/*
|
||||||
@ -3332,7 +3347,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
|||||||
void *contents = (data != NULL) ? data->b_data : NULL;
|
void *contents = (data != NULL) ? data->b_data : NULL;
|
||||||
|
|
||||||
dr->dr_zio = zio_write(zio, os->os_spa, txg,
|
dr->dr_zio = zio_write(zio, os->os_spa, txg,
|
||||||
db->db_blkptr, contents, db->db.db_size, &zp,
|
&dr->dr_bp_copy, contents, db->db.db_size, &zp,
|
||||||
dbuf_write_override_ready, NULL, dbuf_write_override_done,
|
dbuf_write_override_ready, NULL, dbuf_write_override_done,
|
||||||
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
@ -3343,14 +3358,14 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
|||||||
} else if (db->db_state == DB_NOFILL) {
|
} else if (db->db_state == DB_NOFILL) {
|
||||||
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
|
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
|
||||||
dr->dr_zio = zio_write(zio, os->os_spa, txg,
|
dr->dr_zio = zio_write(zio, os->os_spa, txg,
|
||||||
db->db_blkptr, NULL, db->db.db_size, &zp,
|
&dr->dr_bp_copy, NULL, db->db.db_size, &zp,
|
||||||
dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
|
dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
|
||||||
ZIO_PRIORITY_ASYNC_WRITE,
|
ZIO_PRIORITY_ASYNC_WRITE,
|
||||||
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
|
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(arc_released(data));
|
ASSERT(arc_released(data));
|
||||||
dr->dr_zio = arc_write(zio, os->os_spa, txg,
|
dr->dr_zio = arc_write(zio, os->os_spa, txg,
|
||||||
db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db),
|
&dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
|
||||||
DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
|
DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
|
||||||
dbuf_write_physdone, dbuf_write_done, db,
|
dbuf_write_physdone, dbuf_write_done, db,
|
||||||
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
||||||
|
Loading…
Reference in New Issue
Block a user