Add dmu_object_alloc_ibs() and allocate space maps with 16K indirect blocks.

dmu_object_alloc(), dmu_object_alloc_ibs() and dmu_object_alloc_dnsize()
become thin wrappers around a common static dmu_object_alloc_impl(), which
forwards the requested indirect block shift to dnode_allocate().  Space maps
are allocated with space_map_ibs (14 by default), and space_map_truncate()
additionally reallocates an object whose metadata block size differs from
1 << space_map_ibs.

diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 28756e6f7..67f4be1c2 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -389,6 +389,9 @@ typedef struct dmu_buf {
  */
 uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
+uint64_t dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot,
+    int blocksize, int indirect_blockshift,
+    dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
 uint64_t dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len,
     int dnodesize, dmu_tx_t *tx);
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index 586a04b16..b9960782e 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -41,17 +41,10 @@
  */
 int dmu_object_alloc_chunk_shift = 7;
 
-uint64_t
-dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	return dmu_object_alloc_dnsize(os, ot, blocksize, bonustype, bonuslen,
-	    0, tx);
-}
-
-uint64_t
-dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+static uint64_t
+dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dmu_tx_t *tx)
 {
 	uint64_t object;
 	uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
@@ -182,8 +175,9 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
 				 * again now that we have the struct lock.
 				 */
 				if (dn->dn_type == DMU_OT_NONE) {
-					dnode_allocate(dn, ot, blocksize, 0,
-					    bonustype, bonuslen, dn_slots, tx);
+					dnode_allocate(dn, ot, blocksize,
+					    indirect_blockshift, bonustype,
+					    bonuslen, dn_slots, tx);
 					rw_exit(&dn->dn_struct_rwlock);
 					dmu_tx_add_new_object(tx, dn);
 					dnode_rele(dn, FTAG);
@@ -206,6 +200,35 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
 	}
 }
 
+uint64_t
+dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+	return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
+	    bonuslen, 0, tx));
+}
+
+/*
+ * Like dmu_object_alloc(), but the caller supplies the indirect block shift
+ * that is handed to dnode_allocate() instead of 0.
+ */
+uint64_t
+dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    dmu_tx_t *tx)
+{
+	return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
+	    bonustype, bonuslen, 0, tx));
+}
+
+uint64_t
+dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+	return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
+	    bonuslen, dnodesize, tx));
+}
+
 int
 dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
@@ -423,6 +446,7 @@ dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
 
 #if defined(_KERNEL)
 EXPORT_SYMBOL(dmu_object_alloc);
+EXPORT_SYMBOL(dmu_object_alloc_ibs);
 EXPORT_SYMBOL(dmu_object_alloc_dnsize);
 EXPORT_SYMBOL(dmu_object_claim);
 EXPORT_SYMBOL(dmu_object_claim_dnsize);
diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c
index 5f67a7987..9ba6ff6ff 100644
--- a/module/zfs/space_map.c
+++ b/module/zfs/space_map.c
@@ -52,6 +52,14 @@
  */
 boolean_t zfs_force_some_double_word_sm_entries = B_FALSE;
 
+/*
+ * Override the default indirect block size of 128K, instead use 16K for
+ * spacemaps (2^14 bytes). This dramatically reduces write inflation since
+ * appending to a spacemap typically has to write one data block (4KB) and one
+ * or two indirect blocks (16K-32K, rather than 128K).
+ */
+int space_map_ibs = 14;
+
 boolean_t
 sm_entry_is_debug(uint64_t e)
 {
@@ -674,8 +682,8 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
 		 *
 		 * [1] The feature is enabled.
 		 * [2] The offset or run is too big for a single-word entry,
-		 * 	or the vdev_id is set (meaning not equal to
-		 * 	SM_NO_VDEVID).
+		 *	or the vdev_id is set (meaning not equal to
+		 *	SM_NO_VDEVID).
 		 *
 		 * Note that for purposes of testing we've added the case that
 		 * we write two-word entries occasionally when the feature is
@@ -837,7 +845,8 @@ space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx)
 	 */
 	if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
 	    doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
-	    doi.doi_data_block_size != blocksize) {
+	    doi.doi_data_block_size != blocksize ||
+	    doi.doi_metadata_block_size != (1U << space_map_ibs)) {
 		zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating "
 		    "object[%llu]: old bonus %u, old blocksz %u",
 		    dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object,
@@ -893,8 +902,8 @@ space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
 		bonuslen = SPACE_MAP_SIZE_V0;
 	}
 
-	object = dmu_object_alloc(os, DMU_OT_SPACE_MAP, blocksize,
-	    DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
+	object = dmu_object_alloc_ibs(os, DMU_OT_SPACE_MAP, blocksize,
+	    space_map_ibs, DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
 
 	return (object);
 }