diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 837a0d510..0f03699f1 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -106,6 +106,8 @@ struct objset { zil_header_t os_zil_header; list_t os_synced_dnodes; uint64_t os_flags; + uint64_t os_freed_dnodes; + boolean_t os_rescan_dnodes; /* Protected by os_obj_lock */ kmutex_t os_obj_lock; diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index 5faecafc7..a5a53418b 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -36,20 +36,22 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { uint64_t object; - uint64_t L2_dnode_count = DNODES_PER_BLOCK << + uint64_t L1_dnode_count = DNODES_PER_BLOCK << (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT); dnode_t *dn = NULL; - int restarted = B_FALSE; mutex_enter(&os->os_obj_lock); for (;;) { object = os->os_obj_next; /* - * Each time we polish off an L2 bp worth of dnodes - * (2^13 objects), move to another L2 bp that's still - * reasonably sparse (at most 1/4 full). Look from the - * beginning once, but after that keep looking from here. - * If we can't find one, just keep going from here. + * Each time we polish off an L1 bp worth of dnodes (2^12 + * objects), move to another L1 bp that's still reasonably + * sparse (at most 1/4 full). Look from the beginning at most + * once per txg, but after that keep looking from here. + * os_rescan_dnodes is set during txg sync if enough objects + * have been freed since the previous rescan to justify + * backfilling again. If we can't find a suitable block, just + * keep going from here. * * Note that dmu_traverse depends on the behavior that we use * multiple blocks of the dnode object before going back to @@ -57,12 +59,19 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, * that property or find another solution to the issues * described in traverse_visitbp. 
*/ - if (P2PHASE(object, L2_dnode_count) == 0) { - uint64_t offset = restarted ? object << DNODE_SHIFT : 0; - int error = dnode_next_offset(DMU_META_DNODE(os), + + if (P2PHASE(object, L1_dnode_count) == 0) { + uint64_t offset; + int error; + if (os->os_rescan_dnodes) { + offset = 0; + os->os_rescan_dnodes = B_FALSE; + } else { + offset = object << DNODE_SHIFT; + } + error = dnode_next_offset(DMU_META_DNODE(os), DNODE_FIND_HOLE, &offset, 2, DNODES_PER_BLOCK >> 2, 0); - restarted = B_TRUE; if (error == 0) object = offset >> DNODE_SHIFT; } diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 9f7cf5f4b..03b30dd3b 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -68,6 +68,13 @@ krwlock_t os_lock; */ int dmu_find_threads = 0; +/* + * Backfill lower metadnode objects after this many have been freed. + * Backfilling negatively impacts object creation rates, so only do it + * if there are enough holes to fill. + */ +int dmu_rescan_dnode_threshold = 1 << DN_MAX_INDBLKSHIFT; + static void dmu_objset_find_dp_cb(void *arg); void @@ -1152,6 +1159,13 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) if (dr->dr_zio) zio_nowait(dr->dr_zio); } + + /* Enable dnode backfill if enough objects have been freed. */ + if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) { + os->os_rescan_dnodes = B_TRUE; + os->os_freed_dnodes = 0; + } + /* * Free intent log blocks up to this tx. */ diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 66b0eef2a..bea7be186 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -682,6 +682,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) } if (freeing_dnode) { + dn->dn_objset->os_freed_dnodes++; dnode_sync_free(dn, tx); return; }