diff --git a/include/sys/spa.h b/include/sys/spa.h
index 7dbb37406..a3e36c1f5 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -1116,7 +1116,9 @@ extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
 extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva);
 extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp);
 extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
+extern boolean_t spa_has_dedup(spa_t *spa);
 extern boolean_t spa_has_slogs(spa_t *spa);
+extern boolean_t spa_has_special(spa_t *spa);
 extern boolean_t spa_is_root(spa_t *spa);
 extern boolean_t spa_writeable(spa_t *spa);
 extern boolean_t spa_has_pending_synctask(spa_t *spa);
diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c
index d7326972c..60cbb7755 100644
--- a/module/zfs/ddt.c
+++ b/module/zfs/ddt.c
@@ -1037,29 +1037,18 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
 	ddt_free(ddt, dde);
 }
 
+/*
+ * We're considered over quota when we hit 85% full, or for larger drives,
+ * when there is less than 8GB free.
+ */
 static boolean_t
-ddt_special_over_quota(spa_t *spa, metaslab_class_t *mc)
+ddt_special_over_quota(metaslab_class_t *mc)
 {
-	if (mc != NULL && metaslab_class_get_space(mc) > 0) {
-		/* Over quota if allocating outside of this special class */
-		if (spa_syncing_txg(spa) <= spa->spa_dedup_class_full_txg +
-		    dedup_class_wait_txgs) {
-			/* Waiting for some deferred frees to be processed */
-			return (B_TRUE);
-		}
-
-		/*
-		 * We're considered over quota when we hit 85% full, or for
-		 * larger drives, when there is less than 8GB free.
-		 */
-		uint64_t allocated = metaslab_class_get_alloc(mc);
-		uint64_t capacity = metaslab_class_get_space(mc);
-		uint64_t limit = MAX(capacity * 85 / 100,
-		    (capacity > (1LL<<33)) ? capacity - (1LL<<33) : 0);
-
-		return (allocated >= limit);
-	}
-	return (B_FALSE);
+	uint64_t allocated = metaslab_class_get_alloc(mc);
+	uint64_t capacity = metaslab_class_get_space(mc);
+	uint64_t limit = MAX(capacity * 85 / 100,
+	    (capacity > (1LL<<33)) ? capacity - (1LL<<33) : 0);
+	return (allocated >= limit);
 }
 
 /*
@@ -1081,14 +1070,22 @@ ddt_over_quota(spa_t *spa)
 	if (spa->spa_dedup_table_quota != UINT64_MAX)
 		return (ddt_get_ddt_dsize(spa) > spa->spa_dedup_table_quota);
 
+	/*
+	 * Over quota if have to allocate outside of the dedup/special class.
+	 */
+	if (spa_syncing_txg(spa) <= spa->spa_dedup_class_full_txg +
+	    dedup_class_wait_txgs) {
+		/* Waiting for some deferred frees to be processed */
+		return (B_TRUE);
+	}
+
 	/*
 	 * For automatic quota, table size is limited by dedup or special class
 	 */
-	if (ddt_special_over_quota(spa, spa_dedup_class(spa)))
-		return (B_TRUE);
-	else if (spa_special_has_ddt(spa) &&
-	    ddt_special_over_quota(spa, spa_special_class(spa)))
-		return (B_TRUE);
+	if (spa_has_dedup(spa))
+		return (ddt_special_over_quota(spa_dedup_class(spa)));
+	else if (spa_special_has_ddt(spa))
+		return (ddt_special_over_quota(spa_special_class(spa)));
 
 	return (B_FALSE);
 }
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index a2cfee2de..f054e4290 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -2009,8 +2009,7 @@ spa_dedup_class(spa_t *spa)
 boolean_t
 spa_special_has_ddt(spa_t *spa)
 {
-	return (zfs_ddt_data_is_special &&
-	    spa->spa_special_class->mc_groups != 0);
+	return (zfs_ddt_data_is_special && spa_has_special(spa));
 }
 
 /*
@@ -2019,6 +2018,9 @@ spa_special_has_ddt(spa_t *spa)
 metaslab_class_t *
 spa_preferred_class(spa_t *spa, const zio_t *zio)
 {
+	metaslab_class_t *mc = zio->io_metaslab_class;
+	boolean_t tried_dedup = (mc == spa_dedup_class(spa));
+	boolean_t tried_special = (mc == spa_special_class(spa));
 	const zio_prop_t *zp = &zio->io_prop;
 
 	/*
@@ -2036,12 +2038,10 @@ spa_preferred_class(spa_t *spa, const zio_t *zio)
 	 */
 	ASSERT(objtype != DMU_OT_INTENT_LOG);
 
-	boolean_t has_special_class = spa->spa_special_class->mc_groups != 0;
-
 	if (DMU_OT_IS_DDT(objtype)) {
-		if (spa->spa_dedup_class->mc_groups != 0)
+		if (spa_has_dedup(spa) && !tried_dedup && !tried_special)
 			return (spa_dedup_class(spa));
-		else if (has_special_class && zfs_ddt_data_is_special)
+		else if (spa_special_has_ddt(spa) && !tried_special)
 			return (spa_special_class(spa));
 		else
 			return (spa_normal_class(spa));
@@ -2050,14 +2050,15 @@ spa_preferred_class(spa_t *spa, const zio_t *zio)
 	/* Indirect blocks for user data can land in special if allowed */
 	if (zp->zp_level > 0 &&
 	    (DMU_OT_IS_FILE(objtype) || objtype == DMU_OT_ZVOL)) {
-		if (has_special_class && zfs_user_indirect_is_special)
+		if (zfs_user_indirect_is_special && spa_has_special(spa) &&
+		    !tried_special)
 			return (spa_special_class(spa));
 		else
 			return (spa_normal_class(spa));
 	}
 
 	if (DMU_OT_IS_METADATA(objtype) || zp->zp_level > 0) {
-		if (has_special_class)
+		if (spa_has_special(spa) && !tried_special)
 			return (spa_special_class(spa));
 		else
 			return (spa_normal_class(spa));
@@ -2069,7 +2070,8 @@ spa_preferred_class(spa_t *spa, const zio_t *zio)
 	 * zfs_special_class_metadata_reserve_pct exclusively for metadata.
 	 */
 	if ((DMU_OT_IS_FILE(objtype) || objtype == DMU_OT_ZVOL) &&
-	    has_special_class && zio->io_size <= zp->zp_zpl_smallblk) {
+	    spa_has_special(spa) && !tried_special &&
+	    zio->io_size <= zp->zp_zpl_smallblk) {
 		metaslab_class_t *special = spa_special_class(spa);
 		uint64_t alloc = metaslab_class_get_alloc(special);
 		uint64_t space = metaslab_class_get_space(special);
@@ -2640,6 +2642,12 @@ spa_fini(void)
 	mutex_destroy(&spa_l2cache_lock);
 }
 
+boolean_t
+spa_has_dedup(spa_t *spa)
+{
+	return (spa->spa_dedup_class->mc_groups != 0);
+}
+
 /*
  * Return whether this pool has a dedicated slog device. No locking needed.
 * It's not a problem if the wrong answer is returned as it's only for
@@ -2651,6 +2659,12 @@ spa_has_slogs(spa_t *spa)
 	return (spa->spa_log_class->mc_groups != 0);
 }
 
+boolean_t
+spa_has_special(spa_t *spa)
+{
+	return (spa->spa_special_class->mc_groups != 0);
+}
+
 spa_log_state_t
 spa_get_log_state(spa_t *spa)
 {
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index ca84f919e..554de6b7d 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -4150,7 +4150,7 @@ static zio_t *
 zio_dva_allocate(zio_t *zio)
 {
 	spa_t *spa = zio->io_spa;
-	metaslab_class_t *mc;
+	metaslab_class_t *mc, *newmc;
 	blkptr_t *bp = zio->io_bp;
 	int error;
 	int flags = 0;
@@ -4193,7 +4193,7 @@ zio_dva_allocate(zio_t *zio)
 again:
 	/*
 	 * Try allocating the block in the usual metaslab class.
-	 * If that's full, allocate it in the normal class.
+	 * If that's full, allocate it in some other class(es).
 	 * If that's full, allocate as a gang block,
 	 * and if all are full, the allocation fails (which shouldn't happen).
 	 *
@@ -4208,29 +4208,29 @@ again:
 	    &zio->io_alloc_list, zio->io_allocator, zio);
 
 	/*
-	 * Fallback to normal class when an alloc class is full
+	 * When the dedup or special class is spilling into the normal class,
+	 * there can still be significant space available due to deferred
+	 * frees that are in-flight. We track the txg when this occurred and
+	 * back off adding new DDT entries for a few txgs to allow the free
+	 * blocks to be processed.
 	 */
-	if (error == ENOSPC && mc != spa_normal_class(spa)) {
-		/*
-		 * When the dedup or special class is spilling into the normal
-		 * class, there can still be significant space available due
-		 * to deferred frees that are in-flight. We track the txg when
-		 * this occurred and back off adding new DDT entries for a few
-		 * txgs to allow the free blocks to be processed.
-		 */
-		if ((mc == spa_dedup_class(spa) || (spa_special_has_ddt(spa) &&
-		    mc == spa_special_class(spa))) &&
-		    spa->spa_dedup_class_full_txg != zio->io_txg) {
-			spa->spa_dedup_class_full_txg = zio->io_txg;
-			zfs_dbgmsg("%s[%d]: %s class spilling, req size %d, "
-			    "%llu allocated of %llu",
-			    spa_name(spa), (int)zio->io_txg,
-			    mc == spa_dedup_class(spa) ? "dedup" : "special",
-			    (int)zio->io_size,
-			    (u_longlong_t)metaslab_class_get_alloc(mc),
-			    (u_longlong_t)metaslab_class_get_space(mc));
-		}
+	if (error == ENOSPC && spa->spa_dedup_class_full_txg != zio->io_txg &&
+	    (mc == spa_dedup_class(spa) || (mc == spa_special_class(spa) &&
+	    !spa_has_dedup(spa) && spa_special_has_ddt(spa)))) {
+		spa->spa_dedup_class_full_txg = zio->io_txg;
+		zfs_dbgmsg("%s[%llu]: %s class spilling, req size %llu, "
+		    "%llu allocated of %llu",
+		    spa_name(spa), (u_longlong_t)zio->io_txg,
+		    mc == spa_dedup_class(spa) ? "dedup" : "special",
+		    (u_longlong_t)zio->io_size,
+		    (u_longlong_t)metaslab_class_get_alloc(mc),
+		    (u_longlong_t)metaslab_class_get_space(mc));
+	}
 
+	/*
+	 * Fall back to some other class when this one is full.
+	 */
+	if (error == ENOSPC && (newmc = spa_preferred_class(spa, zio)) != mc) {
 		/*
 		 * If we are holding old class reservation, drop it.
 		 * Dispatch the next ZIO(s) there if some are waiting.
@@ -4246,15 +4246,15 @@ again:
 		if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
 			zfs_dbgmsg("%s: metaslab allocation failure, "
-			    "trying normal class: zio %px, size %llu, error %d",
+			    "trying fallback: zio %px, size %llu, error %d",
 			    spa_name(spa), zio, (u_longlong_t)zio->io_size,
 			    error);
 		}
 
-		zio->io_metaslab_class = mc = spa_normal_class(spa);
+		zio->io_metaslab_class = mc = newmc;
 		ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks);
 
 		/*
-		 * If normal class uses throttling, return to that pipeline
+		 * If the new class uses throttling, return to that pipeline
 		 * stage. Otherwise just do another allocation attempt.
 		 */
 		if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
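
Note (illustrative only, not part of the patch): the automatic DDT quota decision above now splits into two checks, the deferred-free back-off window in ddt_over_quota() and the 85%-full / 8GB-free rule in ddt_special_over_quota(). The standalone sketch below restates both with invented helper names and sample numbers; MAX() here stands in for the kernel macro of the same name.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

/*
 * Back-off window: after a class spilled in txg full_txg, keep reporting
 * over-quota until wait_txgs more txgs have synced (the patch checks
 * spa_syncing_txg(spa) <= spa_dedup_class_full_txg + dedup_class_wait_txgs).
 */
static bool
waiting_on_deferred_frees(uint64_t syncing_txg, uint64_t full_txg,
    uint64_t wait_txgs)
{
	return (syncing_txg <= full_txg + wait_txgs);
}

/*
 * Automatic quota: over quota at 85% full, or, on larger devices, when
 * less than 8GB (1LL << 33 bytes) remains free.
 */
static bool
class_over_quota(uint64_t allocated, uint64_t capacity)
{
	uint64_t limit = MAX(capacity * 85 / 100,
	    (capacity > (1LL << 33)) ? capacity - (1LL << 33) : 0);

	return (allocated >= limit);
}

int
main(void)
{
	/* 1TB special vdev: the 8GB-free rule dominates (limit is 1016GB). */
	printf("%d\n", class_over_quota(1010ULL << 30, 1024ULL << 30)); /* 0 */
	printf("%d\n", class_over_quota(1020ULL << 30, 1024ULL << 30)); /* 1 */
	/* 10GB special vdev: the 85% rule dominates (limit is 8.5GB). */
	printf("%d\n", class_over_quota(9ULL << 30, 10ULL << 30));      /* 1 */
	/* Class spilled in txg 100; with a 5-txg wait, txg 103 still backs off. */
	printf("%d\n", waiting_on_deferred_frees(103, 100, 5));         /* 1 */
	printf("%d\n", waiting_on_deferred_frees(106, 100, 5));         /* 0 */
	return (0);
}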
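
A second sketch (an assumed simplification, not the OpenZFS API) for the fallback rework in spa_preferred_class()/zio_dva_allocate(): because spa_preferred_class() now looks at zio->io_metaslab_class, each ENOSPC retry asks for the best class that has not been tried yet, so a DDT block walks dedup -> special -> normal, and the fallback stops once the returned class equals the one that just failed.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the metaslab classes involved. */
enum alloc_class { CLASS_NONE, CLASS_DEDUP, CLASS_SPECIAL, CLASS_NORMAL };

static const char *class_name[] = { "none", "dedup", "special", "normal" };

/*
 * Mimics the DMU_OT_IS_DDT() branch of spa_preferred_class(): never hand back
 * a class the zio already tried, mirroring the !tried_dedup / !tried_special
 * checks added by the patch.
 */
static enum alloc_class
ddt_preferred_class(bool has_dedup, bool has_special_ddt,
    enum alloc_class tried)
{
	if (has_dedup && tried != CLASS_DEDUP && tried != CLASS_SPECIAL)
		return (CLASS_DEDUP);
	if (has_special_ddt && tried != CLASS_SPECIAL)
		return (CLASS_SPECIAL);
	return (CLASS_NORMAL);
}

int
main(void)
{
	/* Pool with dedup and special vdevs; every attempt returns ENOSPC. */
	printf("%s\n", class_name[ddt_preferred_class(true, true, CLASS_NONE)]);
	printf("%s\n", class_name[ddt_preferred_class(true, true, CLASS_DEDUP)]);
	printf("%s\n", class_name[ddt_preferred_class(true, true, CLASS_SPECIAL)]);
	return (0);
}

This prints dedup, then special, then normal, matching the order in which zio_dva_allocate() now retries classes before falling back to a gang allocation.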