mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-26 09:54:22 +03:00
Allow metaslab to be unloaded even when not freed from
On large systems, the memory used by loaded metaslabs can become a concern. While range trees are a fairly efficient data structure, on heavily fragmented pools they can still consume a significant amount of memory. This problem is amplified when we fail to unload metaslabs that we aren't using. Currently, we only unload a metaslab during metaslab_sync_done; in order for that function to be called on a given metaslab in a given txg, we have to have dirtied that metaslab in that txg. If the dirtying was the result of an allocation, we wouldn't be unloading it (since it wouldn't be 8 txgs since it was selected), so in effect we only unload a metaslab during txgs where it's being freed from. We move the unload logic from sync_done to a new function, and call that function on all metaslabs in a given vdev during vdev_sync_done(). Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <pcd@delphix.com> Closes #8837
This commit is contained in:
parent
06900c409b
commit
6f7bc75825
@ -50,6 +50,7 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
|
||||
void metaslab_fini(metaslab_t *);
|
||||
|
||||
int metaslab_load(metaslab_t *);
|
||||
void metaslab_potentially_unload(metaslab_t *, uint64_t);
|
||||
void metaslab_unload(metaslab_t *);
|
||||
|
||||
uint64_t metaslab_allocated_space(metaslab_t *);
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
*/
|
||||
@ -2949,6 +2949,30 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
|
||||
void
|
||||
metaslab_potentially_unload(metaslab_t *msp, uint64_t txg)
|
||||
{
|
||||
/*
|
||||
* If the metaslab is loaded and we've not tried to load or allocate
|
||||
* from it in 'metaslab_unload_delay' txgs, then unload it.
|
||||
*/
|
||||
if (msp->ms_loaded &&
|
||||
msp->ms_disabled == 0 &&
|
||||
msp->ms_selected_txg + metaslab_unload_delay < txg) {
|
||||
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
|
||||
VERIFY0(range_tree_space(
|
||||
msp->ms_allocating[(txg + t) & TXG_MASK]));
|
||||
}
|
||||
if (msp->ms_allocator != -1) {
|
||||
metaslab_passivate(msp, msp->ms_weight &
|
||||
~METASLAB_ACTIVE_MASK);
|
||||
}
|
||||
|
||||
if (!metaslab_debug_unload)
|
||||
metaslab_unload(msp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called after a transaction group has completely synced to mark
|
||||
* all of the metaslab's free space as usable.
|
||||
@ -3086,27 +3110,6 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
|
||||
*/
|
||||
metaslab_recalculate_weight_and_sort(msp);
|
||||
|
||||
/*
|
||||
* If the metaslab is loaded and we've not tried to load or allocate
|
||||
* from it in 'metaslab_unload_delay' txgs, then unload it.
|
||||
*/
|
||||
if (msp->ms_loaded &&
|
||||
msp->ms_disabled == 0 &&
|
||||
msp->ms_selected_txg + metaslab_unload_delay < txg) {
|
||||
|
||||
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
|
||||
VERIFY0(range_tree_space(
|
||||
msp->ms_allocating[(txg + t) & TXG_MASK]));
|
||||
}
|
||||
if (msp->ms_allocator != -1) {
|
||||
metaslab_passivate(msp, msp->ms_weight &
|
||||
~METASLAB_ACTIVE_MASK);
|
||||
}
|
||||
|
||||
if (!metaslab_debug_unload)
|
||||
metaslab_unload(msp);
|
||||
}
|
||||
|
||||
ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
|
||||
ASSERT0(range_tree_space(msp->ms_freeing));
|
||||
ASSERT0(range_tree_space(msp->ms_freed));
|
||||
|
@ -3234,6 +3234,20 @@ vdev_sync_done(vdev_t *vd, uint64_t txg)
|
||||
!= NULL)
|
||||
metaslab_sync_done(msp, txg);
|
||||
|
||||
/*
|
||||
* Because this function is only called on dirty vdevs, it's possible
|
||||
* we won't consider all metaslabs for unloading on every
|
||||
* txg. However, unless the system is largely idle it is likely that
|
||||
* we will dirty all vdevs within a few txgs.
|
||||
*/
|
||||
for (int i = 0; i < vd->vdev_ms_count; i++) {
|
||||
msp = vd->vdev_ms[i];
|
||||
mutex_enter(&msp->ms_lock);
|
||||
if (msp->ms_sm != NULL)
|
||||
metaslab_potentially_unload(msp, txg);
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
|
||||
if (reassess)
|
||||
metaslab_sync_reassess(vd->vdev_mg);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user