mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-24 17:06:34 +03:00
Fix short-lived txg caused by autotrim
Current autotrim causes short-lived txg through: 1. calling txg_wait_synced() in metaslab_enable() 2. calling txg_wait_open() with should_quiesce = true This patch addresses all the issues mentioned above. A new cv, vdev_autotrim_kick_cv is added to kick autotrim activity. It will be signaled once a txg is synced so that it does not change the original autotrim pace. Also because it is a cv, the wait is interruptible which speeds up the vdev_autotrim_stop_wait() call. Finally, combining big zfs_txg_timeout, txg_wait_open() also causes delay when exporting a pool. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: jxdking <lostking2008@hotmail.com> Issue #8993 Closes #12194
This commit is contained in:
parent
64bfa6bae3
commit
65d10bd87c
@ -329,6 +329,7 @@ struct vdev {
|
||||
list_node_t vdev_trim_node;
|
||||
kmutex_t vdev_autotrim_lock;
|
||||
kcondvar_t vdev_autotrim_cv;
|
||||
kcondvar_t vdev_autotrim_kick_cv;
|
||||
kthread_t *vdev_autotrim_thread;
|
||||
/* Protects vdev_trim_thread and vdev_trim_state. */
|
||||
kmutex_t vdev_trim_lock;
|
||||
|
@ -41,6 +41,7 @@ extern void vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state);
|
||||
extern void vdev_trim_stop_wait(spa_t *spa, list_t *vd_list);
|
||||
extern void vdev_trim_restart(vdev_t *vd);
|
||||
extern void vdev_autotrim(spa_t *spa);
|
||||
extern void vdev_autotrim_kick(spa_t *spa);
|
||||
extern void vdev_autotrim_stop_all(spa_t *spa);
|
||||
extern void vdev_autotrim_stop_wait(vdev_t *vd);
|
||||
extern void vdev_autotrim_restart(spa_t *spa);
|
||||
|
@ -9449,6 +9449,9 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
|
||||
spa_update_dspace(spa);
|
||||
|
||||
if (spa_get_autotrim(spa) == SPA_AUTOTRIM_ON)
|
||||
vdev_autotrim_kick(spa);
|
||||
|
||||
/*
|
||||
* It had better be the case that we didn't dirty anything
|
||||
* since vdev_config_sync().
|
||||
|
@ -696,6 +696,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
mutex_init(&vd->vdev_trim_io_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_trim_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_autotrim_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_autotrim_kick_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
@ -1148,6 +1149,7 @@ vdev_free(vdev_t *vd)
|
||||
mutex_destroy(&vd->vdev_trim_io_lock);
|
||||
cv_destroy(&vd->vdev_trim_cv);
|
||||
cv_destroy(&vd->vdev_autotrim_cv);
|
||||
cv_destroy(&vd->vdev_autotrim_kick_cv);
|
||||
cv_destroy(&vd->vdev_trim_io_cv);
|
||||
|
||||
mutex_destroy(&vd->vdev_rebuild_lock);
|
||||
|
@ -182,6 +182,25 @@ vdev_autotrim_should_stop(vdev_t *tvd)
|
||||
spa_get_autotrim(tvd->vdev_spa) == SPA_AUTOTRIM_OFF);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for given number of kicks, return true if the wait is aborted due to
|
||||
* vdev_autotrim_exit_wanted.
|
||||
*/
|
||||
static boolean_t
|
||||
vdev_autotrim_wait_kick(vdev_t *vd, int num_of_kick)
|
||||
{
|
||||
mutex_enter(&vd->vdev_autotrim_lock);
|
||||
for (int i = 0; i < num_of_kick; i++) {
|
||||
if (vd->vdev_autotrim_exit_wanted)
|
||||
break;
|
||||
cv_wait(&vd->vdev_autotrim_kick_cv, &vd->vdev_autotrim_lock);
|
||||
}
|
||||
boolean_t exit_wanted = vd->vdev_autotrim_exit_wanted;
|
||||
mutex_exit(&vd->vdev_autotrim_lock);
|
||||
|
||||
return (exit_wanted);
|
||||
}
|
||||
|
||||
/*
|
||||
* The sync task for updating the on-disk state of a manual TRIM. This
|
||||
* is scheduled by vdev_trim_change_state().
|
||||
@ -1190,7 +1209,6 @@ vdev_autotrim_thread(void *arg)
|
||||
|
||||
while (!vdev_autotrim_should_stop(vd)) {
|
||||
int txgs_per_trim = MAX(zfs_trim_txg_batch, 1);
|
||||
boolean_t issued_trim = B_FALSE;
|
||||
uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
|
||||
uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
|
||||
|
||||
@ -1224,6 +1242,8 @@ vdev_autotrim_thread(void *arg)
|
||||
i += txgs_per_trim) {
|
||||
metaslab_t *msp = vd->vdev_ms[i];
|
||||
range_tree_t *trim_tree;
|
||||
boolean_t issued_trim = B_FALSE;
|
||||
boolean_t wait_aborted = B_FALSE;
|
||||
|
||||
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||
metaslab_disable(msp);
|
||||
@ -1374,7 +1394,18 @@ vdev_autotrim_thread(void *arg)
|
||||
range_tree_vacate(trim_tree, NULL, NULL);
|
||||
range_tree_destroy(trim_tree);
|
||||
|
||||
metaslab_enable(msp, issued_trim, B_FALSE);
|
||||
/*
|
||||
* Wait for couples of kicks, to ensure the trim io is
|
||||
* synced. If the wait is aborted due to
|
||||
* vdev_autotrim_exit_wanted, we need to signal
|
||||
* metaslab_enable() to wait for sync.
|
||||
*/
|
||||
if (issued_trim) {
|
||||
wait_aborted = vdev_autotrim_wait_kick(vd,
|
||||
TXG_CONCURRENT_STATES + TXG_DEFER_SIZE);
|
||||
}
|
||||
|
||||
metaslab_enable(msp, wait_aborted, B_FALSE);
|
||||
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
|
||||
|
||||
for (uint64_t c = 0; c < children; c++) {
|
||||
@ -1388,17 +1419,14 @@ vdev_autotrim_thread(void *arg)
|
||||
}
|
||||
|
||||
kmem_free(tap, sizeof (trim_args_t) * children);
|
||||
|
||||
if (vdev_autotrim_should_stop(vd))
|
||||
break;
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||
|
||||
/*
|
||||
* After completing the group of metaslabs wait for the next
|
||||
* open txg. This is done to make sure that a minimum of
|
||||
* zfs_trim_txg_batch txgs will occur before these metaslabs
|
||||
* are trimmed again.
|
||||
*/
|
||||
txg_wait_open(spa_get_dsl(spa), 0, issued_trim);
|
||||
vdev_autotrim_wait_kick(vd, 1);
|
||||
|
||||
shift++;
|
||||
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
|
||||
@ -1476,11 +1504,9 @@ vdev_autotrim_stop_wait(vdev_t *tvd)
|
||||
mutex_enter(&tvd->vdev_autotrim_lock);
|
||||
if (tvd->vdev_autotrim_thread != NULL) {
|
||||
tvd->vdev_autotrim_exit_wanted = B_TRUE;
|
||||
|
||||
while (tvd->vdev_autotrim_thread != NULL) {
|
||||
cv_wait(&tvd->vdev_autotrim_cv,
|
||||
&tvd->vdev_autotrim_lock);
|
||||
}
|
||||
cv_broadcast(&tvd->vdev_autotrim_kick_cv);
|
||||
cv_wait(&tvd->vdev_autotrim_cv,
|
||||
&tvd->vdev_autotrim_lock);
|
||||
|
||||
ASSERT3P(tvd->vdev_autotrim_thread, ==, NULL);
|
||||
tvd->vdev_autotrim_exit_wanted = B_FALSE;
|
||||
@ -1488,6 +1514,24 @@ vdev_autotrim_stop_wait(vdev_t *tvd)
|
||||
mutex_exit(&tvd->vdev_autotrim_lock);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_autotrim_kick(spa_t *spa)
|
||||
{
|
||||
ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
|
||||
|
||||
vdev_t *root_vd = spa->spa_root_vdev;
|
||||
vdev_t *tvd;
|
||||
|
||||
for (uint64_t i = 0; i < root_vd->vdev_children; i++) {
|
||||
tvd = root_vd->vdev_child[i];
|
||||
|
||||
mutex_enter(&tvd->vdev_autotrim_lock);
|
||||
if (tvd->vdev_autotrim_thread != NULL)
|
||||
cv_broadcast(&tvd->vdev_autotrim_kick_cv);
|
||||
mutex_exit(&tvd->vdev_autotrim_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for all of the vdev_autotrim_thread associated with the pool to
|
||||
* be terminated (canceled or stopped).
|
||||
|
Loading…
Reference in New Issue
Block a user