mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-30 02:34:14 +03:00
Refactor Log Size Limit
The original Log Size Limit implementation blocked all writes once the limit was reached, until the TXG was committed and the log was freed. This caused huge delays followed by speed spikes in application writes. This implementation instead throttles writes smoothly, using exactly the same mechanism that is used for dirty data.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: jxdking <lostking2008@hotmail.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Issue #12284
Closes #13476
This commit is contained in:
committed by
Tony Hutter
parent
91e02156dd
commit
33223cbc3c
+38
-22
@@ -53,8 +53,8 @@ dmu_tx_stats_t dmu_tx_stats = {
|
||||
{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
|
||||
{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
|
||||
{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
|
||||
{ "dmu_tx_wrlog_over_max", KSTAT_DATA_UINT64 },
|
||||
{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
|
||||
{ "dmu_tx_wrlog_delay", KSTAT_DATA_UINT64 },
|
||||
{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
@@ -781,34 +781,49 @@ static void
|
||||
dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
|
||||
{
|
||||
dsl_pool_t *dp = tx->tx_pool;
|
||||
uint64_t delay_min_bytes =
|
||||
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
|
||||
hrtime_t wakeup, min_tx_time, now;
|
||||
uint64_t delay_min_bytes, wrlog;
|
||||
hrtime_t wakeup, tx_time = 0, now;
|
||||
|
||||
if (dirty <= delay_min_bytes)
|
||||
/* Calculate minimum transaction time for the dirty data amount. */
|
||||
delay_min_bytes =
|
||||
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
|
||||
if (dirty > delay_min_bytes) {
|
||||
/*
|
||||
* The caller has already waited until we are under the max.
|
||||
* We make them pass us the amount of dirty data so we don't
|
||||
* have to handle the case of it being >= the max, which
|
||||
* could cause a divide-by-zero if it's == the max.
|
||||
*/
|
||||
ASSERT3U(dirty, <, zfs_dirty_data_max);
|
||||
|
||||
tx_time = zfs_delay_scale * (dirty - delay_min_bytes) /
|
||||
(zfs_dirty_data_max - dirty);
|
||||
}
|
||||
|
||||
/* Calculate minimum transaction time for the TX_WRITE log size. */
|
||||
wrlog = aggsum_upper_bound(&dp->dp_wrlog_total);
|
||||
delay_min_bytes =
|
||||
zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100;
|
||||
if (wrlog >= zfs_wrlog_data_max) {
|
||||
tx_time = zfs_delay_max_ns;
|
||||
} else if (wrlog > delay_min_bytes) {
|
||||
tx_time = MAX(zfs_delay_scale * (wrlog - delay_min_bytes) /
|
||||
(zfs_wrlog_data_max - wrlog), tx_time);
|
||||
}
|
||||
|
||||
if (tx_time == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The caller has already waited until we are under the max.
|
||||
* We make them pass us the amount of dirty data so we don't
|
||||
* have to handle the case of it being >= the max, which could
|
||||
* cause a divide-by-zero if it's == the max.
|
||||
*/
|
||||
ASSERT3U(dirty, <, zfs_dirty_data_max);
|
||||
|
||||
tx_time = MIN(tx_time, zfs_delay_max_ns);
|
||||
now = gethrtime();
|
||||
min_tx_time = zfs_delay_scale *
|
||||
(dirty - delay_min_bytes) / (zfs_dirty_data_max - dirty);
|
||||
min_tx_time = MIN(min_tx_time, zfs_delay_max_ns);
|
||||
if (now > tx->tx_start + min_tx_time)
|
||||
if (now > tx->tx_start + tx_time)
|
||||
return;
|
||||
|
||||
DTRACE_PROBE3(delay__mintime, dmu_tx_t *, tx, uint64_t, dirty,
|
||||
uint64_t, min_tx_time);
|
||||
uint64_t, tx_time);
|
||||
|
||||
mutex_enter(&dp->dp_lock);
|
||||
wakeup = MAX(tx->tx_start + min_tx_time,
|
||||
dp->dp_last_wakeup + min_tx_time);
|
||||
wakeup = MAX(tx->tx_start + tx_time, dp->dp_last_wakeup + tx_time);
|
||||
dp->dp_last_wakeup = wakeup;
|
||||
mutex_exit(&dp->dp_lock);
|
||||
|
||||
@@ -886,8 +901,9 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
|
||||
}
|
||||
|
||||
if (!tx->tx_dirty_delayed &&
|
||||
dsl_pool_wrlog_over_max(tx->tx_pool)) {
|
||||
DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
|
||||
dsl_pool_need_wrlog_delay(tx->tx_pool)) {
|
||||
tx->tx_wait_dirty = B_TRUE;
|
||||
DMU_TX_STAT_BUMP(dmu_tx_wrlog_delay);
|
||||
return (SET_ERROR(ERESTART));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user