mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 10:24:22 +03:00
Freeing throttle should account for holes
The deletion throttle currently does not account for holes in a file. This means that it can activate when it shouldn't. To fix this, we switch the throttle to be based on the number of L1 blocks we will have to dirty when freeing. Reviewed-by: Tom Caputi <tcaputi@datto.com> Reviewed-by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alek Pinchuk <apinchuk@datto.com> Closes #7725 Closes #7888
This commit is contained in:
parent
dcec0a12c8
commit
65282ee9e0
@ -1,7 +1,7 @@
|
|||||||
'\" te
|
'\" te
|
||||||
.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
|
.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
|
||||||
.\" Copyright (c) 2017 Datto Inc.
|
|
||||||
.\" Copyright (c) 2018 by Delphix. All rights reserved.
|
.\" Copyright (c) 2018 by Delphix. All rights reserved.
|
||||||
|
.\" Copyright (c) 2019 Datto Inc.
|
||||||
.\" The contents of this file are subject to the terms of the Common Development
|
.\" The contents of this file are subject to the terms of the Common Development
|
||||||
.\" and Distribution License (the "License"). You may not use this file except
|
.\" and Distribution License (the "License"). You may not use this file except
|
||||||
.\" in compliance with the License. You can obtain a copy of the license at
|
.\" in compliance with the License. You can obtain a copy of the license at
|
||||||
@ -14,7 +14,7 @@
|
|||||||
.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
|
.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
|
||||||
.\" own identifying information:
|
.\" own identifying information:
|
||||||
.\" Portions Copyright [yyyy] [name of copyright owner]
|
.\" Portions Copyright [yyyy] [name of copyright owner]
|
||||||
.TH ZFS-MODULE-PARAMETERS 5 "Oct 28, 2017"
|
.TH ZFS-MODULE-PARAMETERS 5 "Feb 8, 2019"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
zfs\-module\-parameters \- ZFS module parameters
|
zfs\-module\-parameters \- ZFS module parameters
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
@ -1970,12 +1970,12 @@ Default value: \fB52,428,800\fR.
|
|||||||
\fBzfs_per_txg_dirty_frees_percent \fR (ulong)
|
\fBzfs_per_txg_dirty_frees_percent \fR (ulong)
|
||||||
.ad
|
.ad
|
||||||
.RS 12n
|
.RS 12n
|
||||||
Tunable to control percentage of dirtied blocks from frees in one TXG.
|
Tunable to control percentage of dirtied indirect blocks from frees allowed
|
||||||
After this threshold is crossed, additional dirty blocks from frees
|
into one TXG. After this threshold is crossed, additional frees will wait until
|
||||||
wait until the next TXG.
|
the next TXG.
|
||||||
A value of zero will disable this throttle.
|
A value of zero will disable this throttle.
|
||||||
.sp
|
.sp
|
||||||
Default value: \fB30\fR and \fB0\fR to disable.
|
Default value: \fB5\fR, set to \fB0\fR to disable.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||||
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
|
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
|
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
|
||||||
|
* Copyright (c) 2019 Datto Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/dmu.h>
|
#include <sys/dmu.h>
|
||||||
@ -61,12 +62,12 @@
|
|||||||
int zfs_nopwrite_enabled = 1;
|
int zfs_nopwrite_enabled = 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Tunable to control percentage of dirtied blocks from frees in one TXG.
|
* Tunable to control percentage of dirtied L1 blocks from frees allowed into
|
||||||
* After this threshold is crossed, additional dirty blocks from frees
|
* one TXG. After this threshold is crossed, additional dirty blocks from frees
|
||||||
* wait until the next TXG.
|
* will wait until the next TXG.
|
||||||
* A value of zero will disable this throttle.
|
* A value of zero will disable this throttle.
|
||||||
*/
|
*/
|
||||||
unsigned long zfs_per_txg_dirty_frees_percent = 30;
|
unsigned long zfs_per_txg_dirty_frees_percent = 5;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Enable/disable forcing txg sync when dirty in dmu_offset_next.
|
* Enable/disable forcing txg sync when dirty in dmu_offset_next.
|
||||||
@ -709,11 +710,13 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
|||||||
*
|
*
|
||||||
* On input, *start should be the first offset that does not need to be
|
* On input, *start should be the first offset that does not need to be
|
||||||
* freed (e.g. "offset + length"). On return, *start will be the first
|
* freed (e.g. "offset + length"). On return, *start will be the first
|
||||||
* offset that should be freed.
|
* offset that should be freed and l1blks is set to the number of level 1
|
||||||
|
* indirect blocks found within the chunk.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
|
get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks)
|
||||||
{
|
{
|
||||||
|
uint64_t blks;
|
||||||
uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1);
|
uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1);
|
||||||
/* bytes of data covered by a level-1 indirect block */
|
/* bytes of data covered by a level-1 indirect block */
|
||||||
uint64_t iblkrange =
|
uint64_t iblkrange =
|
||||||
@ -723,11 +726,16 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
|
|||||||
|
|
||||||
if (*start - minimum <= iblkrange * maxblks) {
|
if (*start - minimum <= iblkrange * maxblks) {
|
||||||
*start = minimum;
|
*start = minimum;
|
||||||
|
/*
|
||||||
|
* Assume full L1 blocks and 128k recordsize to approximate the
|
||||||
|
* expected number of L1 blocks in this chunk
|
||||||
|
*/
|
||||||
|
*l1blks = minimum / (1024 * 128 * 1024);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
ASSERT(ISP2(iblkrange));
|
ASSERT(ISP2(iblkrange));
|
||||||
|
|
||||||
for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) {
|
for (blks = 0; *start > minimum && blks < maxblks; blks++) {
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -745,6 +753,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
|
|||||||
*start = minimum;
|
*start = minimum;
|
||||||
break;
|
break;
|
||||||
} else if (err != 0) {
|
} else if (err != 0) {
|
||||||
|
*l1blks = blks;
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -753,6 +762,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
|
|||||||
}
|
}
|
||||||
if (*start < minimum)
|
if (*start < minimum)
|
||||||
*start = minimum;
|
*start = minimum;
|
||||||
|
*l1blks = blks;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -792,7 +802,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
|||||||
dirty_frees_threshold =
|
dirty_frees_threshold =
|
||||||
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
|
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
|
||||||
else
|
else
|
||||||
dirty_frees_threshold = zfs_dirty_data_max / 4;
|
dirty_frees_threshold = zfs_dirty_data_max / 20;
|
||||||
|
|
||||||
if (length == DMU_OBJECT_END || offset + length > object_size)
|
if (length == DMU_OBJECT_END || offset + length > object_size)
|
||||||
length = object_size - offset;
|
length = object_size - offset;
|
||||||
@ -800,6 +810,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
|||||||
while (length != 0) {
|
while (length != 0) {
|
||||||
uint64_t chunk_end, chunk_begin, chunk_len;
|
uint64_t chunk_end, chunk_begin, chunk_len;
|
||||||
uint64_t long_free_dirty_all_txgs = 0;
|
uint64_t long_free_dirty_all_txgs = 0;
|
||||||
|
uint64_t l1blks;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
|
|
||||||
if (dmu_objset_zfs_unmounting(dn->dn_objset))
|
if (dmu_objset_zfs_unmounting(dn->dn_objset))
|
||||||
@ -808,7 +819,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
|||||||
chunk_end = chunk_begin = offset + length;
|
chunk_end = chunk_begin = offset + length;
|
||||||
|
|
||||||
/* move chunk_begin backwards to the beginning of this chunk */
|
/* move chunk_begin backwards to the beginning of this chunk */
|
||||||
err = get_next_chunk(dn, &chunk_begin, offset);
|
err = get_next_chunk(dn, &chunk_begin, offset, &l1blks);
|
||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
ASSERT3U(chunk_begin, >=, offset);
|
ASSERT3U(chunk_begin, >=, offset);
|
||||||
@ -849,9 +860,19 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
|||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In order to prevent unnecessary write throttling, for each
|
||||||
|
* TXG, we track the cumulative size of L1 blocks being dirtied
|
||||||
|
* in dnode_free_range() below. We compare this number to a
|
||||||
|
* tunable threshold, past which we prevent new L1 dirty freeing
|
||||||
|
* blocks from being added into the open TXG. See
|
||||||
|
* dmu_free_long_range_impl() for details. The threshold
|
||||||
|
* prevents write throttle activation due to dirty freeing L1
|
||||||
|
* blocks taking up a large percentage of zfs_dirty_data_max.
|
||||||
|
*/
|
||||||
mutex_enter(&dp->dp_lock);
|
mutex_enter(&dp->dp_lock);
|
||||||
dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
|
dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
|
||||||
chunk_len;
|
l1blks << dn->dn_indblkshift;
|
||||||
mutex_exit(&dp->dp_lock);
|
mutex_exit(&dp->dp_lock);
|
||||||
DTRACE_PROBE3(free__long__range,
|
DTRACE_PROBE3(free__long__range,
|
||||||
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
|
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
|
||||||
|
Loading…
Reference in New Issue
Block a user