mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 11:40:25 +03:00
OpenZFS 9426 - metaslab size can exceed offset addressable by spacemap
Authored by: Don Brady <don.brady@delphix.com> Reviewed by: Paul Dagnelie <pcd@delphix.com> Reviewed by: Matt Ahrens <matt@delphix.com> Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Approved by: Dan McDonald <danmcd@joyent.com> OpenZFS-issue: https://www.illumos.org/issues/9426 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/f1c88afb1 Closes #7700
This commit is contained in:
parent
e902ddb0f8
commit
e4e94ca315
@ -296,7 +296,7 @@ Use \fB1\fR for yes (default) and \fB0\fR for no.
|
|||||||
\fBvdev_max_ms_count\fR (int)
|
\fBvdev_max_ms_count\fR (int)
|
||||||
.ad
|
.ad
|
||||||
.RS 12n
|
.RS 12n
|
||||||
When a vdev is added, it will be divided into approximately (but no more than) this number of metaslabs.
|
When a vdev is added, target this number of metaslabs per top-level vdev.
|
||||||
.sp
|
.sp
|
||||||
Default value: \fB200\fR.
|
Default value: \fB200\fR.
|
||||||
.RE
|
.RE
|
||||||
@ -312,6 +312,17 @@ Minimum number of metaslabs to create in a top-level vdev.
|
|||||||
Default value: \fB16\fR.
|
Default value: \fB16\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBvdev_ms_count_limit\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Practical upper limit of total metaslabs per top-level vdev.
|
||||||
|
.sp
|
||||||
|
Default value: \fB131,072\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
@ -52,15 +52,21 @@
|
|||||||
#include <sys/zvol.h>
|
#include <sys/zvol.h>
|
||||||
#include <sys/zfs_ratelimit.h>
|
#include <sys/zfs_ratelimit.h>
|
||||||
|
|
||||||
/* maximum number of metaslabs per top-level vdev */
|
/* target number of metaslabs per top-level vdev */
|
||||||
int vdev_max_ms_count = 200;
|
int vdev_max_ms_count = 200;
|
||||||
|
|
||||||
/* minimum amount of metaslabs per top-level vdev */
|
/* minimum number of metaslabs per top-level vdev */
|
||||||
int vdev_min_ms_count = 16;
|
int vdev_min_ms_count = 16;
|
||||||
|
|
||||||
/* see comment in vdev_metaslab_set_size() */
|
/* practical upper limit of total metaslabs per top-level vdev */
|
||||||
|
int vdev_ms_count_limit = 1ULL << 17;
|
||||||
|
|
||||||
|
/* lower limit for metaslab size (512M) */
|
||||||
int vdev_default_ms_shift = 29;
|
int vdev_default_ms_shift = 29;
|
||||||
|
|
||||||
|
/* upper limit for metaslab size (256G) */
|
||||||
|
int vdev_max_ms_shift = 38;
|
||||||
|
|
||||||
int vdev_validate_skip = B_FALSE;
|
int vdev_validate_skip = B_FALSE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2130,34 +2136,53 @@ void
|
|||||||
vdev_metaslab_set_size(vdev_t *vd)
|
vdev_metaslab_set_size(vdev_t *vd)
|
||||||
{
|
{
|
||||||
uint64_t asize = vd->vdev_asize;
|
uint64_t asize = vd->vdev_asize;
|
||||||
uint64_t ms_shift = 0;
|
uint64_t ms_count = asize >> vdev_default_ms_shift;
|
||||||
|
uint64_t ms_shift;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For vdevs that are bigger than 8G the metaslab size varies in
|
* There are two dimensions to the metaslab sizing calculation:
|
||||||
* a way that the number of metaslabs increases in powers of two,
|
* the size of the metaslab and the count of metaslabs per vdev.
|
||||||
* linearly in terms of vdev_asize, starting from 16 metaslabs.
|
* In general, we aim for vdev_max_ms_count (200) metaslabs. The
|
||||||
* So for vdev_asize of 8G we get 16 metaslabs, for 16G, we get 32,
|
* ranges of the dimensions are as follows:
|
||||||
* and so on, until we hit the maximum metaslab count limit
|
*
|
||||||
* [vdev_max_ms_count] from which point the metaslab count stays
|
* 2^29 <= ms_size <= 2^38
|
||||||
* the same.
|
* 16 <= ms_count <= 131,072
|
||||||
|
*
|
||||||
|
* On the lower end of vdev sizes, we aim for metaslab sizes of
|
||||||
|
* at least 512MB (2^29) to minimize fragmentation effects when
|
||||||
|
* testing with smaller devices. However, the count constraint
|
||||||
|
* of at least 16 metaslabs will override this minimum size goal.
|
||||||
|
*
|
||||||
|
* On the upper end of vdev sizes, we aim for a maximum metaslab
|
||||||
|
* size of 256GB. However, we will cap the total count to 2^17
|
||||||
|
* metaslabs to keep our memory footprint in check.
|
||||||
|
*
|
||||||
|
* The net effect of applying above constraints is summarized below.
|
||||||
|
*
|
||||||
|
* vdev size metaslab count
|
||||||
|
* -------------|-----------------
|
||||||
|
* < 8GB ~16
|
||||||
|
* 8GB - 100GB one per 512MB
|
||||||
|
* 100GB - 50TB ~200
|
||||||
|
* 50TB - 32PB one per 256GB
|
||||||
|
* > 32PB ~131,072
|
||||||
|
* -------------------------------
|
||||||
*/
|
*/
|
||||||
ms_shift = vdev_default_ms_shift;
|
|
||||||
|
|
||||||
if ((asize >> ms_shift) < vdev_min_ms_count) {
|
if (ms_count < vdev_min_ms_count)
|
||||||
/*
|
|
||||||
* For devices that are less than 8G we want to have
|
|
||||||
* exactly 16 metaslabs. We don't want less as integer
|
|
||||||
* division rounds down, so less metaslabs mean more
|
|
||||||
* wasted space. We don't want more as these vdevs are
|
|
||||||
* small and in the likely event that we are running
|
|
||||||
* out of space, the SPA will have a hard time finding
|
|
||||||
* space due to fragmentation.
|
|
||||||
*/
|
|
||||||
ms_shift = highbit64(asize / vdev_min_ms_count);
|
ms_shift = highbit64(asize / vdev_min_ms_count);
|
||||||
ms_shift = MAX(ms_shift, SPA_MAXBLOCKSHIFT);
|
else if (ms_count > vdev_max_ms_count)
|
||||||
|
|
||||||
} else if ((asize >> ms_shift) > vdev_max_ms_count) {
|
|
||||||
ms_shift = highbit64(asize / vdev_max_ms_count);
|
ms_shift = highbit64(asize / vdev_max_ms_count);
|
||||||
|
else
|
||||||
|
ms_shift = vdev_default_ms_shift;
|
||||||
|
|
||||||
|
if (ms_shift < SPA_MAXBLOCKSHIFT) {
|
||||||
|
ms_shift = SPA_MAXBLOCKSHIFT;
|
||||||
|
} else if (ms_shift > vdev_max_ms_shift) {
|
||||||
|
ms_shift = vdev_max_ms_shift;
|
||||||
|
/* cap the total count to constrain memory footprint */
|
||||||
|
if ((asize >> ms_shift) > vdev_ms_count_limit)
|
||||||
|
ms_shift = highbit64(asize / vdev_ms_count_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
vd->vdev_ms_shift = ms_shift;
|
vd->vdev_ms_shift = ms_shift;
|
||||||
@ -4392,13 +4417,16 @@ EXPORT_SYMBOL(vdev_clear);
|
|||||||
/* BEGIN CSTYLED */
|
/* BEGIN CSTYLED */
|
||||||
module_param(vdev_max_ms_count, int, 0644);
|
module_param(vdev_max_ms_count, int, 0644);
|
||||||
MODULE_PARM_DESC(vdev_max_ms_count,
|
MODULE_PARM_DESC(vdev_max_ms_count,
|
||||||
"Divide added vdev into approximately (but no more than) this number "
|
"Target number of metaslabs per top-level vdev");
|
||||||
"of metaslabs");
|
|
||||||
|
|
||||||
module_param(vdev_min_ms_count, int, 0644);
|
module_param(vdev_min_ms_count, int, 0644);
|
||||||
MODULE_PARM_DESC(vdev_min_ms_count,
|
MODULE_PARM_DESC(vdev_min_ms_count,
|
||||||
"Minimum number of metaslabs per top-level vdev");
|
"Minimum number of metaslabs per top-level vdev");
|
||||||
|
|
||||||
|
module_param(vdev_ms_count_limit, int, 0644);
|
||||||
|
MODULE_PARM_DESC(vdev_ms_count_limit,
|
||||||
|
"Practical upper limit of total metaslabs per top-level vdev");
|
||||||
|
|
||||||
module_param(zfs_delays_per_second, uint, 0644);
|
module_param(zfs_delays_per_second, uint, 0644);
|
||||||
MODULE_PARM_DESC(zfs_delays_per_second, "Rate limit delay events to this many "
|
MODULE_PARM_DESC(zfs_delays_per_second, "Rate limit delay events to this many "
|
||||||
"IO delays per second");
|
"IO delays per second");
|
||||||
|
Loading…
Reference in New Issue
Block a user