FreeBSD: Hardcode abd_chunk_size to PAGE_SIZE

Setting it below PAGE_SIZE makes no sense: it increases all the
overheads and makes returning memory to the OS problematic.  Setting it
above PAGE_SIZE makes no sense either: such allocations, and especially
frees, are too expensive and cause too much KVA fragmentation to be
worth the smaller number of chunks.  With both directions ruled out,
there is no reason to keep the more complicated chunk-size math here.

What does still make sense is a tunable border between linear and
scatter ABDs, which was previously also controlled by this tunable.
Retain that functionality by adopting the abd_scatter_min_size tunable
from Linux, just with a different default value.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Closes #12328
commit 49bb454120
parent 41b33dce44
Authored by Alexander Motin on 2021-07-06 20:39:23 -04:00; committed by Tony Hutter
2 changed files with 51 additions and 79 deletions
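
The border the commit message keeps as a tunable comes down to a single size
check.  The following is a minimal sketch of that check, assuming a 4 KB
PAGE_SIZE purely for illustration and mirroring the abd_size_alloc_linear()
logic and the PAGE_SIZE + 1 default that appear in the diff below; it is not
the kernel source:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define	PAGE_SIZE	4096	/* assumed page size, illustration only */

/* Default border taken from the diff below: one byte past a page. */
static size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;

/* Mirrors abd_size_alloc_linear() from the diff; not the kernel code. */
static bool
abd_size_alloc_linear(size_t size)
{
	return (size < zfs_abd_scatter_min_size);
}

int
main(void)
{
	assert(abd_size_alloc_linear(512));		/* sub-page: linear */
	assert(abd_size_alloc_linear(PAGE_SIZE));	/* exactly one page: linear */
	assert(!abd_size_alloc_linear(2 * PAGE_SIZE));	/* multi-page: scatter */
	return (0);
}

With these defaults a 4 KB request still goes linear; only requests larger
than one page become scatter ABDs.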


@@ -61,7 +61,6 @@ typedef struct abd {
 		struct abd_scatter {
 			uint_t	abd_offset;
 #if defined(__FreeBSD__) && defined(_KERNEL)
-			uint_t	abd_chunk_size;
 			void	*abd_chunks[1]; /* actually variable-length */
 #else
 			uint_t	abd_nents;


@@ -79,22 +79,29 @@ struct {
 } abd_sums;
 
 /*
- * The size of the chunks ABD allocates. Because the sizes allocated from the
- * kmem_cache can't change, this tunable can only be modified at boot. Changing
- * it at runtime would cause ABD iteration to work incorrectly for ABDs which
- * were allocated with the old size, so a safeguard has been put in place which
- * will cause the machine to panic if you change it and try to access the data
- * within a scattered ABD.
+ * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
+ * ABD's for.  Smaller allocations will use linear ABD's which use
+ * zio_[data_]buf_alloc().
+ *
+ * Scatter ABD's use at least one page each, so sub-page allocations waste
+ * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
+ * half of each page).  Using linear ABD's for small allocations means that
+ * they will be put on slabs which contain many allocations.
+ *
+ * Linear ABDs for multi-page allocations are easier to use, and in some cases
+ * it allows to avoid buffer copying.  But allocation and especially free
+ * of multi-page linear ABDs are expensive operations due to KVA mapping and
+ * unmapping, and with time they cause KVA fragmentations.
  */
-size_t zfs_abd_chunk_size = 4096;
+size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;
 
 #if defined(_KERNEL)
 SYSCTL_DECL(_vfs_zfs);
 
 SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
 	&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
-SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
-	&zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates");
+SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
+	&zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");
 #endif
 
 kmem_cache_t *abd_chunk_cache;
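
The waste described in the new comment above is plain page-rounding
arithmetic: a scatter ABD always consumes whole pages, so anything short of a
page boundary is overhead.  A small sketch, assuming 4 KB pages and using the
same rounding as abd_chunkcnt_for_bytes() and the waste computation in
abd_update_scatter_stats() further down in this diff:

#include <stdio.h>
#include <stddef.h>

/* Assumed 4 KB page geometry, for illustration only. */
#define	PAGE_SIZE	4096
#define	PAGE_MASK	(PAGE_SIZE - 1)
#define	PAGE_SHIFT	12

int
main(void)
{
	size_t sizes[] = { 2048, 4096, 6144, 131072 };

	for (size_t i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++) {
		size_t size = sizes[i];
		/* Pages a scatter ABD of this size occupies (rounded up). */
		size_t chunks = (size + PAGE_MASK) >> PAGE_SHIFT;
		/* Same form as "waste = (n << PAGE_SHIFT) - abd->abd_size". */
		size_t waste = (chunks << PAGE_SHIFT) - size;
		/* 2048 -> 1 page, 2048 wasted: the "half of each page" case. */
		printf("%6zu bytes -> %2zu page(s), %4zu bytes wasted\n",
		    size, chunks, waste);
	}
	return (0);
}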
@@ -102,23 +109,16 @@ static kstat_t *abd_ksp;
 
 /*
  * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are
- * just a single zero'd sized zfs_abd_chunk_size buffer. This
- * allows us to conserve memory by only using a single zero buffer
- * for the scatter chunks.
+ * just a single zero'd page-sized buffer. This allows us to conserve
+ * memory by only using a single zero buffer for the scatter chunks.
  */
 abd_t *abd_zero_scatter = NULL;
 static char *abd_zero_buf = NULL;
 
-static void
-abd_free_chunk(void *c)
-{
-	kmem_cache_free(abd_chunk_cache, c);
-}
-
 static uint_t
 abd_chunkcnt_for_bytes(size_t size)
 {
-	return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size);
+	return ((size + PAGE_MASK) >> PAGE_SHIFT);
 }
 
 static inline uint_t
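
The new abd_chunkcnt_for_bytes() is the old round-up-and-divide rewritten
with page shifts; for a power-of-two page size the two forms are equal.  A
quick sketch of that equivalence, with the PAGE_* constants assumed and the
old P2ROUNDUP-based expression spelled out as ceiling division
(chunkcnt_old/chunkcnt_new are illustrative names, not kernel functions):

#include <assert.h>
#include <stddef.h>

/* Assumed 4 KB page geometry, for illustration only. */
#define	PAGE_SIZE	4096
#define	PAGE_MASK	(PAGE_SIZE - 1)
#define	PAGE_SHIFT	12

/* Old form: P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size. */
static size_t
chunkcnt_old(size_t size)
{
	return ((size + PAGE_SIZE - 1) / PAGE_SIZE);
}

/* New form from the diff: add the page mask, then shift. */
static size_t
chunkcnt_new(size_t size)
{
	return ((size + PAGE_MASK) >> PAGE_SHIFT);
}

int
main(void)
{
	for (size_t size = 0; size <= 64 * PAGE_SIZE; size += 512)
		assert(chunkcnt_old(size) == chunkcnt_new(size));
	return (0);
}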
@@ -132,7 +132,7 @@ abd_scatter_chunkcnt(abd_t *abd)
 boolean_t
 abd_size_alloc_linear(size_t size)
 {
-	return (size <= zfs_abd_chunk_size ? B_TRUE : B_FALSE);
+	return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE);
 }
 
 void
@@ -140,7 +140,7 @@ abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
 {
 	uint_t n = abd_scatter_chunkcnt(abd);
 	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
-	int waste = n * zfs_abd_chunk_size - abd->abd_size;
+	int waste = (n << PAGE_SHIFT) - abd->abd_size;
 	if (op == ABDSTAT_INCR) {
 		ABDSTAT_BUMP(abdstat_scatter_cnt);
 		ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
@@ -173,11 +173,11 @@ abd_verify_scatter(abd_t *abd)
 	uint_t i, n;
 
 	/*
-	 * There is no scatter linear pages in FreeBSD so there is an
-	 * if an error if the ABD has been marked as a linear page.
+	 * There is no scatter linear pages in FreeBSD so there is
+	 * an error if the ABD has been marked as a linear page.
 	 */
 	ASSERT(!abd_is_linear_page(abd));
-	ASSERT3U(ABD_SCATTER(abd).abd_offset, <, zfs_abd_chunk_size);
+	ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE);
 	n = abd_scatter_chunkcnt(abd);
 	for (i = 0; i < n; i++) {
 		ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
@@ -191,11 +191,9 @@ abd_alloc_chunks(abd_t *abd, size_t size)
 
 	n = abd_chunkcnt_for_bytes(size);
 	for (i = 0; i < n; i++) {
-		void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
-		ASSERT3P(c, !=, NULL);
-		ABD_SCATTER(abd).abd_chunks[i] = c;
+		ABD_SCATTER(abd).abd_chunks[i] =
+		    kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
 	}
-	ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
 }
 
 void
@@ -205,7 +203,8 @@ abd_free_chunks(abd_t *abd)
 
 	n = abd_scatter_chunkcnt(abd);
 	for (i = 0; i < n; i++) {
-		abd_free_chunk(ABD_SCATTER(abd).abd_chunks[i]);
+		kmem_cache_free(abd_chunk_cache,
+		    ABD_SCATTER(abd).abd_chunks[i]);
 	}
 }
@@ -250,15 +249,13 @@ abd_alloc_zero_scatter(void)
 	uint_t i, n;
 
 	n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
-	abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP);
+	abd_zero_buf = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
 	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
 	abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
 	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
 
 	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
-	ABD_SCATTER(abd_zero_scatter).abd_chunk_size =
-	    zfs_abd_chunk_size;
 
 	for (i = 0; i < n; i++) {
 		ABD_SCATTER(abd_zero_scatter).abd_chunks[i] =
 		    abd_zero_buf;
@@ -266,18 +263,18 @@ abd_alloc_zero_scatter(void)
 	}
 
 	ABDSTAT_BUMP(abdstat_scatter_cnt);
-	ABDSTAT_INCR(abdstat_scatter_data_size, zfs_abd_chunk_size);
+	ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE);
 }
 
 static void
 abd_free_zero_scatter(void)
 {
 	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
-	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size);
+	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE);
 
 	abd_free_struct(abd_zero_scatter);
 	abd_zero_scatter = NULL;
-	kmem_free(abd_zero_buf, zfs_abd_chunk_size);
+	kmem_cache_free(abd_chunk_cache, abd_zero_buf);
 }
 
 static int
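
The zero-scatter ABD handled in the two hunks above is the one case where a
single chunk backs the whole buffer: every chunk pointer of the
SPA_MAXBLOCKSIZE ABD refers to the same zeroed page.  A rough sketch of the
bookkeeping, assuming a 16 MB SPA_MAXBLOCKSIZE and 4 KB pages (illustrative
values, not taken from this diff):

#include <stdio.h>
#include <stddef.h>

/* Assumed values, for illustration only. */
#define	PAGE_SIZE		4096
#define	PAGE_MASK		(PAGE_SIZE - 1)
#define	PAGE_SHIFT		12
#define	SPA_MAXBLOCKSIZE	(16 * 1024 * 1024)

int
main(void)
{
	/* Same computation as abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE). */
	size_t n = ((size_t)SPA_MAXBLOCKSIZE + PAGE_MASK) >> PAGE_SHIFT;

	/*
	 * All n chunk pointers alias one page of zeros, so the scatter ABD
	 * spans SPA_MAXBLOCKSIZE while pinning only PAGE_SIZE bytes.
	 */
	printf("%zu chunk pointers, %d bytes of backing memory\n",
	    n, PAGE_SIZE);
	return (0);
}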
@@ -305,7 +302,7 @@ abd_kstats_update(kstat_t *ksp, int rw)
 void
 abd_init(void)
 {
-	abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
+	abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
 	    NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
 
 	wmsum_init(&abd_sums.abdstat_struct_size, 0);
@@ -382,7 +379,7 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
 
 	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
 	size_t chunkcnt = abd_chunkcnt_for_bytes(
-	    (new_offset % zfs_abd_chunk_size) + size);
+	    (new_offset & PAGE_MASK) + size);
 
 	ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd));
@@ -397,7 +394,7 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
 	}
 
 	if (abd == NULL)
-		abd = abd_alloc_struct(chunkcnt * zfs_abd_chunk_size);
+		abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT);
 
 	/*
 	 * Even if this buf is filesystem metadata, we only track that
@@ -405,34 +402,16 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
 	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
 	 */
 
-	ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size;
-	ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
+	ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK;
 
 	/* Copy the scatterlist starting at the correct offset */
 	(void) memcpy(&ABD_SCATTER(abd).abd_chunks,
-	    &ABD_SCATTER(sabd).abd_chunks[new_offset /
-	    zfs_abd_chunk_size],
+	    &ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT],
 	    chunkcnt * sizeof (void *));
 
 	return (abd);
 }
 
-static inline size_t
-abd_iter_scatter_chunk_offset(struct abd_iter *aiter)
-{
-	ASSERT(!abd_is_linear(aiter->iter_abd));
-	return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
-	    aiter->iter_pos) % zfs_abd_chunk_size);
-}
-
-static inline size_t
-abd_iter_scatter_chunk_index(struct abd_iter *aiter)
-{
-	ASSERT(!abd_is_linear(aiter->iter_abd));
-	return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
-	    aiter->iter_pos) / zfs_abd_chunk_size);
-}
-
 /*
  * Initialize the abd_iter.
  */
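
In abd_get_offset_scatter() the modulo and division that used to involve
zfs_abd_chunk_size become mask-and-shift arithmetic: the low bits of the
combined offset are the position inside a page (the view's new abd_offset),
while the high bits select the chunk to start copying from.  A small sketch
of that split, assuming 4 KB pages; split_offset() is an illustrative helper,
not a kernel function:

#include <assert.h>
#include <stddef.h>

/* Assumed 4 KB page geometry, for illustration only. */
#define	PAGE_SIZE	4096
#define	PAGE_MASK	(PAGE_SIZE - 1)
#define	PAGE_SHIFT	12

/* Split a byte offset into a paged scatter ABD into (chunk, in-page). */
static void
split_offset(size_t abd_offset, size_t off, size_t *chunk, size_t *in_page)
{
	size_t new_offset = abd_offset + off;	/* as in abd_get_offset_scatter() */

	*chunk = new_offset >> PAGE_SHIFT;	/* first abd_chunks[] entry to copy */
	*in_page = new_offset & PAGE_MASK;	/* becomes the view's abd_offset */
}

int
main(void)
{
	size_t chunk, in_page;

	/* A view 6000 bytes into an ABD that itself starts 100 bytes in. */
	split_offset(100, 6000, &chunk, &in_page);
	assert(chunk == 1 && in_page == 2004);	/* 6100 = 1 * 4096 + 2004 */
	return (0);
}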
@@ -483,29 +462,25 @@ void
 abd_iter_map(struct abd_iter *aiter)
 {
 	void *paddr;
-	size_t offset = 0;
 
 	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
 	ASSERT0(aiter->iter_mapsize);
 
-	/* Panic if someone has changed zfs_abd_chunk_size */
-	IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size ==
-	    ABD_SCATTER(aiter->iter_abd).abd_chunk_size);
-
 	/* There's nothing left to iterate over, so do nothing */
 	if (abd_iter_at_end(aiter))
 		return;
 
-	if (abd_is_linear(aiter->iter_abd)) {
-		offset = aiter->iter_pos;
-		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
-		paddr = ABD_LINEAR_BUF(aiter->iter_abd);
+	abd_t *abd = aiter->iter_abd;
+	size_t offset = aiter->iter_pos;
+	if (abd_is_linear(abd)) {
+		aiter->iter_mapsize = abd->abd_size - offset;
+		paddr = ABD_LINEAR_BUF(abd);
 	} else {
-		size_t index = abd_iter_scatter_chunk_index(aiter);
-		offset = abd_iter_scatter_chunk_offset(aiter);
-		aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
-		    aiter->iter_abd->abd_size - aiter->iter_pos);
-		paddr = ABD_SCATTER(aiter->iter_abd).abd_chunks[index];
+		offset += ABD_SCATTER(abd).abd_offset;
+		paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT];
+		offset &= PAGE_MASK;
+		aiter->iter_mapsize = MIN(PAGE_SIZE - offset,
+		    abd->abd_size - aiter->iter_pos);
 	}
 
 	aiter->iter_mapaddr = (char *)paddr + offset;
 }
@@ -517,12 +492,10 @@ abd_iter_map(struct abd_iter *aiter)
 void
 abd_iter_unmap(struct abd_iter *aiter)
 {
-	/* There's nothing left to unmap, so do nothing */
-	if (abd_iter_at_end(aiter))
-		return;
-
-	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
-	ASSERT3U(aiter->iter_mapsize, >, 0);
+	if (!abd_iter_at_end(aiter)) {
+		ASSERT3P(aiter->iter_mapaddr, !=, NULL);
+		ASSERT3U(aiter->iter_mapsize, >, 0);
+	}
 
 	aiter->iter_mapaddr = NULL;
 	aiter->iter_mapsize = 0;