From bdd11cbb90a2afa54fd00935ac0d34b4ddf2515c Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 6 Jul 2021 20:39:23 -0400 Subject: [PATCH] FreeBSD: Hardcode abd_chunk_size to PAGE_SIZE It makes no sense to set it below PAGE_SIZE, since it increases all overheads and makes returning memory to OS problematic. It makes no sense to set it above PAGE_SIZE, since such allocations and especially frees are too expensive and cause KVA fragmentation to benefit from fewer chunks. After that it makes no sense to keep more complicated math here. What may have sense though is just a tunable border between linear and scatter ABDs, previously also controlled by this tunable. Retain that functionality by taking abd_scatter_min_size tunable from Linux, just with different default value. Reviewed-by: Brian Behlendorf Reviewed-by: Brian Atkinson Reviewed-by: Ryan Moeller Signed-off-by: Alexander Motin Closes #12328 --- include/sys/abd.h | 1 - module/os/freebsd/zfs/abd_os.c | 129 +++++++++++++-------------------- 2 files changed, 51 insertions(+), 79 deletions(-) diff --git a/include/sys/abd.h b/include/sys/abd.h index a7eee89ca..6903e0c0e 100644 --- a/include/sys/abd.h +++ b/include/sys/abd.h @@ -61,7 +61,6 @@ typedef struct abd { struct abd_scatter { uint_t abd_offset; #if defined(__FreeBSD__) && defined(_KERNEL) - uint_t abd_chunk_size; void *abd_chunks[1]; /* actually variable-length */ #else uint_t abd_nents; diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c index 4f5b33d94..95a83542f 100644 --- a/module/os/freebsd/zfs/abd_os.c +++ b/module/os/freebsd/zfs/abd_os.c @@ -79,22 +79,29 @@ struct { } abd_sums; /* - * The size of the chunks ABD allocates. Because the sizes allocated from the - * kmem_cache can't change, this tunable can only be modified at boot. Changing - * it at runtime would cause ABD iteration to work incorrectly for ABDs which - * were allocated with the old size, so a safeguard has been put in place which - * will cause the machine to panic if you change it and try to access the data - * within a scattered ABD. + * zfs_abd_scatter_min_size is the minimum allocation size to use scatter + * ABD's for. Smaller allocations will use linear ABD's which use + * zio_[data_]buf_alloc(). + * + * Scatter ABD's use at least one page each, so sub-page allocations waste + * some space when allocated as scatter (e.g. 2KB scatter allocation wastes + * half of each page). Using linear ABD's for small allocations means that + * they will be put on slabs which contain many allocations. + * + * Linear ABDs for multi-page allocations are easier to use, and in some cases + * it allows to avoid buffer copying. But allocation and especially free + * of multi-page linear ABDs are expensive operations due to KVA mapping and + * unmapping, and with time they cause KVA fragmentations. */ -size_t zfs_abd_chunk_size = 4096; +size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1; #if defined(_KERNEL) SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN, &zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers"); -SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN, - &zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates"); +SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN, + &zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations."); #endif kmem_cache_t *abd_chunk_cache; @@ -102,23 +109,16 @@ static kstat_t *abd_ksp; /* * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are - * just a single zero'd sized zfs_abd_chunk_size buffer. This - * allows us to conserve memory by only using a single zero buffer - * for the scatter chunks. + * just a single zero'd page-sized buffer. This allows us to conserve + * memory by only using a single zero buffer for the scatter chunks. */ abd_t *abd_zero_scatter = NULL; static char *abd_zero_buf = NULL; -static void -abd_free_chunk(void *c) -{ - kmem_cache_free(abd_chunk_cache, c); -} - static uint_t abd_chunkcnt_for_bytes(size_t size) { - return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size); + return ((size + PAGE_MASK) >> PAGE_SHIFT); } static inline uint_t @@ -132,7 +132,7 @@ abd_scatter_chunkcnt(abd_t *abd) boolean_t abd_size_alloc_linear(size_t size) { - return (size <= zfs_abd_chunk_size ? B_TRUE : B_FALSE); + return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE); } void @@ -140,7 +140,7 @@ abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op) { uint_t n = abd_scatter_chunkcnt(abd); ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); - int waste = n * zfs_abd_chunk_size - abd->abd_size; + int waste = (n << PAGE_SHIFT) - abd->abd_size; if (op == ABDSTAT_INCR) { ABDSTAT_BUMP(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size); @@ -173,11 +173,11 @@ abd_verify_scatter(abd_t *abd) uint_t i, n; /* - * There is no scatter linear pages in FreeBSD so there is an - * if an error if the ABD has been marked as a linear page. + * There is no scatter linear pages in FreeBSD so there is + * an error if the ABD has been marked as a linear page. */ ASSERT(!abd_is_linear_page(abd)); - ASSERT3U(ABD_SCATTER(abd).abd_offset, <, zfs_abd_chunk_size); + ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE); n = abd_scatter_chunkcnt(abd); for (i = 0; i < n; i++) { ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL); @@ -191,11 +191,9 @@ abd_alloc_chunks(abd_t *abd, size_t size) n = abd_chunkcnt_for_bytes(size); for (i = 0; i < n; i++) { - void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE); - ASSERT3P(c, !=, NULL); - ABD_SCATTER(abd).abd_chunks[i] = c; + ABD_SCATTER(abd).abd_chunks[i] = + kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE); } - ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size; } void @@ -205,7 +203,8 @@ abd_free_chunks(abd_t *abd) n = abd_scatter_chunkcnt(abd); for (i = 0; i < n; i++) { - abd_free_chunk(ABD_SCATTER(abd).abd_chunks[i]); + kmem_cache_free(abd_chunk_cache, + ABD_SCATTER(abd).abd_chunks[i]); } } @@ -250,15 +249,13 @@ abd_alloc_zero_scatter(void) uint_t i, n; n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE); - abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP); + abd_zero_buf = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE); abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS; abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; ABD_SCATTER(abd_zero_scatter).abd_offset = 0; - ABD_SCATTER(abd_zero_scatter).abd_chunk_size = - zfs_abd_chunk_size; for (i = 0; i < n; i++) { ABD_SCATTER(abd_zero_scatter).abd_chunks[i] = @@ -266,18 +263,18 @@ abd_alloc_zero_scatter(void) } ABDSTAT_BUMP(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, zfs_abd_chunk_size); + ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE); } static void abd_free_zero_scatter(void) { ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size); + ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE); abd_free_struct(abd_zero_scatter); abd_zero_scatter = NULL; - kmem_free(abd_zero_buf, zfs_abd_chunk_size); + kmem_cache_free(abd_chunk_cache, abd_zero_buf); } static int @@ -305,7 +302,7 @@ abd_kstats_update(kstat_t *ksp, int rw) void abd_init(void) { - abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, + abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0, NULL, NULL, NULL, NULL, 0, KMC_NODEBUG); wmsum_init(&abd_sums.abdstat_struct_size, 0); @@ -382,7 +379,7 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, size_t new_offset = ABD_SCATTER(sabd).abd_offset + off; size_t chunkcnt = abd_chunkcnt_for_bytes( - (new_offset % zfs_abd_chunk_size) + size); + (new_offset & PAGE_MASK) + size); ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd)); @@ -397,7 +394,7 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, } if (abd == NULL) - abd = abd_alloc_struct(chunkcnt * zfs_abd_chunk_size); + abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT); /* * Even if this buf is filesystem metadata, we only track that @@ -405,34 +402,16 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, * this case. Therefore, we don't ever use ABD_FLAG_META here. */ - ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size; - ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size; + ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK; /* Copy the scatterlist starting at the correct offset */ (void) memcpy(&ABD_SCATTER(abd).abd_chunks, - &ABD_SCATTER(sabd).abd_chunks[new_offset / - zfs_abd_chunk_size], + &ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT], chunkcnt * sizeof (void *)); return (abd); } -static inline size_t -abd_iter_scatter_chunk_offset(struct abd_iter *aiter) -{ - ASSERT(!abd_is_linear(aiter->iter_abd)); - return ((ABD_SCATTER(aiter->iter_abd).abd_offset + - aiter->iter_pos) % zfs_abd_chunk_size); -} - -static inline size_t -abd_iter_scatter_chunk_index(struct abd_iter *aiter) -{ - ASSERT(!abd_is_linear(aiter->iter_abd)); - return ((ABD_SCATTER(aiter->iter_abd).abd_offset + - aiter->iter_pos) / zfs_abd_chunk_size); -} - /* * Initialize the abd_iter. */ @@ -483,29 +462,25 @@ void abd_iter_map(struct abd_iter *aiter) { void *paddr; - size_t offset = 0; ASSERT3P(aiter->iter_mapaddr, ==, NULL); ASSERT0(aiter->iter_mapsize); - /* Panic if someone has changed zfs_abd_chunk_size */ - IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size == - ABD_SCATTER(aiter->iter_abd).abd_chunk_size); - /* There's nothing left to iterate over, so do nothing */ if (abd_iter_at_end(aiter)) return; - if (abd_is_linear(aiter->iter_abd)) { - offset = aiter->iter_pos; - aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; - paddr = ABD_LINEAR_BUF(aiter->iter_abd); + abd_t *abd = aiter->iter_abd; + size_t offset = aiter->iter_pos; + if (abd_is_linear(abd)) { + aiter->iter_mapsize = abd->abd_size - offset; + paddr = ABD_LINEAR_BUF(abd); } else { - size_t index = abd_iter_scatter_chunk_index(aiter); - offset = abd_iter_scatter_chunk_offset(aiter); - aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset, - aiter->iter_abd->abd_size - aiter->iter_pos); - paddr = ABD_SCATTER(aiter->iter_abd).abd_chunks[index]; + offset += ABD_SCATTER(abd).abd_offset; + paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT]; + offset &= PAGE_MASK; + aiter->iter_mapsize = MIN(PAGE_SIZE - offset, + abd->abd_size - aiter->iter_pos); } aiter->iter_mapaddr = (char *)paddr + offset; } @@ -517,12 +492,10 @@ abd_iter_map(struct abd_iter *aiter) void abd_iter_unmap(struct abd_iter *aiter) { - /* There's nothing left to unmap, so do nothing */ - if (abd_iter_at_end(aiter)) - return; - - ASSERT3P(aiter->iter_mapaddr, !=, NULL); - ASSERT3U(aiter->iter_mapsize, >, 0); + if (!abd_iter_at_end(aiter)) { + ASSERT3P(aiter->iter_mapaddr, !=, NULL); + ASSERT3U(aiter->iter_mapsize, >, 0); + } aiter->iter_mapaddr = NULL; aiter->iter_mapsize = 0;