mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-01 05:49:35 +03:00
FreeBSD: Hardcode abd_chunk_size to PAGE_SIZE
It makes no sense to set it below PAGE_SIZE, since it increases all overheads and makes returning memory to OS problematic. It makes no sense to set it above PAGE_SIZE, since such allocations and especially frees are too expensive and cause KVA fragmentation to benefit from fewer chunks. After that it makes no sense to keep more complicated math here. What may have sense though is just a tunable border between linear and scatter ABDs, previously also controlled by this tunable. Retain that functionality by taking abd_scatter_min_size tunable from Linux, just with different default value. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Brian Atkinson <batkinson@lanl.gov> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Closes #12328
This commit is contained in:
parent
41b33dce44
commit
49bb454120
@ -61,7 +61,6 @@ typedef struct abd {
|
||||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
#if defined(__FreeBSD__) && defined(_KERNEL)
|
||||
uint_t abd_chunk_size;
|
||||
void *abd_chunks[1]; /* actually variable-length */
|
||||
#else
|
||||
uint_t abd_nents;
|
||||
|
@ -79,22 +79,29 @@ struct {
|
||||
} abd_sums;
|
||||
|
||||
/*
|
||||
* The size of the chunks ABD allocates. Because the sizes allocated from the
|
||||
* kmem_cache can't change, this tunable can only be modified at boot. Changing
|
||||
* it at runtime would cause ABD iteration to work incorrectly for ABDs which
|
||||
* were allocated with the old size, so a safeguard has been put in place which
|
||||
* will cause the machine to panic if you change it and try to access the data
|
||||
* within a scattered ABD.
|
||||
* zfs_abd_scatter_min_size is the minimum allocation size to use scatter
|
||||
* ABD's for. Smaller allocations will use linear ABD's which use
|
||||
* zio_[data_]buf_alloc().
|
||||
*
|
||||
* Scatter ABD's use at least one page each, so sub-page allocations waste
|
||||
* some space when allocated as scatter (e.g. 2KB scatter allocation wastes
|
||||
* half of each page). Using linear ABD's for small allocations means that
|
||||
* they will be put on slabs which contain many allocations.
|
||||
*
|
||||
* Linear ABDs for multi-page allocations are easier to use, and in some cases
|
||||
* it allows to avoid buffer copying. But allocation and especially free
|
||||
* of multi-page linear ABDs are expensive operations due to KVA mapping and
|
||||
* unmapping, and with time they cause KVA fragmentations.
|
||||
*/
|
||||
size_t zfs_abd_chunk_size = 4096;
|
||||
size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;
|
||||
|
||||
#if defined(_KERNEL)
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
|
||||
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
|
||||
&zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates");
|
||||
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");
|
||||
#endif
|
||||
|
||||
kmem_cache_t *abd_chunk_cache;
|
||||
@ -102,23 +109,16 @@ static kstat_t *abd_ksp;
|
||||
|
||||
/*
|
||||
* We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are
|
||||
* just a single zero'd sized zfs_abd_chunk_size buffer. This
|
||||
* allows us to conserve memory by only using a single zero buffer
|
||||
* for the scatter chunks.
|
||||
* just a single zero'd page-sized buffer. This allows us to conserve
|
||||
* memory by only using a single zero buffer for the scatter chunks.
|
||||
*/
|
||||
abd_t *abd_zero_scatter = NULL;
|
||||
static char *abd_zero_buf = NULL;
|
||||
|
||||
static void
|
||||
abd_free_chunk(void *c)
|
||||
{
|
||||
kmem_cache_free(abd_chunk_cache, c);
|
||||
}
|
||||
|
||||
static uint_t
|
||||
abd_chunkcnt_for_bytes(size_t size)
|
||||
{
|
||||
return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size);
|
||||
return ((size + PAGE_MASK) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static inline uint_t
|
||||
@ -132,7 +132,7 @@ abd_scatter_chunkcnt(abd_t *abd)
|
||||
boolean_t
|
||||
abd_size_alloc_linear(size_t size)
|
||||
{
|
||||
return (size <= zfs_abd_chunk_size ? B_TRUE : B_FALSE);
|
||||
return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
@ -140,7 +140,7 @@ abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
||||
{
|
||||
uint_t n = abd_scatter_chunkcnt(abd);
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
int waste = n * zfs_abd_chunk_size - abd->abd_size;
|
||||
int waste = (n << PAGE_SHIFT) - abd->abd_size;
|
||||
if (op == ABDSTAT_INCR) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
||||
@ -173,11 +173,11 @@ abd_verify_scatter(abd_t *abd)
|
||||
uint_t i, n;
|
||||
|
||||
/*
|
||||
* There is no scatter linear pages in FreeBSD so there is an
|
||||
* if an error if the ABD has been marked as a linear page.
|
||||
* There is no scatter linear pages in FreeBSD so there is
|
||||
* an error if the ABD has been marked as a linear page.
|
||||
*/
|
||||
ASSERT(!abd_is_linear_page(abd));
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_offset, <, zfs_abd_chunk_size);
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE);
|
||||
n = abd_scatter_chunkcnt(abd);
|
||||
for (i = 0; i < n; i++) {
|
||||
ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
|
||||
@ -191,11 +191,9 @@ abd_alloc_chunks(abd_t *abd, size_t size)
|
||||
|
||||
n = abd_chunkcnt_for_bytes(size);
|
||||
for (i = 0; i < n; i++) {
|
||||
void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
ASSERT3P(c, !=, NULL);
|
||||
ABD_SCATTER(abd).abd_chunks[i] = c;
|
||||
ABD_SCATTER(abd).abd_chunks[i] =
|
||||
kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
}
|
||||
ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
|
||||
}
|
||||
|
||||
void
|
||||
@ -205,7 +203,8 @@ abd_free_chunks(abd_t *abd)
|
||||
|
||||
n = abd_scatter_chunkcnt(abd);
|
||||
for (i = 0; i < n; i++) {
|
||||
abd_free_chunk(ABD_SCATTER(abd).abd_chunks[i]);
|
||||
kmem_cache_free(abd_chunk_cache,
|
||||
ABD_SCATTER(abd).abd_chunks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -250,15 +249,13 @@ abd_alloc_zero_scatter(void)
|
||||
uint_t i, n;
|
||||
|
||||
n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
|
||||
abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP);
|
||||
abd_zero_buf = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
|
||||
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
|
||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
|
||||
|
||||
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
|
||||
ABD_SCATTER(abd_zero_scatter).abd_chunk_size =
|
||||
zfs_abd_chunk_size;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
ABD_SCATTER(abd_zero_scatter).abd_chunks[i] =
|
||||
@ -266,18 +263,18 @@ abd_alloc_zero_scatter(void)
|
||||
}
|
||||
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, zfs_abd_chunk_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
abd_free_zero_scatter(void)
|
||||
{
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE);
|
||||
|
||||
abd_free_struct(abd_zero_scatter);
|
||||
abd_zero_scatter = NULL;
|
||||
kmem_free(abd_zero_buf, zfs_abd_chunk_size);
|
||||
kmem_cache_free(abd_chunk_cache, abd_zero_buf);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -305,7 +302,7 @@ abd_kstats_update(kstat_t *ksp, int rw)
|
||||
void
|
||||
abd_init(void)
|
||||
{
|
||||
abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
|
||||
abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
|
||||
NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
|
||||
|
||||
wmsum_init(&abd_sums.abdstat_struct_size, 0);
|
||||
@ -382,7 +379,7 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
|
||||
|
||||
size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
|
||||
size_t chunkcnt = abd_chunkcnt_for_bytes(
|
||||
(new_offset % zfs_abd_chunk_size) + size);
|
||||
(new_offset & PAGE_MASK) + size);
|
||||
|
||||
ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd));
|
||||
|
||||
@ -397,7 +394,7 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
|
||||
}
|
||||
|
||||
if (abd == NULL)
|
||||
abd = abd_alloc_struct(chunkcnt * zfs_abd_chunk_size);
|
||||
abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT);
|
||||
|
||||
/*
|
||||
* Even if this buf is filesystem metadata, we only track that
|
||||
@ -405,34 +402,16 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
|
||||
* this case. Therefore, we don't ever use ABD_FLAG_META here.
|
||||
*/
|
||||
|
||||
ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size;
|
||||
ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
|
||||
ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK;
|
||||
|
||||
/* Copy the scatterlist starting at the correct offset */
|
||||
(void) memcpy(&ABD_SCATTER(abd).abd_chunks,
|
||||
&ABD_SCATTER(sabd).abd_chunks[new_offset /
|
||||
zfs_abd_chunk_size],
|
||||
&ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT],
|
||||
chunkcnt * sizeof (void *));
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_offset(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
|
||||
aiter->iter_pos) % zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_index(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
|
||||
aiter->iter_pos) / zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the abd_iter.
|
||||
*/
|
||||
@ -483,29 +462,25 @@ void
|
||||
abd_iter_map(struct abd_iter *aiter)
|
||||
{
|
||||
void *paddr;
|
||||
size_t offset = 0;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* Panic if someone has changed zfs_abd_chunk_size */
|
||||
IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size ==
|
||||
ABD_SCATTER(aiter->iter_abd).abd_chunk_size);
|
||||
|
||||
/* There's nothing left to iterate over, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
if (abd_is_linear(aiter->iter_abd)) {
|
||||
offset = aiter->iter_pos;
|
||||
aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
|
||||
paddr = ABD_LINEAR_BUF(aiter->iter_abd);
|
||||
abd_t *abd = aiter->iter_abd;
|
||||
size_t offset = aiter->iter_pos;
|
||||
if (abd_is_linear(abd)) {
|
||||
aiter->iter_mapsize = abd->abd_size - offset;
|
||||
paddr = ABD_LINEAR_BUF(abd);
|
||||
} else {
|
||||
size_t index = abd_iter_scatter_chunk_index(aiter);
|
||||
offset = abd_iter_scatter_chunk_offset(aiter);
|
||||
aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
paddr = ABD_SCATTER(aiter->iter_abd).abd_chunks[index];
|
||||
offset += ABD_SCATTER(abd).abd_offset;
|
||||
paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT];
|
||||
offset &= PAGE_MASK;
|
||||
aiter->iter_mapsize = MIN(PAGE_SIZE - offset,
|
||||
abd->abd_size - aiter->iter_pos);
|
||||
}
|
||||
aiter->iter_mapaddr = (char *)paddr + offset;
|
||||
}
|
||||
@ -517,12 +492,10 @@ abd_iter_map(struct abd_iter *aiter)
|
||||
void
|
||||
abd_iter_unmap(struct abd_iter *aiter)
|
||||
{
|
||||
/* There's nothing left to unmap, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
if (!abd_iter_at_end(aiter)) {
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
}
|
||||
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user