mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 11:18:52 +03:00
fix abd_nr_pages_off for gang abd
`__vdev_disk_physio()` uses `abd_nr_pages_off()` to allocate a bio with a sufficient number of iovecs to process this zio (i.e. `nr_iovecs`/`bi_max_vecs`). If there are not enough iovecs in the bio, then additional bios will be allocated. However, this is a sub-optimal code path. In particular, it requires several abd calls (to `abd_nr_pages_off()` and `abd_bio_map_off()`), which have to walk the constituents of the ABD (the pages or the gang children) because they are looking for offsets > 0.

For gang ABDs, `abd_nr_pages_off()` returns the number of iovecs needed for the first constituent, rather than the sum of all constituents (within the requested range). This always under-estimates the required number of iovecs, which causes us to always need several bios. The end result is that `__vdev_disk_physio()` is usually O(n^2) for gang ABDs (and occasionally O(n^3), when more than 16 bios are needed).

This commit fixes `abd_nr_pages_off()`'s handling of gang ABDs, to correctly determine how many iovecs are needed, by adding up the number of iovecs for each of the gang children in the requested range.

Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #11536
This commit is contained in:
@@ -925,17 +925,28 @@ abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
|
||||
{
|
||||
unsigned long pos;
|
||||
|
||||
while (abd_is_gang(abd))
|
||||
abd = abd_gang_get_offset(abd, &off);
|
||||
if (abd_is_gang(abd)) {
|
||||
unsigned long count = 0;
|
||||
|
||||
for (abd_t *cabd = abd_gang_get_offset(abd, &off);
|
||||
cabd != NULL && size != 0;
|
||||
cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
|
||||
ASSERT3U(off, <, cabd->abd_size);
|
||||
int mysize = MIN(size, cabd->abd_size - off);
|
||||
count += abd_nr_pages_off(cabd, mysize, off);
|
||||
size -= mysize;
|
||||
off = 0;
|
||||
}
|
||||
return (count);
|
||||
}
|
||||
|
||||
ASSERT(!abd_is_gang(abd));
|
||||
if (abd_is_linear(abd))
|
||||
pos = (unsigned long)abd_to_buf(abd) + off;
|
||||
else
|
||||
pos = ABD_SCATTER(abd).abd_offset + off;
|
||||
|
||||
return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
|
||||
(pos >> PAGE_SHIFT);
|
||||
return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
|
||||
(pos >> PAGE_SHIFT));
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
@@ -1010,7 +1021,6 @@ unsigned int
|
||||
abd_bio_map_off(struct bio *bio, abd_t *abd,
|
||||
unsigned int io_size, size_t off)
|
||||
{
|
||||
int i;
|
||||
struct abd_iter aiter;
|
||||
|
||||
ASSERT3U(io_size, <=, abd->abd_size - off);
|
||||
@@ -1024,7 +1034,7 @@ abd_bio_map_off(struct bio *bio, abd_t *abd,
|
||||
abd_iter_init(&aiter, abd);
|
||||
abd_iter_advance(&aiter, off);
|
||||
|
||||
for (i = 0; i < bio->bi_max_vecs; i++) {
|
||||
for (int i = 0; i < bio->bi_max_vecs; i++) {
|
||||
struct page *pg;
|
||||
size_t len, sgoff, pgoff;
|
||||
struct scatterlist *sg;
|
||||
|
||||
Reference in New Issue
Block a user