From b0be93e81a4ace96a799d3705da6832633eb4325 Mon Sep 17 00:00:00 2001 From: Isaac Huang Date: Wed, 31 Aug 2016 00:26:43 -0600 Subject: [PATCH] ABD page support to vdev_disk.c Signed-off-by: Isaac Huang --- include/sys/abd.h | 7 ++++ include/sys/spa.h | 1 - module/zfs/abd.c | 60 +++++++++++++++++++++++++++++++- module/zfs/vdev_disk.c | 79 +++++++++++++----------------------------- 4 files changed, 90 insertions(+), 57 deletions(-) diff --git a/include/sys/abd.h b/include/sys/abd.h index 43aaa7a15..6e3530aec 100644 --- a/include/sys/abd.h +++ b/include/sys/abd.h @@ -32,6 +32,7 @@ #include #ifdef _KERNEL #include +#include #include #endif @@ -112,6 +113,12 @@ int abd_cmp(abd_t *, abd_t *); int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t); void abd_zero_off(abd_t *, size_t, size_t); +#if defined(_KERNEL) && defined(HAVE_SPL) +unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int, + size_t); +unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t); +#endif + /* * Wrappers for calls with offsets of 0 */ diff --git a/include/sys/spa.h b/include/sys/spa.h index c35128c0d..d679e53d6 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -586,7 +586,6 @@ extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, nvlist_t *zplprops); -extern int spa_import_rootpool(char *devpath, char *devid); extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); diff --git a/module/zfs/abd.c b/module/zfs/abd.c index 9fa4a5d43..306c47536 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -999,8 +999,66 @@ abd_cmp(abd_t *dabd, abd_t *sabd) abd_cmp_cb, NULL)); } - #if defined(_KERNEL) && defined(HAVE_SPL) +/* + * ABD counterpart of bio_nr_pages(): pages spanned by @size bytes of @abd.
+ * @off is the byte offset into @abd where the @size-byte range begins. + */ +unsigned long +abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off) +{ + unsigned long pos; + + if (abd_is_linear(abd)) + pos = (unsigned long)abd_to_buf(abd) + off; + else + pos = abd->abd_u.abd_scatter.abd_offset + off; + + return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) + - (pos >> PAGE_SHIFT); +} + +/* + * bio_map() for a scatter ABD: add pages to @bio until it accepts no more. + * @off is the byte offset into @abd at which the @io_size bytes begin. + * Returns the part of @io_size not added to @bio (0 when all of it fit). + */ +unsigned int +abd_scatter_bio_map_off(struct bio *bio, abd_t *abd, + unsigned int io_size, size_t off) +{ + int i; + struct abd_iter aiter; + + ASSERT(!abd_is_linear(abd)); + ASSERT3U(io_size, <=, abd->abd_size - off); + + abd_iter_init(&aiter, abd); + abd_iter_advance(&aiter, off); + + for (i = 0; i < bio->bi_max_vecs; i++) { + struct page *pg; + size_t len, pgoff, index; + + if (io_size <= 0) + break; + + pgoff = abd_iter_scatter_chunk_offset(&aiter); + len = MIN(io_size, PAGESIZE - pgoff); + ASSERT(len > 0); + + index = abd_iter_scatter_chunk_index(&aiter); + pg = abd->abd_u.abd_scatter.abd_chunks[index]; + if (bio_add_page(bio, pg, len, pgoff) != len) + break; + + io_size -= len; + abd_iter_advance(&aiter, len); + } + + return (io_size); +} + /* Tunable Parameters */ module_param(zfs_abd_scatter_enabled, int, 0644); MODULE_PARM_DESC(zfs_abd_scatter_enabled, diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 67759d021..ae6ed4de9 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -43,7 +43,6 @@ static void *zfs_vdev_holder = VDEV_HOLDER; */ typedef struct dio_request { zio_t *dr_zio; /* Parent ZIO */ - void *dr_loanbuf; /* borrowed abd buffer */ atomic_t dr_ref; /* References */ int dr_error; /* Bio error */ int dr_bio_count; /* Count of bio's */ @@ -404,7 +403,6 @@ vdev_disk_dio_put(dio_request_t *dr) */ if (rc == 0) { zio_t *zio = dr->dr_zio; - void *loanbuf = dr->dr_loanbuf; int error = dr->dr_error; vdev_disk_dio_free(dr); @@ -414,14 +412,6 @@ vdev_disk_dio_put(dio_request_t *dr) 
ASSERT3S(zio->io_error, >=, 0); if (zio->io_error) vdev_disk_error(zio); - /* ABD placeholder */ - if (loanbuf != NULL) { - if (zio->io_type == ZIO_TYPE_READ) { - abd_copy_from_buf(zio->io_abd, loanbuf, - zio->io_size); - } - zio_buf_free(loanbuf, zio->io_size); - } zio_delay_interrupt(zio); } @@ -446,17 +436,10 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error) #endif } - /* Drop reference aquired by __vdev_disk_physio */ + /* Drop reference acquired by __vdev_disk_physio */ rc = vdev_disk_dio_put(dr); } -static inline unsigned long -bio_nr_pages(void *bio_ptr, unsigned int bio_size) -{ - return ((((unsigned long)bio_ptr + bio_size + PAGE_SIZE - 1) >> - PAGE_SHIFT) - ((unsigned long)bio_ptr >> PAGE_SHIFT)); -} - static unsigned int bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size) { @@ -496,6 +479,15 @@ bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size) return (bio_size); } +static unsigned int +bio_map_abd_off(struct bio *bio, abd_t *abd, unsigned int size, size_t off) +{ + if (abd_is_linear(abd)) + return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, size)); + + return (abd_scatter_bio_map_off(bio, abd, size, off)); +} + #ifndef bio_set_op_attrs #define bio_set_op_attrs(bio, rw, flags) \ do { (bio)->bi_rw |= (rw)|(flags); } while (0) @@ -528,11 +520,11 @@ vdev_submit_bio(struct bio *bio) } static int -__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, - size_t kbuf_size, uint64_t kbuf_offset, int rw, int flags) +__vdev_disk_physio(struct block_device *bdev, zio_t *zio, + size_t io_size, uint64_t io_offset, int rw, int flags) { dio_request_t *dr; - caddr_t bio_ptr; + uint64_t abd_offset; uint64_t bio_offset; int bio_size, bio_count = 16; int i = 0, error = 0; @@ -540,7 +532,8 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, struct blk_plug plug; #endif - ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size); + ASSERT(zio != NULL); + ASSERT3U(io_offset + io_size, <=, 
bdev->bd_inode->i_size); retry: dr = vdev_disk_dio_alloc(bio_count); @@ -559,32 +552,10 @@ retry: * their volume block size to match the maximum request size and * the common case will be one bio per vdev IO request. */ - if (zio != NULL) { - abd_t *abd = zio->io_abd; - /* - * ABD placeholder - * We can't use abd_borrow_buf routines here since our - * completion context is interrupt and abd refcounts - * take a mutex (in debug mode). - */ - if (abd_is_linear(abd)) { - bio_ptr = abd_to_buf(abd); - dr->dr_loanbuf = NULL; - } else { - bio_ptr = zio_buf_alloc(zio->io_size); - dr->dr_loanbuf = bio_ptr; - if (zio->io_type != ZIO_TYPE_READ) - abd_copy_to_buf(bio_ptr, abd, zio->io_size); - - } - } else { - bio_ptr = kbuf_ptr; - dr->dr_loanbuf = NULL; - } - - bio_offset = kbuf_offset; - bio_size = kbuf_size; + abd_offset = 0; /* offset into zio->io_abd for the next bio */ + bio_offset = io_offset; + bio_size = io_size; for (i = 0; i <= dr->dr_bio_count; i++) { /* Finished constructing bio's for given buffer */ @@ -597,8 +568,6 @@ retry: * are needed we allocate a larger dio and warn the user. 
*/ if (dr->dr_bio_count == i) { - if (dr->dr_loanbuf) - zio_buf_free(dr->dr_loanbuf, zio->io_size); vdev_disk_dio_free(dr); bio_count *= 2; goto retry; @@ -606,10 +575,9 @@ retry: /* bio_alloc() with __GFP_WAIT never returns NULL */ dr->dr_bio[i] = bio_alloc(GFP_NOIO, - MIN(bio_nr_pages(bio_ptr, bio_size), BIO_MAX_PAGES)); + MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset), + BIO_MAX_PAGES)); if (unlikely(dr->dr_bio[i] == NULL)) { - if (dr->dr_loanbuf) - zio_buf_free(dr->dr_loanbuf, zio->io_size); vdev_disk_dio_free(dr); return (ENOMEM); } @@ -624,10 +592,11 @@ retry: bio_set_op_attrs(dr->dr_bio[i], rw, flags); /* Remaining size is returned to become the new size */ - bio_size = bio_map(dr->dr_bio[i], bio_ptr, bio_size); + bio_size = bio_map_abd_off(dr->dr_bio[i], zio->io_abd, + bio_size, abd_offset); /* Advance in buffer and construct another bio if needed */ - bio_ptr += BIO_BI_SIZE(dr->dr_bio[i]); + abd_offset += BIO_BI_SIZE(dr->dr_bio[i]); bio_offset += BIO_BI_SIZE(dr->dr_bio[i]); } @@ -769,7 +738,7 @@ vdev_disk_io_start(zio_t *zio) } zio->io_target_timestamp = zio_handle_io_delay(zio); - error = __vdev_disk_physio(vd->vd_bdev, zio, NULL, + error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_size, zio->io_offset, rw, flags); if (error) { zio->io_error = error;