From 787acae0b5cd139ea0f9fa60558cca28d4673b23 Mon Sep 17 00:00:00 2001 From: Giuseppe Di Natale Date: Sat, 16 Sep 2017 11:00:19 -0700 Subject: [PATCH] Linux 4.14 compat: IO acct, global_page_state, etc generic_start_io_acct/generic_end_io_acct in the master branch of the linux kernel requires that the request_queue be provided. Move the logic from freemem in the spl to arc_free_memory in arc.c. Do this so we can take advantage of global_page_state interface checks in zfs. Upstream kernel replaced struct block_device with struct gendisk in struct bio. Determine if the function bio_set_dev exists during configure and have zfs use that if it exists. bio_set_dev https://github.com/torvalds/linux/commit/74d4699 global_node_page_state https://github.com/torvalds/linux/commit/75ef718 io acct https://github.com/torvalds/linux/commit/d62e26b Reviewed-by: Brian Behlendorf Signed-off-by: Giuseppe Di Natale Closes #6635 --- config/kernel-bio_set_dev.m4 | 22 ++++++++++++++++++++ config/kernel-generic_io_acct.m4 | 35 ++++++++++++++++++++++++++++---- config/kernel.m4 | 4 +++- include/linux/blkdev_compat.h | 18 ++++++++++++---- module/zfs/arc.c | 22 ++++++++++++++++++-- module/zfs/vdev_disk.c | 12 +++++++++-- module/zfs/zvol.c | 18 ++++++++++------ 7 files changed, 112 insertions(+), 19 deletions(-) create mode 100644 config/kernel-bio_set_dev.m4 diff --git a/config/kernel-bio_set_dev.m4 b/config/kernel-bio_set_dev.m4 new file mode 100644 index 000000000..6be873c56 --- /dev/null +++ b/config/kernel-bio_set_dev.m4 @@ -0,0 +1,22 @@ +dnl # +dnl # Linux 4.14 API, +dnl # +dnl # The bio_set_dev() helper was introduced as part of the transition +dnl # to have struct gendisk in struct bio. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [ + AC_MSG_CHECKING([whether bio_set_dev() exists]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/bio.h> + #include <linux/fs.h> + ],[ + struct block_device *bdev = NULL; + struct bio *bio = NULL; + bio_set_dev(bio, bdev); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-generic_io_acct.m4 b/config/kernel-generic_io_acct.m4 index 25bfa3848..0aa762162 100644 --- a/config/kernel-generic_io_acct.m4 +++ b/config/kernel-generic_io_acct.m4 @@ -4,8 +4,8 @@ dnl # dnl # torvalds/linux@394ffa503bc40e32d7f54a9b817264e81ce131b4 allows us to dnl # increment iostat counters without generic_make_request(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [ - AC_MSG_CHECKING([whether generic IO accounting symbols are avaliable]) +AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG], [ + AC_MSG_CHECKING([whether 3 arg generic IO accounting symbols are available]) ZFS_LINUX_TRY_COMPILE_SYMBOL([ #include <linux/bio.h> @@ -18,8 +18,35 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [ generic_end_io_acct(0, NULL, 0); ], [generic_start_io_acct], [block/bio.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT, 1, - [generic_start_io_acct()/generic_end_io_acct() avaliable]) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, + [generic_start_io_acct()/generic_end_io_acct() available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # Linux 4.14 API, +dnl # +dnl # generic_start_io_acct/generic_end_io_acct now require request_queue to be +dnl # provided. 
No functional changes, but preparation for inflight accounting +dnl # +AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG], [ + AC_MSG_CHECKING([whether 4 arg generic IO accounting symbols are available]) + ZFS_LINUX_TRY_COMPILE_SYMBOL([ + #include <linux/bio.h> + + void (*generic_start_io_acct_f)(struct request_queue *, int, + unsigned long, struct hd_struct *) = &generic_start_io_acct; + void (*generic_end_io_acct_f)(struct request_queue *, int, + struct hd_struct *, unsigned long) = &generic_end_io_acct; + ], [ + generic_start_io_acct(NULL, 0, 0, NULL); + generic_end_io_acct(NULL, 0, NULL, 0); + ], [generic_start_io_acct], [block/bio.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1, + [generic_start_io_acct()/generic_end_io_acct() 4 arg available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 3739f85fb..c3ed5cf33 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -24,6 +24,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE ZFS_AC_KERNEL_BIO_BVEC_ITER ZFS_AC_KERNEL_BIO_FAILFAST_DTD + ZFS_AC_KERNEL_BIO_SET_DEV ZFS_AC_KERNEL_REQ_FAILFAST_MASK ZFS_AC_KERNEL_REQ_OP_DISCARD ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE @@ -113,7 +114,8 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN - ZFS_AC_KERNEL_GENERIC_IO_ACCT + ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG + ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG ZFS_AC_KERNEL_FPU ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index 428664a0b..c8a8e856d 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -598,16 +598,26 @@ blk_queue_discard_granularity(struct request_queue *q, unsigned int dg) */ #define VDEV_HOLDER ((void *)0x2401de7) -#ifndef HAVE_GENERIC_IO_ACCT static inline void -generic_start_io_acct(int rw, unsigned long sectors, struct hd_struct *part) 
+blk_generic_start_io_acct(struct request_queue *q, int rw, + unsigned long sectors, struct hd_struct *part) { +#if defined(HAVE_GENERIC_IO_ACCT_3ARG) + generic_start_io_acct(rw, sectors, part); +#elif defined(HAVE_GENERIC_IO_ACCT_4ARG) + generic_start_io_acct(q, rw, sectors, part); +#endif } static inline void -generic_end_io_acct(int rw, struct hd_struct *part, unsigned long start_time) +blk_generic_end_io_acct(struct request_queue *q, int rw, + struct hd_struct *part, unsigned long start_time) { -} +#if defined(HAVE_GENERIC_IO_ACCT_3ARG) + generic_end_io_acct(rw, part, start_time); +#elif defined(HAVE_GENERIC_IO_ACCT_4ARG) + generic_end_io_acct(q, rw, part, start_time); #endif +} #endif /* _ZFS_BLKDEV_H */ diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 75f0af9d5..57bf6b83c 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -4665,6 +4665,24 @@ arc_all_memory(void) #endif } +#ifdef _KERNEL +static uint64_t +arc_free_memory(void) +{ +#ifdef ZFS_GLOBAL_NODE_PAGE_STATE + return (nr_free_pages() + + global_node_page_state(NR_INACTIVE_FILE) + + global_node_page_state(NR_INACTIVE_ANON) + + global_node_page_state(NR_SLAB_RECLAIMABLE)); +#else + return (nr_free_pages() + + global_page_state(NR_INACTIVE_FILE) + + global_page_state(NR_INACTIVE_ANON) + + global_page_state(NR_SLAB_RECLAIMABLE)); +#endif +} +#endif + typedef enum free_memory_reason_t { FMR_UNKNOWN, FMR_NEEDFREE, @@ -4701,7 +4719,7 @@ arc_available_memory(void) int64_t lowest = INT64_MAX; free_memory_reason_t r = FMR_UNKNOWN; #ifdef _KERNEL - uint64_t available_memory = ptob(freemem); + uint64_t available_memory = ptob(arc_free_memory()); int64_t n; #ifdef __linux__ pgcnt_t needfree = btop(arc_need_free); @@ -6904,7 +6922,7 @@ static int arc_memory_throttle(uint64_t reserve, uint64_t txg) { #ifdef _KERNEL - uint64_t available_memory = ptob(freemem); + uint64_t available_memory = ptob(arc_free_memory()); static uint64_t page_load = 0; static uint64_t last_txg = 0; #ifdef __linux__ diff --git 
a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index c6bb24a33..87cc2c892 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -503,6 +503,14 @@ vdev_submit_bio_impl(struct bio *bio) #endif } +#ifndef HAVE_BIO_SET_DEV +static inline void +bio_set_dev(struct bio *bio, struct block_device *bdev) +{ + bio->bi_bdev = bdev; +} +#endif /* !HAVE_BIO_SET_DEV */ + static inline void vdev_submit_bio(struct bio *bio) { @@ -585,7 +593,7 @@ retry: /* Matching put called by vdev_disk_physio_completion */ vdev_disk_dio_get(dr); - dr->dr_bio[i]->bi_bdev = bdev; + bio_set_dev(dr->dr_bio[i], bdev); BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9; dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; dr->dr_bio[i]->bi_private = dr; @@ -659,7 +667,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) bio->bi_end_io = vdev_disk_io_flush_completion; bio->bi_private = zio; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio_set_flush(bio); vdev_submit_bio(bio); invalidate_bdev(bdev); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 1cb03fc7e..fe7dbb371 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -761,7 +761,8 @@ zvol_write(void *arg) ASSERT(zv && zv->zv_open_count > 0); start_jif = jiffies; - generic_start_io_acct(WRITE, bio_sectors(bio), &zv->zv_disk->part0); + blk_generic_start_io_acct(zv->zv_queue, WRITE, bio_sectors(bio), + &zv->zv_disk->part0); sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; @@ -794,7 +795,8 @@ zvol_write(void *arg) zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); - generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif); + blk_generic_end_io_acct(zv->zv_queue, WRITE, &zv->zv_disk->part0, + start_jif); BIO_END_IO(bio, -error); kmem_free(zvr, sizeof (zv_request_t)); } @@ -840,7 +842,8 @@ zvol_discard(void *arg) ASSERT(zv && zv->zv_open_count > 0); start_jif = jiffies; - generic_start_io_acct(WRITE, bio_sectors(bio), &zv->zv_disk->part0); + 
blk_generic_start_io_acct(zv->zv_queue, WRITE, bio_sectors(bio), + &zv->zv_disk->part0); sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; @@ -881,7 +884,8 @@ unlock: zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); - generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif); + blk_generic_end_io_acct(zv->zv_queue, WRITE, &zv->zv_disk->part0, + start_jif); BIO_END_IO(bio, -error); kmem_free(zvr, sizeof (zv_request_t)); } @@ -902,7 +906,8 @@ zvol_read(void *arg) ASSERT(zv && zv->zv_open_count > 0); start_jif = jiffies; - generic_start_io_acct(READ, bio_sectors(bio), &zv->zv_disk->part0); + blk_generic_start_io_acct(zv->zv_queue, READ, bio_sectors(bio), + &zv->zv_disk->part0); while (uio.uio_resid > 0 && uio.uio_loffset < volsize) { uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1); @@ -922,7 +927,8 @@ zvol_read(void *arg) zfs_range_unlock(zvr->rl); rw_exit(&zv->zv_suspend_lock); - generic_end_io_acct(READ, &zv->zv_disk->part0, start_jif); + blk_generic_end_io_acct(zv->zv_queue, READ, &zv->zv_disk->part0, + start_jif); BIO_END_IO(bio, -error); kmem_free(zvr, sizeof (zv_request_t)); }