From 285b29d959d3792e45d75c2ce228552d396b445f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Dec 2014 16:40:21 -0800 Subject: [PATCH] Revert "Pre-allocate vdev I/O buffers" Commit 86dd0fd added preallocated I/O buffers. This is no longer required after the recent kmem changes designed to make our memory allocation interfaces behave more like those found on Illumos. A deadlock in this situation is no longer possible. However, these allocations still have the potential to be expensive. So a potential future optimization might be to perform them with KM_NOSLEEP so that they either succeed or fail quickly. Either case is acceptable here because we can safely abort the aggregation. Signed-off-by: Brian Behlendorf --- include/sys/vdev_impl.h | 7 ------- include/sys/zio.h | 2 -- module/zfs/vdev_queue.c | 43 ++--------------------------------------- module/zfs/zio.c | 22 --------------------- 4 files changed, 2 insertions(+), 72 deletions(-) diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index c0c8f5287..a8dc9510e 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -50,7 +50,6 @@ extern "C" { * Forward declarations that lots of things need. 
*/ typedef struct vdev_queue vdev_queue_t; -typedef struct vdev_io vdev_io_t; typedef struct vdev_cache vdev_cache_t; typedef struct vdev_cache_entry vdev_cache_entry_t; @@ -117,16 +116,10 @@ struct vdev_queue { uint64_t vq_last_offset; hrtime_t vq_io_complete_ts; /* time last i/o completed */ hrtime_t vq_io_delta_ts; - list_t vq_io_list; zio_t vq_io_search; /* used as local for stack reduction */ kmutex_t vq_lock; }; -struct vdev_io { - char vi_buffer[SPA_MAXBLOCKSIZE]; /* Must be first */ - list_node_t vi_node; -}; - /* * Virtual device descriptor */ diff --git a/include/sys/zio.h b/include/sys/zio.h index e10e965e6..18e7a40a3 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -522,8 +522,6 @@ extern void *zio_buf_alloc(size_t size); extern void zio_buf_free(void *buf, size_t size); extern void *zio_data_buf_alloc(size_t size); extern void zio_data_buf_free(void *buf, size_t size); -extern void *zio_vdev_alloc(void); -extern void zio_vdev_free(void *buf); extern void zio_resubmit_stage_async(void *); diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 34e4420da..3fa4219f2 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -328,9 +328,7 @@ void vdev_queue_init(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; - int max_active_sum; zio_priority_t p; - int i; mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); vq->vq_vdev = vd; @@ -352,39 +350,18 @@ vdev_queue_init(vdev_t *vd) vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_queue_node)); } - - /* - * A list of buffers which can be used for aggregate I/O, this - * avoids the need to allocate them on demand when memory is low. 
- */ - list_create(&vq->vq_io_list, sizeof (vdev_io_t), - offsetof(vdev_io_t, vi_node)); - - max_active_sum = zfs_vdev_sync_read_max_active + - zfs_vdev_sync_write_max_active + zfs_vdev_async_read_max_active + - zfs_vdev_async_write_max_active + zfs_vdev_scrub_max_active; - for (i = 0; i < max_active_sum; i++) - list_insert_tail(&vq->vq_io_list, zio_vdev_alloc()); } void vdev_queue_fini(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; - vdev_io_t *vi; zio_priority_t p; for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) avl_destroy(&vq->vq_class[p].vqc_queued_tree); avl_destroy(&vq->vq_active_tree); - while ((vi = list_head(&vq->vq_io_list)) != NULL) { - list_remove(&vq->vq_io_list, vi); - zio_vdev_free(vi); - } - - list_destroy(&vq->vq_io_list); - mutex_destroy(&vq->vq_lock); } @@ -468,9 +445,6 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) static void vdev_queue_agg_io_done(zio_t *aio) { - vdev_queue_t *vq = &aio->io_vd->vdev_queue; - vdev_io_t *vi = aio->io_data; - if (aio->io_type == ZIO_TYPE_READ) { zio_t *pio; while ((pio = zio_walk_parents(aio)) != NULL) { @@ -479,9 +453,7 @@ vdev_queue_agg_io_done(zio_t *aio) } } - mutex_enter(&vq->vq_lock); - list_insert_tail(&vq->vq_io_list, vi); - mutex_exit(&vq->vq_lock); + zio_buf_free(aio->io_data, aio->io_size); } /* @@ -496,7 +468,6 @@ vdev_queue_agg_io_done(zio_t *aio) static zio_t * vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) { - vdev_io_t *vi; zio_t *first, *last, *aio, *dio, *mandatory, *nio; uint64_t maxgap = 0; uint64_t size; @@ -529,12 +500,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) if (zio->io_type == ZIO_TYPE_READ) maxgap = zfs_vdev_read_gap_limit; - vi = list_head(&vq->vq_io_list); - if (vi == NULL) { - vi = zio_vdev_alloc(); - list_insert_head(&vq->vq_io_list, vi); - } - /* * We can aggregate I/Os that are sufficiently adjacent and of * the same flavor, as expressed by the AGG_INHERIT flags. 
@@ -622,13 +587,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) if (first == last) return (NULL); - ASSERT(vi != NULL); - size = IO_SPAN(first, last); ASSERT3U(size, <=, zfs_vdev_aggregation_limit); aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, - vi, size, first->io_type, zio->io_priority, + zio_buf_alloc(size), size, first->io_type, zio->io_priority, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL); aio->io_timestamp = first->io_timestamp; @@ -655,8 +618,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) zio_execute(dio); } while (dio != last); - list_remove(&vq->vq_io_list, vi); - return (aio); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index b4e19067f..6fa23d2fc 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -55,7 +55,6 @@ const char *zio_type_name[ZIO_TYPES] = { */ kmem_cache_t *zio_cache; kmem_cache_t *zio_link_cache; -kmem_cache_t *zio_vdev_cache; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; int zio_bulk_flags = 0; @@ -132,8 +131,6 @@ zio_init(void) zio_cons, zio_dest, NULL, NULL, NULL, 0); zio_link_cache = kmem_cache_create("zio_link_cache", sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); - zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof (vdev_io_t), - PAGESIZE, NULL, NULL, NULL, NULL, NULL, 0); /* * For small buffers, we want a cache for each multiple of @@ -218,7 +215,6 @@ zio_fini(void) zio_data_buf_cache[c] = NULL; } - kmem_cache_destroy(zio_vdev_cache); kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_cache); @@ -285,24 +281,6 @@ zio_data_buf_free(void *buf, size_t size) kmem_cache_free(zio_data_buf_cache[c], buf); } -/* - * Dedicated I/O buffers to ensure that memory fragmentation never prevents - * or significantly delays the issuing of a zio. These buffers are used - * to aggregate I/O and could be used for raidz stripes. 
- */ -void * -zio_vdev_alloc(void) -{ - return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE)); -} - -void -zio_vdev_free(void *buf) -{ - kmem_cache_free(zio_vdev_cache, buf); - -} - /* * ========================================================================== * Push and pop I/O transform buffers