Revert "Pre-allocate vdev I/O buffers"

Commit 86dd0fd added preallocated I/O buffers.  This is no longer
required after the recent kmem changes designed to make our memory
allocation interfaces behave more like those found on Illumos.  A
deadlock in this situation is no longer possible.

However, these allocations still have the potential to be expensive.
So a potential future optimization might be to perform them with KM_NOSLEEP
so that they either succeed or fail quickly.  Either case is acceptable
here because we can safely abort the aggregation.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Brian Behlendorf 2014-12-12 16:40:21 -08:00
parent 60e1eda929
commit 285b29d959
4 changed files with 2 additions and 72 deletions

View File

@ -50,7 +50,6 @@ extern "C" {
* Forward declarations that lots of things need. * Forward declarations that lots of things need.
*/ */
typedef struct vdev_queue vdev_queue_t; typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_io vdev_io_t;
typedef struct vdev_cache vdev_cache_t; typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t; typedef struct vdev_cache_entry vdev_cache_entry_t;
@ -117,16 +116,10 @@ struct vdev_queue {
uint64_t vq_last_offset; uint64_t vq_last_offset;
hrtime_t vq_io_complete_ts; /* time last i/o completed */ hrtime_t vq_io_complete_ts; /* time last i/o completed */
hrtime_t vq_io_delta_ts; hrtime_t vq_io_delta_ts;
list_t vq_io_list;
zio_t vq_io_search; /* used as local for stack reduction */ zio_t vq_io_search; /* used as local for stack reduction */
kmutex_t vq_lock; kmutex_t vq_lock;
}; };
struct vdev_io {
char vi_buffer[SPA_MAXBLOCKSIZE]; /* Must be first */
list_node_t vi_node;
};
/* /*
* Virtual device descriptor * Virtual device descriptor
*/ */

View File

@ -522,8 +522,6 @@ extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size); extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size); extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size); extern void zio_data_buf_free(void *buf, size_t size);
extern void *zio_vdev_alloc(void);
extern void zio_vdev_free(void *buf);
extern void zio_resubmit_stage_async(void *); extern void zio_resubmit_stage_async(void *);

View File

@ -328,9 +328,7 @@ void
vdev_queue_init(vdev_t *vd) vdev_queue_init(vdev_t *vd)
{ {
vdev_queue_t *vq = &vd->vdev_queue; vdev_queue_t *vq = &vd->vdev_queue;
int max_active_sum;
zio_priority_t p; zio_priority_t p;
int i;
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
vq->vq_vdev = vd; vq->vq_vdev = vd;
@ -352,39 +350,18 @@ vdev_queue_init(vdev_t *vd)
vdev_queue_offset_compare, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_queue_node)); sizeof (zio_t), offsetof(struct zio, io_queue_node));
} }
/*
* A list of buffers which can be used for aggregate I/O, this
* avoids the need to allocate them on demand when memory is low.
*/
list_create(&vq->vq_io_list, sizeof (vdev_io_t),
offsetof(vdev_io_t, vi_node));
max_active_sum = zfs_vdev_sync_read_max_active +
zfs_vdev_sync_write_max_active + zfs_vdev_async_read_max_active +
zfs_vdev_async_write_max_active + zfs_vdev_scrub_max_active;
for (i = 0; i < max_active_sum; i++)
list_insert_tail(&vq->vq_io_list, zio_vdev_alloc());
} }
void void
vdev_queue_fini(vdev_t *vd) vdev_queue_fini(vdev_t *vd)
{ {
vdev_queue_t *vq = &vd->vdev_queue; vdev_queue_t *vq = &vd->vdev_queue;
vdev_io_t *vi;
zio_priority_t p; zio_priority_t p;
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
avl_destroy(&vq->vq_class[p].vqc_queued_tree); avl_destroy(&vq->vq_class[p].vqc_queued_tree);
avl_destroy(&vq->vq_active_tree); avl_destroy(&vq->vq_active_tree);
while ((vi = list_head(&vq->vq_io_list)) != NULL) {
list_remove(&vq->vq_io_list, vi);
zio_vdev_free(vi);
}
list_destroy(&vq->vq_io_list);
mutex_destroy(&vq->vq_lock); mutex_destroy(&vq->vq_lock);
} }
@ -468,9 +445,6 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
static void static void
vdev_queue_agg_io_done(zio_t *aio) vdev_queue_agg_io_done(zio_t *aio)
{ {
vdev_queue_t *vq = &aio->io_vd->vdev_queue;
vdev_io_t *vi = aio->io_data;
if (aio->io_type == ZIO_TYPE_READ) { if (aio->io_type == ZIO_TYPE_READ) {
zio_t *pio; zio_t *pio;
while ((pio = zio_walk_parents(aio)) != NULL) { while ((pio = zio_walk_parents(aio)) != NULL) {
@ -479,9 +453,7 @@ vdev_queue_agg_io_done(zio_t *aio)
} }
} }
mutex_enter(&vq->vq_lock); zio_buf_free(aio->io_data, aio->io_size);
list_insert_tail(&vq->vq_io_list, vi);
mutex_exit(&vq->vq_lock);
} }
/* /*
@ -496,7 +468,6 @@ vdev_queue_agg_io_done(zio_t *aio)
static zio_t * static zio_t *
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
{ {
vdev_io_t *vi;
zio_t *first, *last, *aio, *dio, *mandatory, *nio; zio_t *first, *last, *aio, *dio, *mandatory, *nio;
uint64_t maxgap = 0; uint64_t maxgap = 0;
uint64_t size; uint64_t size;
@ -529,12 +500,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
if (zio->io_type == ZIO_TYPE_READ) if (zio->io_type == ZIO_TYPE_READ)
maxgap = zfs_vdev_read_gap_limit; maxgap = zfs_vdev_read_gap_limit;
vi = list_head(&vq->vq_io_list);
if (vi == NULL) {
vi = zio_vdev_alloc();
list_insert_head(&vq->vq_io_list, vi);
}
/* /*
* We can aggregate I/Os that are sufficiently adjacent and of * We can aggregate I/Os that are sufficiently adjacent and of
* the same flavor, as expressed by the AGG_INHERIT flags. * the same flavor, as expressed by the AGG_INHERIT flags.
@ -622,13 +587,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
if (first == last) if (first == last)
return (NULL); return (NULL);
ASSERT(vi != NULL);
size = IO_SPAN(first, last); size = IO_SPAN(first, last);
ASSERT3U(size, <=, zfs_vdev_aggregation_limit); ASSERT3U(size, <=, zfs_vdev_aggregation_limit);
aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
vi, size, first->io_type, zio->io_priority, zio_buf_alloc(size), size, first->io_type, zio->io_priority,
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
vdev_queue_agg_io_done, NULL); vdev_queue_agg_io_done, NULL);
aio->io_timestamp = first->io_timestamp; aio->io_timestamp = first->io_timestamp;
@ -655,8 +618,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
zio_execute(dio); zio_execute(dio);
} while (dio != last); } while (dio != last);
list_remove(&vq->vq_io_list, vi);
return (aio); return (aio);
} }

View File

@ -55,7 +55,6 @@ const char *zio_type_name[ZIO_TYPES] = {
*/ */
kmem_cache_t *zio_cache; kmem_cache_t *zio_cache;
kmem_cache_t *zio_link_cache; kmem_cache_t *zio_link_cache;
kmem_cache_t *zio_vdev_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
int zio_bulk_flags = 0; int zio_bulk_flags = 0;
@ -132,8 +131,6 @@ zio_init(void)
zio_cons, zio_dest, NULL, NULL, NULL, 0); zio_cons, zio_dest, NULL, NULL, NULL, 0);
zio_link_cache = kmem_cache_create("zio_link_cache", zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof (vdev_io_t),
PAGESIZE, NULL, NULL, NULL, NULL, NULL, 0);
/* /*
* For small buffers, we want a cache for each multiple of * For small buffers, we want a cache for each multiple of
@ -218,7 +215,6 @@ zio_fini(void)
zio_data_buf_cache[c] = NULL; zio_data_buf_cache[c] = NULL;
} }
kmem_cache_destroy(zio_vdev_cache);
kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_link_cache);
kmem_cache_destroy(zio_cache); kmem_cache_destroy(zio_cache);
@ -285,24 +281,6 @@ zio_data_buf_free(void *buf, size_t size)
kmem_cache_free(zio_data_buf_cache[c], buf); kmem_cache_free(zio_data_buf_cache[c], buf);
} }
/*
* Dedicated I/O buffers to ensure that memory fragmentation never prevents
* or significantly delays the issuing of a zio. These buffers are used
* to aggregate I/O and could be used for raidz stripes.
*/
void *
zio_vdev_alloc(void)
{
return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE));
}
void
zio_vdev_free(void *buf)
{
kmem_cache_free(zio_vdev_cache, buf);
}
/* /*
* ========================================================================== * ==========================================================================
* Push and pop I/O transform buffers * Push and pop I/O transform buffers