mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 11:40:25 +03:00
Revert "Pre-allocate vdev I/O buffers"
Commit 86dd0fd
added preallocated I/O buffers. This is no longer
required after the recent kmem changes designed to make our memory
allocation interfaces behave more like those found on Illumos. A
deadlock in this situation is no longer possible.
However, these allocations still have the potential to be expensive.
So a potential future optimization might be to perform them with KM_NOSLEEP
so that they either succeed or fail quickly. Either case is acceptable
here because we can safely abort the aggregation.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
60e1eda929
commit
285b29d959
@ -50,7 +50,6 @@ extern "C" {
|
|||||||
* Forward declarations that lots of things need.
|
* Forward declarations that lots of things need.
|
||||||
*/
|
*/
|
||||||
typedef struct vdev_queue vdev_queue_t;
|
typedef struct vdev_queue vdev_queue_t;
|
||||||
typedef struct vdev_io vdev_io_t;
|
|
||||||
typedef struct vdev_cache vdev_cache_t;
|
typedef struct vdev_cache vdev_cache_t;
|
||||||
typedef struct vdev_cache_entry vdev_cache_entry_t;
|
typedef struct vdev_cache_entry vdev_cache_entry_t;
|
||||||
|
|
||||||
@ -117,16 +116,10 @@ struct vdev_queue {
|
|||||||
uint64_t vq_last_offset;
|
uint64_t vq_last_offset;
|
||||||
hrtime_t vq_io_complete_ts; /* time last i/o completed */
|
hrtime_t vq_io_complete_ts; /* time last i/o completed */
|
||||||
hrtime_t vq_io_delta_ts;
|
hrtime_t vq_io_delta_ts;
|
||||||
list_t vq_io_list;
|
|
||||||
zio_t vq_io_search; /* used as local for stack reduction */
|
zio_t vq_io_search; /* used as local for stack reduction */
|
||||||
kmutex_t vq_lock;
|
kmutex_t vq_lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vdev_io {
|
|
||||||
char vi_buffer[SPA_MAXBLOCKSIZE]; /* Must be first */
|
|
||||||
list_node_t vi_node;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Virtual device descriptor
|
* Virtual device descriptor
|
||||||
*/
|
*/
|
||||||
|
@ -522,8 +522,6 @@ extern void *zio_buf_alloc(size_t size);
|
|||||||
extern void zio_buf_free(void *buf, size_t size);
|
extern void zio_buf_free(void *buf, size_t size);
|
||||||
extern void *zio_data_buf_alloc(size_t size);
|
extern void *zio_data_buf_alloc(size_t size);
|
||||||
extern void zio_data_buf_free(void *buf, size_t size);
|
extern void zio_data_buf_free(void *buf, size_t size);
|
||||||
extern void *zio_vdev_alloc(void);
|
|
||||||
extern void zio_vdev_free(void *buf);
|
|
||||||
|
|
||||||
extern void zio_resubmit_stage_async(void *);
|
extern void zio_resubmit_stage_async(void *);
|
||||||
|
|
||||||
|
@ -328,9 +328,7 @@ void
|
|||||||
vdev_queue_init(vdev_t *vd)
|
vdev_queue_init(vdev_t *vd)
|
||||||
{
|
{
|
||||||
vdev_queue_t *vq = &vd->vdev_queue;
|
vdev_queue_t *vq = &vd->vdev_queue;
|
||||||
int max_active_sum;
|
|
||||||
zio_priority_t p;
|
zio_priority_t p;
|
||||||
int i;
|
|
||||||
|
|
||||||
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
vq->vq_vdev = vd;
|
vq->vq_vdev = vd;
|
||||||
@ -352,39 +350,18 @@ vdev_queue_init(vdev_t *vd)
|
|||||||
vdev_queue_offset_compare,
|
vdev_queue_offset_compare,
|
||||||
sizeof (zio_t), offsetof(struct zio, io_queue_node));
|
sizeof (zio_t), offsetof(struct zio, io_queue_node));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* A list of buffers which can be used for aggregate I/O, this
|
|
||||||
* avoids the need to allocate them on demand when memory is low.
|
|
||||||
*/
|
|
||||||
list_create(&vq->vq_io_list, sizeof (vdev_io_t),
|
|
||||||
offsetof(vdev_io_t, vi_node));
|
|
||||||
|
|
||||||
max_active_sum = zfs_vdev_sync_read_max_active +
|
|
||||||
zfs_vdev_sync_write_max_active + zfs_vdev_async_read_max_active +
|
|
||||||
zfs_vdev_async_write_max_active + zfs_vdev_scrub_max_active;
|
|
||||||
for (i = 0; i < max_active_sum; i++)
|
|
||||||
list_insert_tail(&vq->vq_io_list, zio_vdev_alloc());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
vdev_queue_fini(vdev_t *vd)
|
vdev_queue_fini(vdev_t *vd)
|
||||||
{
|
{
|
||||||
vdev_queue_t *vq = &vd->vdev_queue;
|
vdev_queue_t *vq = &vd->vdev_queue;
|
||||||
vdev_io_t *vi;
|
|
||||||
zio_priority_t p;
|
zio_priority_t p;
|
||||||
|
|
||||||
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
|
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
|
||||||
avl_destroy(&vq->vq_class[p].vqc_queued_tree);
|
avl_destroy(&vq->vq_class[p].vqc_queued_tree);
|
||||||
avl_destroy(&vq->vq_active_tree);
|
avl_destroy(&vq->vq_active_tree);
|
||||||
|
|
||||||
while ((vi = list_head(&vq->vq_io_list)) != NULL) {
|
|
||||||
list_remove(&vq->vq_io_list, vi);
|
|
||||||
zio_vdev_free(vi);
|
|
||||||
}
|
|
||||||
|
|
||||||
list_destroy(&vq->vq_io_list);
|
|
||||||
|
|
||||||
mutex_destroy(&vq->vq_lock);
|
mutex_destroy(&vq->vq_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -468,9 +445,6 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
|
|||||||
static void
|
static void
|
||||||
vdev_queue_agg_io_done(zio_t *aio)
|
vdev_queue_agg_io_done(zio_t *aio)
|
||||||
{
|
{
|
||||||
vdev_queue_t *vq = &aio->io_vd->vdev_queue;
|
|
||||||
vdev_io_t *vi = aio->io_data;
|
|
||||||
|
|
||||||
if (aio->io_type == ZIO_TYPE_READ) {
|
if (aio->io_type == ZIO_TYPE_READ) {
|
||||||
zio_t *pio;
|
zio_t *pio;
|
||||||
while ((pio = zio_walk_parents(aio)) != NULL) {
|
while ((pio = zio_walk_parents(aio)) != NULL) {
|
||||||
@ -479,9 +453,7 @@ vdev_queue_agg_io_done(zio_t *aio)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_enter(&vq->vq_lock);
|
zio_buf_free(aio->io_data, aio->io_size);
|
||||||
list_insert_tail(&vq->vq_io_list, vi);
|
|
||||||
mutex_exit(&vq->vq_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -496,7 +468,6 @@ vdev_queue_agg_io_done(zio_t *aio)
|
|||||||
static zio_t *
|
static zio_t *
|
||||||
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
||||||
{
|
{
|
||||||
vdev_io_t *vi;
|
|
||||||
zio_t *first, *last, *aio, *dio, *mandatory, *nio;
|
zio_t *first, *last, *aio, *dio, *mandatory, *nio;
|
||||||
uint64_t maxgap = 0;
|
uint64_t maxgap = 0;
|
||||||
uint64_t size;
|
uint64_t size;
|
||||||
@ -529,12 +500,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
|||||||
if (zio->io_type == ZIO_TYPE_READ)
|
if (zio->io_type == ZIO_TYPE_READ)
|
||||||
maxgap = zfs_vdev_read_gap_limit;
|
maxgap = zfs_vdev_read_gap_limit;
|
||||||
|
|
||||||
vi = list_head(&vq->vq_io_list);
|
|
||||||
if (vi == NULL) {
|
|
||||||
vi = zio_vdev_alloc();
|
|
||||||
list_insert_head(&vq->vq_io_list, vi);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We can aggregate I/Os that are sufficiently adjacent and of
|
* We can aggregate I/Os that are sufficiently adjacent and of
|
||||||
* the same flavor, as expressed by the AGG_INHERIT flags.
|
* the same flavor, as expressed by the AGG_INHERIT flags.
|
||||||
@ -622,13 +587,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
|||||||
if (first == last)
|
if (first == last)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
|
|
||||||
ASSERT(vi != NULL);
|
|
||||||
|
|
||||||
size = IO_SPAN(first, last);
|
size = IO_SPAN(first, last);
|
||||||
ASSERT3U(size, <=, zfs_vdev_aggregation_limit);
|
ASSERT3U(size, <=, zfs_vdev_aggregation_limit);
|
||||||
|
|
||||||
aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
|
aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
|
||||||
vi, size, first->io_type, zio->io_priority,
|
zio_buf_alloc(size), size, first->io_type, zio->io_priority,
|
||||||
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
|
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
|
||||||
vdev_queue_agg_io_done, NULL);
|
vdev_queue_agg_io_done, NULL);
|
||||||
aio->io_timestamp = first->io_timestamp;
|
aio->io_timestamp = first->io_timestamp;
|
||||||
@ -655,8 +618,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
|||||||
zio_execute(dio);
|
zio_execute(dio);
|
||||||
} while (dio != last);
|
} while (dio != last);
|
||||||
|
|
||||||
list_remove(&vq->vq_io_list, vi);
|
|
||||||
|
|
||||||
return (aio);
|
return (aio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,7 +55,6 @@ const char *zio_type_name[ZIO_TYPES] = {
|
|||||||
*/
|
*/
|
||||||
kmem_cache_t *zio_cache;
|
kmem_cache_t *zio_cache;
|
||||||
kmem_cache_t *zio_link_cache;
|
kmem_cache_t *zio_link_cache;
|
||||||
kmem_cache_t *zio_vdev_cache;
|
|
||||||
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
|
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
|
||||||
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
|
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
|
||||||
int zio_bulk_flags = 0;
|
int zio_bulk_flags = 0;
|
||||||
@ -132,8 +131,6 @@ zio_init(void)
|
|||||||
zio_cons, zio_dest, NULL, NULL, NULL, 0);
|
zio_cons, zio_dest, NULL, NULL, NULL, 0);
|
||||||
zio_link_cache = kmem_cache_create("zio_link_cache",
|
zio_link_cache = kmem_cache_create("zio_link_cache",
|
||||||
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
|
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
|
||||||
zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof (vdev_io_t),
|
|
||||||
PAGESIZE, NULL, NULL, NULL, NULL, NULL, 0);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For small buffers, we want a cache for each multiple of
|
* For small buffers, we want a cache for each multiple of
|
||||||
@ -218,7 +215,6 @@ zio_fini(void)
|
|||||||
zio_data_buf_cache[c] = NULL;
|
zio_data_buf_cache[c] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
kmem_cache_destroy(zio_vdev_cache);
|
|
||||||
kmem_cache_destroy(zio_link_cache);
|
kmem_cache_destroy(zio_link_cache);
|
||||||
kmem_cache_destroy(zio_cache);
|
kmem_cache_destroy(zio_cache);
|
||||||
|
|
||||||
@ -285,24 +281,6 @@ zio_data_buf_free(void *buf, size_t size)
|
|||||||
kmem_cache_free(zio_data_buf_cache[c], buf);
|
kmem_cache_free(zio_data_buf_cache[c], buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Dedicated I/O buffers to ensure that memory fragmentation never prevents
|
|
||||||
* or significantly delays the issuing of a zio. These buffers are used
|
|
||||||
* to aggregate I/O and could be used for raidz stripes.
|
|
||||||
*/
|
|
||||||
void *
|
|
||||||
zio_vdev_alloc(void)
|
|
||||||
{
|
|
||||||
return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE));
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
zio_vdev_free(void *buf)
|
|
||||||
{
|
|
||||||
kmem_cache_free(zio_vdev_cache, buf);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ==========================================================================
|
* ==========================================================================
|
||||||
* Push and pop I/O transform buffers
|
* Push and pop I/O transform buffers
|
||||||
|
Loading…
Reference in New Issue
Block a user