mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-26 04:07:45 +03:00
Fix vdev_queue_aggregate() deadlock
This deadlock may manifest itself in slightly different ways but
at the core it is caused by a memory allocation blocking on file-
system reclaim in the zio pipeline. This is normally impossible
because zio_execute() disables filesystem reclaim by setting
PF_FSTRANS on the thread. However, kmem cache allocations may
still indirectly block on file system reclaim while holding the
critical vq->vq_lock as shown below.
To resolve this issue zio_buf_alloc_flags() is introduced which
allows allocation flags to be passed. This can then be used in
vdev_queue_aggregate() with KM_NOSLEEP when allocating the
aggregate IO buffer. Since aggregating the IO is purely a
performance optimization we want this to either succeed or fail
quickly. Trying too hard to allocate this memory under the
vq->vq_lock can negatively impact performance and result in
this deadlock.
* z_wr_iss
zio_vdev_io_start
vdev_queue_io -> Takes vq->vq_lock
vdev_queue_io_to_issue
vdev_queue_aggregate
zio_buf_alloc -> Waiting on spl_kmem_cache process
* z_wr_int
zio_vdev_io_done
vdev_queue_io_done
mutex_lock -> Waiting on vq->vq_lock held by z_wr_iss
* txg_sync
spa_sync
dsl_pool_sync
zio_wait -> Waiting on zio being handled by z_wr_int
* spl_kmem_cache
spl_cache_grow_work
kv_alloc
spl_vmalloc
...
evict
zpl_evict_inode
zfs_inactive
dmu_tx_wait
txg_wait_open -> Waiting on txg_sync
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes #3808
Closes #3867
This commit is contained in:
@@ -248,6 +248,20 @@ zio_data_buf_alloc(size_t size)
|
||||
return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
|
||||
}
|
||||
|
||||
/*
|
||||
* Use zio_buf_alloc_flags when specific allocation flags are needed, e.g.
|
||||
* passing KM_NOSLEEP when it is acceptable for an allocation to fail.
|
||||
*
* The buffer is taken from zio_buf_cache[], indexed by
* (size - 1) >> SPA_MINBLOCKSHIFT. The index is VERIFY'd to be below
* SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT before the cache is consulted.
* The caller's flags are handed to kmem_cache_alloc() unchanged and its
* result is returned as-is.
*
* NOTE(review): with flags that permit failure the result is presumably
* NULL on failure -- confirm against kmem_cache_alloc() and check it in
* the caller.
*/
|
||||
void *
|
||||
zio_buf_alloc_flags(size_t size, int flags)
|
||||
{
|
||||
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
|
||||
|
||||
VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
|
||||
|
||||
return (kmem_cache_alloc(zio_buf_cache[c], flags));
|
||||
}
|
||||
|
||||
void
|
||||
zio_buf_free(void *buf, size_t size)
|
||||
{
|
||||
@@ -3475,6 +3489,7 @@ zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_phys_t *zb1,
|
||||
EXPORT_SYMBOL(zio_type_name);
|
||||
EXPORT_SYMBOL(zio_buf_alloc);
|
||||
EXPORT_SYMBOL(zio_data_buf_alloc);
|
||||
EXPORT_SYMBOL(zio_buf_alloc_flags);
|
||||
EXPORT_SYMBOL(zio_buf_free);
|
||||
EXPORT_SYMBOL(zio_data_buf_free);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user