mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-03 23:09:35 +03:00
Prevent deadlock in arc_read in Linux memory reclaim callback
Using zfs with Lustre, an arc_read can trigger kernel memory allocation that in turn leads to a memory reclaim callback and a deadlock within a single zfs process. This change uses spl_fstrans_mark and spl_fstrans_unmark to prevent the reclaim attempt and the deadlock (https://zfsonlinux.topicbox.com/groups/zfs-devel/T4db2c705ec1804ba).

The stack trace observed is:

    __schedule at ffffffff81610f2e
    schedule at ffffffff81611558
    schedule_preempt_disabled at ffffffff8161184a
    __mutex_lock at ffffffff816131e8
    arc_buf_destroy at ffffffffa0bf37d7 [zfs]
    dbuf_destroy at ffffffffa0bfa6fe [zfs]
    dbuf_evict_one at ffffffffa0bfaa96 [zfs]
    dbuf_rele_and_unlock at ffffffffa0bfa561 [zfs]
    dbuf_rele_and_unlock at ffffffffa0bfa32b [zfs]
    osd_object_delete at ffffffffa0b64ecc [osd_zfs]
    lu_object_free at ffffffffa06d6a74 [obdclass]
    lu_site_purge_objects at ffffffffa06d7fc1 [obdclass]
    lu_cache_shrink_scan at ffffffffa06d81b8 [obdclass]
    shrink_slab at ffffffff811ca9d8
    shrink_node at ffffffff811cfd94
    do_try_to_free_pages at ffffffff811cfe63
    try_to_free_pages at ffffffff811d01c4
    __alloc_pages_slowpath at ffffffff811be7f2
    __alloc_pages_nodemask at ffffffff811bf3ed
    new_slab at ffffffff81226304
    ___slab_alloc at ffffffff812272ab
    __slab_alloc at ffffffff8122740c
    kmem_cache_alloc at ffffffff81227578
    spl_kmem_cache_alloc at ffffffffa048a1fd [spl]
    arc_buf_alloc_impl at ffffffffa0befba2 [zfs]
    arc_read at ffffffffa0bf0924 [zfs]
    dbuf_read at ffffffffa0bf9083 [zfs]
    dmu_buf_hold_by_dnode at ffffffffa0c04869 [zfs]

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Mark Roper <markroper@gmail.com>
Closes #9987
This commit is contained in:
parent
4e55349857
commit
009ff83548
@@ -6178,6 +6178,17 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||
ASSERT(!embedded_bp ||
|
||||
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
|
||||
|
||||
/*
|
||||
* Normally SPL_FSTRANS will already be set since kernel threads which
|
||||
* expect to call the DMU interfaces will set it when created. System
|
||||
* calls are similarly handled by setting/cleaning the bit in the
|
||||
* registered callback (module/os/.../zfs/zpl_*).
|
||||
*
|
||||
* External consumers such as Lustre which call the exported DMU
|
||||
* interfaces may not have set SPL_FSTRANS. To avoid a deadlock
|
||||
* on the hash_lock always set and clear the bit.
|
||||
*/
|
||||
fstrans_cookie_t cookie = spl_fstrans_mark();
|
||||
top:
|
||||
if (!embedded_bp) {
|
||||
/*
|
||||
@@ -6636,6 +6647,7 @@ out:
|
||||
/* embedded bps don't actually go to disk */
|
||||
if (!embedded_bp)
|
||||
spa_read_history_add(spa, zb, *arc_flags);
|
||||
spl_fstrans_unmark(cookie);
|
||||
return (rc);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user