From 16fcdea36340c658b4557fd34a74915fd618f7a6 Mon Sep 17 00:00:00 2001
From: Chris Dunlop
Date: Wed, 6 May 2015 09:59:17 +1000
Subject: [PATCH] arc_evict, arc_evict_ghost: reduce stack usage using
 kmem_zalloc

With debugging enabled and depending on your kernel config, the size of
arc_buf_hdr_t can blow out the stack of arc_evict() and arc_evict_ghost()
to greater than 1024 bytes. Let's avoid this.

Signed-off-by: Chris Dunlop
Signed-off-by: Brian Behlendorf
Closes #3377
---
 module/zfs/arc.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 421c81e1c..6975ada62 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -1825,13 +1825,15 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
 	kmutex_t *hash_lock;
 	boolean_t have_lock;
 	void *stolen = NULL;
-	arc_buf_hdr_t marker = {{{ 0 }}};
+	arc_buf_hdr_t *marker;
 	int count = 0;
 
 	ASSERT(state == arc_mru || state == arc_mfu);
 
 	evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
 
+	marker = kmem_zalloc(sizeof (arc_buf_hdr_t), KM_SLEEP);
+
 top:
 	mutex_enter(&state->arcs_mtx);
 	mutex_enter(&evicted_state->arcs_mtx);
@@ -1866,14 +1868,14 @@ top:
 		 * the hot code path, so don't sleep.
 		 */
 		if (!recycle && count++ > arc_evict_iterations) {
-			list_insert_after(list, ab, &marker);
+			list_insert_after(list, ab, marker);
 			mutex_exit(&evicted_state->arcs_mtx);
 			mutex_exit(&state->arcs_mtx);
 			kpreempt(KPREEMPT_SYNC);
 			mutex_enter(&state->arcs_mtx);
 			mutex_enter(&evicted_state->arcs_mtx);
-			ab_prev = list_prev(list, &marker);
-			list_remove(list, &marker);
+			ab_prev = list_prev(list, marker);
+			list_remove(list, marker);
 			count = 0;
 			continue;
 		}
@@ -1957,6 +1959,8 @@ top:
 		goto top;
 	}
 
+	kmem_free(marker, sizeof (arc_buf_hdr_t));
+
 	if (bytes_evicted < bytes)
 		dprintf("only evicted %lld bytes from %x\n",
 		    (longlong_t)bytes_evicted, state->arcs_state);
@@ -1986,7 +1990,7 @@ arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
     arc_buf_contents_t type)
 {
 	arc_buf_hdr_t *ab, *ab_prev;
-	arc_buf_hdr_t marker;
+	arc_buf_hdr_t *marker;
 	list_t *list = &state->arcs_list[type];
 	kmutex_t *hash_lock;
 	uint64_t bytes_deleted = 0;
@@ -1994,7 +1998,9 @@ arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
 	int count = 0;
 
 	ASSERT(GHOST_STATE(state));
-	bzero(&marker, sizeof (marker));
+
+	marker = kmem_zalloc(sizeof (arc_buf_hdr_t), KM_SLEEP);
+
 top:
 	mutex_enter(&state->arcs_mtx);
 	for (ab = list_tail(list); ab; ab = ab_prev) {
@@ -2020,12 +2026,12 @@ top:
 		 * before reacquiring the lock.
 		 */
 		if (count++ > arc_evict_iterations) {
-			list_insert_after(list, ab, &marker);
+			list_insert_after(list, ab, marker);
 			mutex_exit(&state->arcs_mtx);
 			kpreempt(KPREEMPT_SYNC);
 			mutex_enter(&state->arcs_mtx);
-			ab_prev = list_prev(list, &marker);
-			list_remove(list, &marker);
+			ab_prev = list_prev(list, marker);
+			list_remove(list, marker);
 			count = 0;
 			continue;
 		}
@@ -2057,13 +2063,13 @@ top:
 			 * hash lock to become available. Once its
 			 * available, restart from where we left off.
			 */
-			list_insert_after(list, ab, &marker);
+			list_insert_after(list, ab, marker);
 			mutex_exit(&state->arcs_mtx);
 			mutex_enter(hash_lock);
 			mutex_exit(hash_lock);
 			mutex_enter(&state->arcs_mtx);
-			ab_prev = list_prev(list, &marker);
-			list_remove(list, &marker);
+			ab_prev = list_prev(list, marker);
+			list_remove(list, marker);
 		} else {
 			bufs_skipped += 1;
 		}
@@ -2076,6 +2082,8 @@ top:
 		goto top;
 	}
 
+	kmem_free(marker, sizeof (arc_buf_hdr_t));
+
 	if (bufs_skipped) {
 		ARCSTAT_INCR(arcstat_mutex_miss, bufs_skipped);
 		ASSERT(bytes >= 0);
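
Note on the pattern (illustration only, not part of the patch): a list-traversal
marker only has to exist for the duration of the scan, so a zeroed heap
allocation is a drop-in replacement for a large on-stack struct. The standalone
sketch below shows the same before/after shape in userspace C, using
calloc()/free() in place of the kernel's kmem_zalloc()/kmem_free(); big_hdr_t
and the scan functions are hypothetical stand-ins for arc_buf_hdr_t and the
ARC eviction loops.

#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical stand-in for arc_buf_hdr_t, which grows well past 1K
 * with debugging enabled.
 */
typedef struct big_hdr {
	char debug_fields[1024];
} big_hdr_t;

/*
 * Before: the marker is a local, so every call consumes more than
 * 1024 bytes of stack, whether or not the slow path is ever taken.
 */
static void
scan_with_stack_marker(void)
{
	big_hdr_t marker = { { 0 } };

	(void) marker;	/* ... insert into list, scan, remove ... */
}

/*
 * After: the marker comes from the heap and is freed when the scan
 * finishes, so the stack frame stays small regardless of the struct's
 * size.  The abort() only mimics KM_SLEEP semantics, under which the
 * kernel allocation blocks until memory is available and never fails.
 */
static void
scan_with_heap_marker(void)
{
	big_hdr_t *marker = calloc(1, sizeof (big_hdr_t));

	if (marker == NULL)
		abort();	/* kmem_zalloc(..., KM_SLEEP) cannot fail */
	/* ... insert into list, scan, remove ... */
	free(marker);
}

int
main(void)
{
	scan_with_stack_marker();
	scan_with_heap_marker();
	(void) printf("marker is %zu bytes\n", sizeof (big_hdr_t));
	return (0);
}

Because kmem_zalloc(..., KM_SLEEP) blocks rather than returning NULL, the
kernel code in the patch needs no failure check after the allocation, and the
single kmem_free() before each function's return is sufficient cleanup.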