mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 18:34:22 +03:00
Reset l2ad_hand and l2ad_first in l2arc_evict
Increasing l2arc_write_size or l2arc_write_boost can result in l2arc_write_buffers() not having enough space to perform its writes and panic zio_write_phys(). Instead of resetting l2ad_hand to l2ad_start at the end of l2arc_write_buffers() and not taking into account a possible user-mediated increase of l2arc_write_max, we do this in l2arc_evict(), right after l2arc_write_size() has run. If there is not enough space to evict (ie we will exceed l2ad_end) we evict to the end of the device, reset l2ad_hand to l2ad_start, set l2ad_first to 0 and iterate l2arc_evict(). We avoid infinite iteration of l2arc_evict() by making sure in l2arc_write_size() that l2ad_start + size does not exceed l2ad_end. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Signed-off-by: George Amanakis <gamanakis@gmail.com> Closes #10154
This commit is contained in:
parent
c96a32e1a3
commit
37c22948e5
@ -7467,9 +7467,9 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t
|
static uint64_t
|
||||||
l2arc_write_size(void)
|
l2arc_write_size(l2arc_dev_t *dev)
|
||||||
{
|
{
|
||||||
uint64_t size;
|
uint64_t size, dev_size;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure our globals have meaningful values in case the user
|
* Make sure our globals have meaningful values in case the user
|
||||||
@ -7486,6 +7486,23 @@ l2arc_write_size(void)
|
|||||||
if (arc_warm == B_FALSE)
|
if (arc_warm == B_FALSE)
|
||||||
size += l2arc_write_boost;
|
size += l2arc_write_boost;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure the write size does not exceed the size of the cache
|
||||||
|
* device. This is important in l2arc_evict(), otherwise infinite
|
||||||
|
* iteration can occur.
|
||||||
|
*/
|
||||||
|
dev_size = dev->l2ad_end - dev->l2ad_start;
|
||||||
|
if (size >= dev_size) {
|
||||||
|
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
|
||||||
|
"exceeds the size of the cache device (guid %llu), "
|
||||||
|
"resetting them to the default (%d)",
|
||||||
|
dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
|
||||||
|
size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
|
||||||
|
|
||||||
|
if (arc_warm == B_FALSE)
|
||||||
|
size += l2arc_write_boost;
|
||||||
|
}
|
||||||
|
|
||||||
return (size);
|
return (size);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -8008,22 +8025,22 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
|
|||||||
arc_buf_hdr_t *hdr, *hdr_prev;
|
arc_buf_hdr_t *hdr, *hdr_prev;
|
||||||
kmutex_t *hash_lock;
|
kmutex_t *hash_lock;
|
||||||
uint64_t taddr;
|
uint64_t taddr;
|
||||||
|
boolean_t rerun;
|
||||||
|
|
||||||
buflist = &dev->l2ad_buflist;
|
buflist = &dev->l2ad_buflist;
|
||||||
|
|
||||||
if (!all && dev->l2ad_first) {
|
top:
|
||||||
|
rerun = B_FALSE;
|
||||||
|
if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
|
||||||
/*
|
/*
|
||||||
* This is the first sweep through the device. There is
|
* When there is no space to accomodate upcoming writes,
|
||||||
* nothing to evict.
|
* evict to the end. Then bump the write hand to the start
|
||||||
*/
|
* and iterate. This iteration does not happen indefinitely
|
||||||
return;
|
* as we make sure in l2arc_write_size() that when l2ad_hand
|
||||||
}
|
* is reset, the write size does not exceed the end of the
|
||||||
|
* device.
|
||||||
if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) {
|
|
||||||
/*
|
|
||||||
* When nearing the end of the device, evict to the end
|
|
||||||
* before the device write hand jumps to the start.
|
|
||||||
*/
|
*/
|
||||||
|
rerun = B_TRUE;
|
||||||
taddr = dev->l2ad_end;
|
taddr = dev->l2ad_end;
|
||||||
} else {
|
} else {
|
||||||
taddr = dev->l2ad_hand + distance;
|
taddr = dev->l2ad_hand + distance;
|
||||||
@ -8031,7 +8048,15 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
|
|||||||
DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist,
|
DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist,
|
||||||
uint64_t, taddr, boolean_t, all);
|
uint64_t, taddr, boolean_t, all);
|
||||||
|
|
||||||
top:
|
if (!all && dev->l2ad_first) {
|
||||||
|
/*
|
||||||
|
* This is the first sweep through the device. There is
|
||||||
|
* nothing to evict.
|
||||||
|
*/
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
retry:
|
||||||
mutex_enter(&dev->l2ad_mtx);
|
mutex_enter(&dev->l2ad_mtx);
|
||||||
for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) {
|
for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) {
|
||||||
hdr_prev = list_prev(buflist, hdr);
|
hdr_prev = list_prev(buflist, hdr);
|
||||||
@ -8052,7 +8077,7 @@ top:
|
|||||||
mutex_exit(&dev->l2ad_mtx);
|
mutex_exit(&dev->l2ad_mtx);
|
||||||
mutex_enter(hash_lock);
|
mutex_enter(hash_lock);
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
goto top;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -8101,6 +8126,17 @@ top:
|
|||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
}
|
}
|
||||||
mutex_exit(&dev->l2ad_mtx);
|
mutex_exit(&dev->l2ad_mtx);
|
||||||
|
|
||||||
|
out:
|
||||||
|
if (rerun) {
|
||||||
|
/*
|
||||||
|
* Bump device hand to the device start if it is approaching the
|
||||||
|
* end. l2arc_evict() has already evicted ahead for this case.
|
||||||
|
*/
|
||||||
|
dev->l2ad_hand = dev->l2ad_start;
|
||||||
|
dev->l2ad_first = B_FALSE;
|
||||||
|
goto top;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -8448,15 +8484,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
|
ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
|
||||||
ARCSTAT_INCR(arcstat_l2_psize, write_psize);
|
ARCSTAT_INCR(arcstat_l2_psize, write_psize);
|
||||||
|
|
||||||
/*
|
|
||||||
* Bump device hand to the device start if it is approaching the end.
|
|
||||||
* l2arc_evict() will already have evicted ahead for this case.
|
|
||||||
*/
|
|
||||||
if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
|
|
||||||
dev->l2ad_hand = dev->l2ad_start;
|
|
||||||
dev->l2ad_first = B_FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
dev->l2ad_writing = B_TRUE;
|
dev->l2ad_writing = B_TRUE;
|
||||||
(void) zio_wait(pio);
|
(void) zio_wait(pio);
|
||||||
dev->l2ad_writing = B_FALSE;
|
dev->l2ad_writing = B_FALSE;
|
||||||
@ -8539,7 +8566,7 @@ l2arc_feed_thread(void *unused)
|
|||||||
|
|
||||||
ARCSTAT_BUMP(arcstat_l2_feeds);
|
ARCSTAT_BUMP(arcstat_l2_feeds);
|
||||||
|
|
||||||
size = l2arc_write_size();
|
size = l2arc_write_size(dev);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Evict L2ARC buffers that will be overwritten.
|
* Evict L2ARC buffers that will be overwritten.
|
||||||
|
@ -49,7 +49,7 @@ post =
|
|||||||
[tests/functional/cache]
|
[tests/functional/cache]
|
||||||
tests = ['cache_001_pos', 'cache_002_pos', 'cache_003_pos', 'cache_004_neg',
|
tests = ['cache_001_pos', 'cache_002_pos', 'cache_003_pos', 'cache_004_neg',
|
||||||
'cache_005_neg', 'cache_006_pos', 'cache_007_neg', 'cache_008_neg',
|
'cache_005_neg', 'cache_006_pos', 'cache_007_neg', 'cache_008_neg',
|
||||||
'cache_009_pos', 'cache_010_neg', 'cache_011_pos']
|
'cache_009_pos', 'cache_010_neg', 'cache_011_pos', 'cache_012_pos']
|
||||||
tags = ['functional', 'cache']
|
tags = ['functional', 'cache']
|
||||||
|
|
||||||
[tests/functional/cachefile]
|
[tests/functional/cachefile]
|
||||||
|
@ -4053,3 +4053,16 @@ function ls_xattr # path
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function get_arcstat # stat
|
||||||
|
{
|
||||||
|
if is_linux; then
|
||||||
|
typeset stat=$1
|
||||||
|
typeset zfs_arcstats="/proc/spl/kstat/zfs/arcstats"
|
||||||
|
[[ -f "$zfs_arcstats" ]] || return 1
|
||||||
|
grep $stat $zfs_arcstats | awk '{print $3}'
|
||||||
|
return $?
|
||||||
|
else
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
@ -35,6 +35,9 @@ DISABLE_IVSET_GUID_CHECK disable_ivset_guid_check zfs_disable_ivset_guid_check
|
|||||||
INITIALIZE_CHUNK_SIZE initialize_chunk_size zfs_initialize_chunk_size
|
INITIALIZE_CHUNK_SIZE initialize_chunk_size zfs_initialize_chunk_size
|
||||||
INITIALIZE_VALUE initialize_value zfs_initialize_value
|
INITIALIZE_VALUE initialize_value zfs_initialize_value
|
||||||
KEEP_LOG_SPACEMAPS_AT_EXPORT keep_log_spacemaps_at_export zfs_keep_log_spacemaps_at_export
|
KEEP_LOG_SPACEMAPS_AT_EXPORT keep_log_spacemaps_at_export zfs_keep_log_spacemaps_at_export
|
||||||
|
L2ARC_NOPREFETCH l2arc.noprefetch l2arc_noprefetch
|
||||||
|
L2ARC_WRITE_BOOST l2arc.write_boost l2arc_write_boost
|
||||||
|
L2ARC_WRITE_MAX l2arc.write_max l2arc_write_max
|
||||||
LIVELIST_CONDENSE_NEW_ALLOC livelist.condense.new_alloc zfs_livelist_condense_new_alloc
|
LIVELIST_CONDENSE_NEW_ALLOC livelist.condense.new_alloc zfs_livelist_condense_new_alloc
|
||||||
LIVELIST_CONDENSE_SYNC_CANCEL livelist.condense.sync_cancel zfs_livelist_condense_sync_cancel
|
LIVELIST_CONDENSE_SYNC_CANCEL livelist.condense.sync_cancel zfs_livelist_condense_sync_cancel
|
||||||
LIVELIST_CONDENSE_SYNC_PAUSE livelist.condense.sync_pause zfs_livelist_condense_sync_pause
|
LIVELIST_CONDENSE_SYNC_PAUSE livelist.condense.sync_pause zfs_livelist_condense_sync_pause
|
||||||
|
@ -12,7 +12,8 @@ dist_pkgdata_SCRIPTS = \
|
|||||||
cache_008_neg.ksh \
|
cache_008_neg.ksh \
|
||||||
cache_009_pos.ksh \
|
cache_009_pos.ksh \
|
||||||
cache_010_neg.ksh \
|
cache_010_neg.ksh \
|
||||||
cache_011_pos.ksh
|
cache_011_pos.ksh \
|
||||||
|
cache_012_pos.ksh
|
||||||
|
|
||||||
dist_pkgdata_DATA = \
|
dist_pkgdata_DATA = \
|
||||||
cache.cfg \
|
cache.cfg \
|
||||||
|
110
tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh
vendored
Executable file
110
tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh
vendored
Executable file
@ -0,0 +1,110 @@
|
|||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# This file and its contents are supplied under the terms of the
|
||||||
|
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||||
|
# You may only use this file in accordance with the terms of version
|
||||||
|
# 1.0 of the CDDL.
|
||||||
|
#
|
||||||
|
# A full copy of the text of the CDDL should have accompanied this
|
||||||
|
# source. A copy of the CDDL is also available via the Internet at
|
||||||
|
# http://www.illumos.org/license/CDDL.
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2020, George Amanakis. All rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/tests/functional/cache/cache.cfg
|
||||||
|
. $STF_SUITE/tests/functional/cache/cache.kshlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Looping around a cache device with l2arc_write_size exceeding
|
||||||
|
# the device size succeeds.
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Create pool with a cache device.
|
||||||
|
# 2. Set l2arc_write_max to a value larger than the cache device.
|
||||||
|
# 3. Create a file larger than the cache device and random read
|
||||||
|
# for 10 sec.
|
||||||
|
# 4. Verify that l2arc_write_max is set back to the default.
|
||||||
|
# 5. Set l2arc_write_max to a value less than the cache device size but
|
||||||
|
# larger than the default (64MB).
|
||||||
|
# 6. Record the l2_size.
|
||||||
|
# 7. Random read for 1 sec.
|
||||||
|
# 8. Record the l2_size again.
|
||||||
|
# 9. If (6) <= (8) then we have not looped around yet.
|
||||||
|
# 10. If (6) > (8) then we looped around. Break out of the loop and test.
|
||||||
|
# 11. Destroy pool.
|
||||||
|
#
|
||||||
|
|
||||||
|
verify_runnable "global"
|
||||||
|
|
||||||
|
log_assert "Looping around a cache device succeeds."
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
if poolexists $TESTPOOL ; then
|
||||||
|
destroy_pool $TESTPOOL
|
||||||
|
fi
|
||||||
|
|
||||||
|
log_must set_tunable32 L2ARC_WRITE_MAX $write_max
|
||||||
|
log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
|
||||||
|
}
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
typeset write_max=$(get_tunable L2ARC_WRITE_MAX)
|
||||||
|
typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
|
||||||
|
log_must set_tunable32 L2ARC_NOPREFETCH 0
|
||||||
|
|
||||||
|
typeset VDEV="$VDIR/vdev.disk"
|
||||||
|
typeset VDEV_SZ=$(( 4 * 1024 * 1024 * 1024 ))
|
||||||
|
typeset VCACHE="$VDIR/vdev.cache"
|
||||||
|
typeset VCACHE_SZ=$(( $VDEV_SZ / 2 ))
|
||||||
|
|
||||||
|
typeset fill_mb=$(( floor($VDEV_SZ * 3 / 4 ) ))
|
||||||
|
export DIRECTORY=/$TESTPOOL
|
||||||
|
export NUMJOBS=4
|
||||||
|
export RUNTIME=10
|
||||||
|
export PERF_RANDSEED=1234
|
||||||
|
export PERF_COMPPERCENT=66
|
||||||
|
export PERF_COMPCHUNK=0
|
||||||
|
export BLOCKSIZE=128K
|
||||||
|
export SYNC_TYPE=0
|
||||||
|
export DIRECT=1
|
||||||
|
export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))
|
||||||
|
|
||||||
|
log_must set_tunable32 L2ARC_WRITE_MAX $(( $VCACHE_SZ * 2 ))
|
||||||
|
|
||||||
|
log_must truncate -s $VCACHE_SZ $VCACHE
|
||||||
|
log_must truncate -s $VDEV_SZ $VDEV
|
||||||
|
|
||||||
|
log_must zpool create -f $TESTPOOL $VDEV cache $VCACHE
|
||||||
|
|
||||||
|
log_must fio $FIO_SCRIPTS/mkfiles.fio
|
||||||
|
log_must fio $FIO_SCRIPTS/random_reads.fio
|
||||||
|
|
||||||
|
typeset write_max2=$(get_tunable L2ARC_WRITE_MAX)
|
||||||
|
|
||||||
|
log_must test $write_max2 -eq $write_max
|
||||||
|
|
||||||
|
log_must set_tunable32 L2ARC_WRITE_MAX $(( 64 * 1024 * 1024 ))
|
||||||
|
export RUNTIME=1
|
||||||
|
|
||||||
|
typeset do_once=true
|
||||||
|
while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
|
||||||
|
typeset l2_size1=$(get_arcstat l2_size)
|
||||||
|
log_must fio $FIO_SCRIPTS/random_reads.fio
|
||||||
|
typeset l2_size2=$(get_arcstat l2_size)
|
||||||
|
do_once=false
|
||||||
|
done
|
||||||
|
|
||||||
|
log_must test $l2_size1 -gt $l2_size2
|
||||||
|
|
||||||
|
log_must zpool destroy $TESTPOOL
|
||||||
|
|
||||||
|
log_pass "Looping around a cache device succeeds."
|
Loading…
Reference in New Issue
Block a user