mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 10:24:22 +03:00
OpenZFS 7910 - l2arc_write_buffers() may write beyond target_sz
Authored by: Andriy Gapon <avg@FreeBSD.org>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: Giuseppe Di Natale <dinatale2@llnl.gov>
OpenZFS-issue: https://www.illumos.org/issues/7910
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/cb6af4b
Closes #6291
This commit is contained in:
parent
23873bbb5f
commit
018503911c
@ -626,8 +626,8 @@ typedef struct arc_stats {
|
|||||||
kstat_named_t arcstat_l2_abort_lowmem;
|
kstat_named_t arcstat_l2_abort_lowmem;
|
||||||
kstat_named_t arcstat_l2_cksum_bad;
|
kstat_named_t arcstat_l2_cksum_bad;
|
||||||
kstat_named_t arcstat_l2_io_error;
|
kstat_named_t arcstat_l2_io_error;
|
||||||
kstat_named_t arcstat_l2_size;
|
kstat_named_t arcstat_l2_lsize;
|
||||||
kstat_named_t arcstat_l2_asize;
|
kstat_named_t arcstat_l2_psize;
|
||||||
kstat_named_t arcstat_l2_hdr_size;
|
kstat_named_t arcstat_l2_hdr_size;
|
||||||
kstat_named_t arcstat_memory_throttle_count;
|
kstat_named_t arcstat_memory_throttle_count;
|
||||||
kstat_named_t arcstat_memory_direct_count;
|
kstat_named_t arcstat_memory_direct_count;
|
||||||
@ -2984,19 +2984,19 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
|
|||||||
{
|
{
|
||||||
l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
|
l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
|
||||||
l2arc_dev_t *dev = l2hdr->b_dev;
|
l2arc_dev_t *dev = l2hdr->b_dev;
|
||||||
uint64_t asize = arc_hdr_size(hdr);
|
uint64_t psize = arc_hdr_size(hdr);
|
||||||
|
|
||||||
ASSERT(MUTEX_HELD(&dev->l2ad_mtx));
|
ASSERT(MUTEX_HELD(&dev->l2ad_mtx));
|
||||||
ASSERT(HDR_HAS_L2HDR(hdr));
|
ASSERT(HDR_HAS_L2HDR(hdr));
|
||||||
|
|
||||||
list_remove(&dev->l2ad_buflist, hdr);
|
list_remove(&dev->l2ad_buflist, hdr);
|
||||||
|
|
||||||
ARCSTAT_INCR(arcstat_l2_asize, -asize);
|
ARCSTAT_INCR(arcstat_l2_psize, -psize);
|
||||||
ARCSTAT_INCR(arcstat_l2_size, -HDR_GET_LSIZE(hdr));
|
ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
|
||||||
|
|
||||||
vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
|
vdev_space_update(dev->l2ad_vdev, -psize, 0, 0);
|
||||||
|
|
||||||
(void) refcount_remove_many(&dev->l2ad_alloc, asize, hdr);
|
(void) refcount_remove_many(&dev->l2ad_alloc, psize, hdr);
|
||||||
arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
|
arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7007,8 +7007,8 @@ top:
|
|||||||
list_remove(buflist, hdr);
|
list_remove(buflist, hdr);
|
||||||
arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
|
arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
|
||||||
|
|
||||||
ARCSTAT_INCR(arcstat_l2_asize, -arc_hdr_size(hdr));
|
ARCSTAT_INCR(arcstat_l2_psize, -arc_hdr_size(hdr));
|
||||||
ARCSTAT_INCR(arcstat_l2_size, -HDR_GET_LSIZE(hdr));
|
ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
|
||||||
|
|
||||||
bytes_dropped += arc_hdr_size(hdr);
|
bytes_dropped += arc_hdr_size(hdr);
|
||||||
(void) refcount_remove_many(&dev->l2ad_alloc,
|
(void) refcount_remove_many(&dev->l2ad_alloc,
|
||||||
@ -7269,7 +7269,7 @@ top:
|
|||||||
/*
|
/*
|
||||||
* This doesn't exist in the ARC. Destroy.
|
* This doesn't exist in the ARC. Destroy.
|
||||||
* arc_hdr_destroy() will call list_remove()
|
* arc_hdr_destroy() will call list_remove()
|
||||||
* and decrement arcstat_l2_size.
|
* and decrement arcstat_l2_lsize.
|
||||||
*/
|
*/
|
||||||
arc_change_state(arc_anon, hdr, hash_lock);
|
arc_change_state(arc_anon, hdr, hash_lock);
|
||||||
arc_hdr_destroy(hdr);
|
arc_hdr_destroy(hdr);
|
||||||
@ -7311,7 +7311,7 @@ static uint64_t
|
|||||||
l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||||
{
|
{
|
||||||
arc_buf_hdr_t *hdr, *hdr_prev, *head;
|
arc_buf_hdr_t *hdr, *hdr_prev, *head;
|
||||||
uint64_t write_asize, write_psize, write_sz, headroom;
|
uint64_t write_asize, write_psize, write_lsize, headroom;
|
||||||
boolean_t full;
|
boolean_t full;
|
||||||
l2arc_write_callback_t *cb;
|
l2arc_write_callback_t *cb;
|
||||||
zio_t *pio, *wzio;
|
zio_t *pio, *wzio;
|
||||||
@ -7321,7 +7321,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
ASSERT3P(dev->l2ad_vdev, !=, NULL);
|
ASSERT3P(dev->l2ad_vdev, !=, NULL);
|
||||||
|
|
||||||
pio = NULL;
|
pio = NULL;
|
||||||
write_sz = write_asize = write_psize = 0;
|
write_lsize = write_asize = write_psize = 0;
|
||||||
full = B_FALSE;
|
full = B_FALSE;
|
||||||
head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
|
head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
|
||||||
arc_hdr_set_flags(head, ARC_FLAG_L2_WRITE_HEAD | ARC_FLAG_HAS_L2HDR);
|
arc_hdr_set_flags(head, ARC_FLAG_L2_WRITE_HEAD | ARC_FLAG_HAS_L2HDR);
|
||||||
@ -7352,8 +7352,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
|
|
||||||
for (; hdr; hdr = hdr_prev) {
|
for (; hdr; hdr = hdr_prev) {
|
||||||
kmutex_t *hash_lock;
|
kmutex_t *hash_lock;
|
||||||
uint64_t asize, size;
|
|
||||||
abd_t *to_write;
|
|
||||||
|
|
||||||
if (arc_warm == B_FALSE)
|
if (arc_warm == B_FALSE)
|
||||||
hdr_prev = multilist_sublist_next(mls, hdr);
|
hdr_prev = multilist_sublist_next(mls, hdr);
|
||||||
@ -7382,7 +7380,22 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((write_asize + HDR_GET_LSIZE(hdr)) > target_sz) {
|
/*
|
||||||
|
* We rely on the L1 portion of the header below, so
|
||||||
|
* it's invalid for this header to have been evicted out
|
||||||
|
* of the ghost cache, prior to being written out. The
|
||||||
|
* ARC_FLAG_L2_WRITING bit ensures this won't happen.
|
||||||
|
*/
|
||||||
|
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||||
|
|
||||||
|
ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
|
||||||
|
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
|
||||||
|
ASSERT3U(arc_hdr_size(hdr), >, 0);
|
||||||
|
uint64_t psize = arc_hdr_size(hdr);
|
||||||
|
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
|
||||||
|
psize);
|
||||||
|
|
||||||
|
if ((write_asize + asize) > target_sz) {
|
||||||
full = B_TRUE;
|
full = B_TRUE;
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
break;
|
break;
|
||||||
@ -7417,20 +7430,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
list_insert_head(&dev->l2ad_buflist, hdr);
|
list_insert_head(&dev->l2ad_buflist, hdr);
|
||||||
mutex_exit(&dev->l2ad_mtx);
|
mutex_exit(&dev->l2ad_mtx);
|
||||||
|
|
||||||
/*
|
(void) refcount_add_many(&dev->l2ad_alloc, psize, hdr);
|
||||||
* We rely on the L1 portion of the header below, so
|
|
||||||
* it's invalid for this header to have been evicted out
|
|
||||||
* of the ghost cache, prior to being written out. The
|
|
||||||
* ARC_FLAG_L2_WRITING bit ensures this won't happen.
|
|
||||||
*/
|
|
||||||
ASSERT(HDR_HAS_L1HDR(hdr));
|
|
||||||
|
|
||||||
ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
|
|
||||||
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
|
|
||||||
ASSERT3U(arc_hdr_size(hdr), >, 0);
|
|
||||||
size = arc_hdr_size(hdr);
|
|
||||||
|
|
||||||
(void) refcount_add_many(&dev->l2ad_alloc, size, hdr);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Normally the L2ARC can use the hdr's data, but if
|
* Normally the L2ARC can use the hdr's data, but if
|
||||||
@ -7446,18 +7446,18 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
* lifetime of the ZIO and be cleaned up afterwards, we
|
* lifetime of the ZIO and be cleaned up afterwards, we
|
||||||
* add it to the l2arc_free_on_write queue.
|
* add it to the l2arc_free_on_write queue.
|
||||||
*/
|
*/
|
||||||
asize = vdev_psize_to_asize(dev->l2ad_vdev, size);
|
abd_t *to_write;
|
||||||
if (!HDR_SHARED_DATA(hdr) && size == asize) {
|
if (!HDR_SHARED_DATA(hdr) && psize == asize) {
|
||||||
to_write = hdr->b_l1hdr.b_pabd;
|
to_write = hdr->b_l1hdr.b_pabd;
|
||||||
} else {
|
} else {
|
||||||
to_write = abd_alloc_for_io(asize,
|
to_write = abd_alloc_for_io(asize,
|
||||||
HDR_ISTYPE_METADATA(hdr));
|
HDR_ISTYPE_METADATA(hdr));
|
||||||
abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
|
abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
|
||||||
if (asize != size) {
|
if (asize != psize) {
|
||||||
abd_zero_off(to_write, size,
|
abd_zero_off(to_write, psize,
|
||||||
asize - size);
|
asize - psize);
|
||||||
}
|
}
|
||||||
l2arc_free_abd_on_write(to_write, size,
|
l2arc_free_abd_on_write(to_write, asize,
|
||||||
arc_buf_type(hdr));
|
arc_buf_type(hdr));
|
||||||
}
|
}
|
||||||
wzio = zio_write_phys(pio, dev->l2ad_vdev,
|
wzio = zio_write_phys(pio, dev->l2ad_vdev,
|
||||||
@ -7466,12 +7466,12 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
ZIO_PRIORITY_ASYNC_WRITE,
|
ZIO_PRIORITY_ASYNC_WRITE,
|
||||||
ZIO_FLAG_CANFAIL, B_FALSE);
|
ZIO_FLAG_CANFAIL, B_FALSE);
|
||||||
|
|
||||||
write_sz += HDR_GET_LSIZE(hdr);
|
write_lsize += HDR_GET_LSIZE(hdr);
|
||||||
DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
|
DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
|
||||||
zio_t *, wzio);
|
zio_t *, wzio);
|
||||||
|
|
||||||
write_asize += size;
|
write_psize += psize;
|
||||||
write_psize += asize;
|
write_asize += asize;
|
||||||
dev->l2ad_hand += asize;
|
dev->l2ad_hand += asize;
|
||||||
|
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
@ -7487,7 +7487,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
|
|
||||||
/* No buffers selected for writing? */
|
/* No buffers selected for writing? */
|
||||||
if (pio == NULL) {
|
if (pio == NULL) {
|
||||||
ASSERT0(write_sz);
|
ASSERT0(write_lsize);
|
||||||
ASSERT(!HDR_HAS_L1HDR(head));
|
ASSERT(!HDR_HAS_L1HDR(head));
|
||||||
kmem_cache_free(hdr_l2only_cache, head);
|
kmem_cache_free(hdr_l2only_cache, head);
|
||||||
return (0);
|
return (0);
|
||||||
@ -7495,10 +7495,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||||||
|
|
||||||
ASSERT3U(write_asize, <=, target_sz);
|
ASSERT3U(write_asize, <=, target_sz);
|
||||||
ARCSTAT_BUMP(arcstat_l2_writes_sent);
|
ARCSTAT_BUMP(arcstat_l2_writes_sent);
|
||||||
ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
|
ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize);
|
||||||
ARCSTAT_INCR(arcstat_l2_size, write_sz);
|
ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
|
||||||
ARCSTAT_INCR(arcstat_l2_asize, write_asize);
|
ARCSTAT_INCR(arcstat_l2_psize, write_psize);
|
||||||
vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0);
|
vdev_space_update(dev->l2ad_vdev, write_psize, 0, 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Bump device hand to the device start if it is approaching the end.
|
* Bump device hand to the device start if it is approaching the end.
|
||||||
|
Loading…
Reference in New Issue
Block a user