mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-25 02:49:32 +03:00
fix l2arc compression buffers leak
Commit log from FreeBSD: We have observed that arc_release() can be called concurrently with a l2arc in-flight write. Also, we have observed that arc_hdr_destroy() can be called from arc_write_done() for a zio with ZIO_FLAG_IO_REWRITE flag in similar circumstances. Previously the l2arc headers would be freed while leaking their associated compression buffers. Now the buffers are placed on l2arc_free_on_write list for delayed freeing. This is similar to what was already done to arc buffers that were supposed to be freed concurrently with in-flight writes of those buffers. In addition to fixing the discovered leaks this change also adds some protective code to assert that a compression buffer associated with a l2arc header is never leaked. A new kstat l2_cdata_free_on_write is added. It keeps a count of delayed compression buffer frees which previously would have been leaks. Tested by: Vitalij Satanivskij <satan@ukr.net> et al Requested by: many MFC after: 2 weeks Sponsored by: HybridCluster / ClusterHQ References: https://illumos.org/issues/5222 https://github.com/freebsd/freebsd/commit/b98f85d http://thread.gmane.org/gmane.os.freebsd.current/155757/focus=155781 http://lists.open-zfs.org/pipermail/developer/2014-January/000455.html http://lists.open-zfs.org/pipermail/developer/2014-February/000523.html Ported-by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3029
This commit is contained in:
parent
19ea3d25df
commit
037763e44e
@ -304,6 +304,7 @@ typedef struct arc_stats {
|
|||||||
kstat_named_t arcstat_l2_evict_lock_retry;
|
kstat_named_t arcstat_l2_evict_lock_retry;
|
||||||
kstat_named_t arcstat_l2_evict_reading;
|
kstat_named_t arcstat_l2_evict_reading;
|
||||||
kstat_named_t arcstat_l2_free_on_write;
|
kstat_named_t arcstat_l2_free_on_write;
|
||||||
|
kstat_named_t arcstat_l2_cdata_free_on_write;
|
||||||
kstat_named_t arcstat_l2_abort_lowmem;
|
kstat_named_t arcstat_l2_abort_lowmem;
|
||||||
kstat_named_t arcstat_l2_cksum_bad;
|
kstat_named_t arcstat_l2_cksum_bad;
|
||||||
kstat_named_t arcstat_l2_io_error;
|
kstat_named_t arcstat_l2_io_error;
|
||||||
@ -392,6 +393,7 @@ static arc_stats_t arc_stats = {
|
|||||||
{ "l2_evict_lock_retry", KSTAT_DATA_UINT64 },
|
{ "l2_evict_lock_retry", KSTAT_DATA_UINT64 },
|
||||||
{ "l2_evict_reading", KSTAT_DATA_UINT64 },
|
{ "l2_evict_reading", KSTAT_DATA_UINT64 },
|
||||||
{ "l2_free_on_write", KSTAT_DATA_UINT64 },
|
{ "l2_free_on_write", KSTAT_DATA_UINT64 },
|
||||||
|
{ "l2_cdata_free_on_write", KSTAT_DATA_UINT64 },
|
||||||
{ "l2_abort_lowmem", KSTAT_DATA_UINT64 },
|
{ "l2_abort_lowmem", KSTAT_DATA_UINT64 },
|
||||||
{ "l2_cksum_bad", KSTAT_DATA_UINT64 },
|
{ "l2_cksum_bad", KSTAT_DATA_UINT64 },
|
||||||
{ "l2_io_error", KSTAT_DATA_UINT64 },
|
{ "l2_io_error", KSTAT_DATA_UINT64 },
|
||||||
@ -1466,6 +1468,21 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
|
|||||||
data, metadata, hits);
|
data, metadata, hits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
arc_buf_free_on_write(void *data, size_t size,
|
||||||
|
void (*free_func)(void *, size_t))
|
||||||
|
{
|
||||||
|
l2arc_data_free_t *df;
|
||||||
|
|
||||||
|
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
|
||||||
|
df->l2df_data = data;
|
||||||
|
df->l2df_size = size;
|
||||||
|
df->l2df_func = free_func;
|
||||||
|
mutex_enter(&l2arc_free_on_write_mtx);
|
||||||
|
list_insert_head(l2arc_free_on_write, df);
|
||||||
|
mutex_exit(&l2arc_free_on_write_mtx);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Free the arc data buffer. If it is an l2arc write in progress,
|
* Free the arc data buffer. If it is an l2arc write in progress,
|
||||||
* the buffer is placed on l2arc_free_on_write to be freed later.
|
* the buffer is placed on l2arc_free_on_write to be freed later.
|
||||||
@ -1476,14 +1493,7 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
|
|||||||
arc_buf_hdr_t *hdr = buf->b_hdr;
|
arc_buf_hdr_t *hdr = buf->b_hdr;
|
||||||
|
|
||||||
if (HDR_L2_WRITING(hdr)) {
|
if (HDR_L2_WRITING(hdr)) {
|
||||||
l2arc_data_free_t *df;
|
arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
|
||||||
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
|
|
||||||
df->l2df_data = buf->b_data;
|
|
||||||
df->l2df_size = hdr->b_size;
|
|
||||||
df->l2df_func = free_func;
|
|
||||||
mutex_enter(&l2arc_free_on_write_mtx);
|
|
||||||
list_insert_head(l2arc_free_on_write, df);
|
|
||||||
mutex_exit(&l2arc_free_on_write_mtx);
|
|
||||||
ARCSTAT_BUMP(arcstat_l2_free_on_write);
|
ARCSTAT_BUMP(arcstat_l2_free_on_write);
|
||||||
} else {
|
} else {
|
||||||
free_func(buf->b_data, hdr->b_size);
|
free_func(buf->b_data, hdr->b_size);
|
||||||
@ -1494,6 +1504,23 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
|
|||||||
* Free up buf->b_data and if 'remove' is set, then pull the
|
* Free up buf->b_data and if 'remove' is set, then pull the
|
||||||
* arc_buf_t off of the the arc_buf_hdr_t's list and free it.
|
* arc_buf_t off of the the arc_buf_hdr_t's list and free it.
|
||||||
*/
|
*/
|
||||||
|
static void
|
||||||
|
arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
|
||||||
|
{
|
||||||
|
l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
|
||||||
|
|
||||||
|
if (l2hdr->b_tmp_cdata == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ASSERT(HDR_L2_WRITING(hdr));
|
||||||
|
arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
|
||||||
|
zio_data_buf_free);
|
||||||
|
ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
|
||||||
|
l2hdr->b_tmp_cdata = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
|
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
|
||||||
{
|
{
|
||||||
@ -1589,6 +1616,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
|
|||||||
|
|
||||||
if (l2hdr != NULL) {
|
if (l2hdr != NULL) {
|
||||||
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
|
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
|
||||||
|
arc_buf_l2_cdata_free(hdr);
|
||||||
ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
|
ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
|
||||||
ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
|
ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
|
||||||
vdev_space_update(l2hdr->b_dev->l2ad_vdev,
|
vdev_space_update(l2hdr->b_dev->l2ad_vdev,
|
||||||
@ -3602,6 +3630,7 @@ arc_release(arc_buf_t *buf, void *tag)
|
|||||||
l2hdr = hdr->b_l2hdr;
|
l2hdr = hdr->b_l2hdr;
|
||||||
if (l2hdr) {
|
if (l2hdr) {
|
||||||
mutex_enter(&l2arc_buflist_mtx);
|
mutex_enter(&l2arc_buflist_mtx);
|
||||||
|
arc_buf_l2_cdata_free(hdr);
|
||||||
hdr->b_l2hdr = NULL;
|
hdr->b_l2hdr = NULL;
|
||||||
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
|
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
|
||||||
}
|
}
|
||||||
@ -4843,6 +4872,11 @@ top:
|
|||||||
ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
|
ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
|
||||||
bytes_evicted += abl2->b_asize;
|
bytes_evicted += abl2->b_asize;
|
||||||
ab->b_l2hdr = NULL;
|
ab->b_l2hdr = NULL;
|
||||||
|
/*
|
||||||
|
* We are destroying l2hdr, so ensure that
|
||||||
|
* its compressed buffer, if any, is not leaked.
|
||||||
|
*/
|
||||||
|
ASSERT(abl2->b_tmp_cdata == NULL);
|
||||||
kmem_cache_free(l2arc_hdr_cache, abl2);
|
kmem_cache_free(l2arc_hdr_cache, abl2);
|
||||||
arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
|
arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
|
||||||
ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
|
ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
|
||||||
@ -5077,6 +5111,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
|
|||||||
buf_data = l2hdr->b_tmp_cdata;
|
buf_data = l2hdr->b_tmp_cdata;
|
||||||
buf_sz = l2hdr->b_asize;
|
buf_sz = l2hdr->b_asize;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the data has not been compressed, then clear b_tmp_cdata
|
||||||
|
* to make sure that it points only to a temporary compression
|
||||||
|
* buffer.
|
||||||
|
*/
|
||||||
|
if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
|
||||||
|
l2hdr->b_tmp_cdata = NULL;
|
||||||
|
|
||||||
/* Compression may have squashed the buffer to zero length. */
|
/* Compression may have squashed the buffer to zero length. */
|
||||||
if (buf_sz != 0) {
|
if (buf_sz != 0) {
|
||||||
uint64_t buf_p_sz;
|
uint64_t buf_p_sz;
|
||||||
@ -5267,15 +5309,18 @@ l2arc_release_cdata_buf(arc_buf_hdr_t *ab)
|
|||||||
{
|
{
|
||||||
l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
|
l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
|
||||||
|
|
||||||
if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {
|
ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
|
||||||
|
if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
|
||||||
/*
|
/*
|
||||||
* If the data was compressed, then we've allocated a
|
* If the data was compressed, then we've allocated a
|
||||||
* temporary buffer for it, so now we need to release it.
|
* temporary buffer for it, so now we need to release it.
|
||||||
*/
|
*/
|
||||||
ASSERT(l2hdr->b_tmp_cdata != NULL);
|
ASSERT(l2hdr->b_tmp_cdata != NULL);
|
||||||
zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
|
zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
|
||||||
|
l2hdr->b_tmp_cdata = NULL;
|
||||||
|
} else {
|
||||||
|
ASSERT(l2hdr->b_tmp_cdata == NULL);
|
||||||
}
|
}
|
||||||
l2hdr->b_tmp_cdata = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
Reference in New Issue
Block a user