From 19ea3d25df99995d2e62592cd6bc74f883f0e8e0 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 29 Jan 2015 15:09:51 -0800 Subject: [PATCH] Use zio buffers in zil_itx_create() The zil_itx_create() function uses the vmem_alloc() allocator for its buffers because when logging a write that buffer may be as large as 64K. This is non-optimal because we may need to allocate many of these buffers and this interface has the potential to be slow. Instead, use zio_data_buf_alloc() which is specifically designed to be able to efficiently allocate a wide range of buffer sizes. In addition, do some cleanup and use the zil_itx_destroy() function to always free an itx structure. This way we're always sure the right allocation functions are used. Notice that in the current code kmem_free() and vmem_free() were both used. This happened to work because these wrappers map to the same internal SPL function. This was identified as a potential problem when a low-end, memory-constrained system began logging the following warnings. There was no deadlock here, just repeated allocation failures resulting in increased latency. Possible memory allocation deadlock: size=65792 lflags=0x42d0 Pid: 20118, comm: kvm Tainted: P O 3.2.0-0.bpo.4-amd64 Call Trace: [] ? spl_kmem_alloc_impl+0x115/0x127 [spl] [] ? spl_kmem_alloc_debug+0x9/0x36 [spl] [] ? zil_itx_create+0x2d/0x59 [zfs] [] ? zfs_log_write+0x13a/0x2f0 [zfs] [] ? zfs_write+0x85b/0x9bb [zfs] [] ? zpl_aio_write+0xca/0x110 [zfs] [] ? do_sync_readv_writev+0xa3/0xde [] ? do_readv_writev+0xaf/0x125 [] ? sys_pwritev+0x55/0x9a [] ? 
system_call_fastpath+0x16/0x1b Signed-off-by: Brian Behlendorf Signed-off-by: Richard Yao Closes #3059 --- module/zfs/zil.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 9bf8c5e5d..15897b363 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1192,7 +1192,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize) lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t); - itx = vmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP); + itx = zio_data_buf_alloc(offsetof(itx_t, itx_lr) + lrsize); itx->itx_lr.lrc_txtype = txtype; itx->itx_lr.lrc_reclen = lrsize; itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */ @@ -1207,7 +1207,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize) void zil_itx_destroy(itx_t *itx) { - vmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); + zio_data_buf_free(itx, offsetof(itx_t, itx_lr)+itx->itx_lr.lrc_reclen); } /* @@ -1228,8 +1228,7 @@ zil_itxg_clean(itxs_t *itxs) if (itx->itx_callback != NULL) itx->itx_callback(itx->itx_callback_data); list_remove(list, itx); - kmem_free(itx, offsetof(itx_t, itx_lr) + - itx->itx_lr.lrc_reclen); + zil_itx_destroy(itx); } cookie = NULL; @@ -1240,8 +1239,7 @@ zil_itxg_clean(itxs_t *itxs) if (itx->itx_callback != NULL) itx->itx_callback(itx->itx_callback_data); list_remove(list, itx); - kmem_free(itx, offsetof(itx_t, itx_lr) + - itx->itx_lr.lrc_reclen); + zil_itx_destroy(itx); } list_destroy(list); kmem_free(ian, sizeof (itx_async_node_t)); @@ -1308,8 +1306,7 @@ zil_remove_async(zilog_t *zilog, uint64_t oid) if (itx->itx_callback != NULL) itx->itx_callback(itx->itx_callback_data); list_remove(&clean_list, itx); - kmem_free(itx, offsetof(itx_t, itx_lr) + - itx->itx_lr.lrc_reclen); + zil_itx_destroy(itx); } list_destroy(&clean_list); } @@ -1589,8 +1586,7 @@ zil_commit_writer(zilog_t *zilog) if (itx->itx_callback != NULL) itx->itx_callback(itx->itx_callback_data); 
list_remove(&zilog->zl_itx_commit_list, itx); - kmem_free(itx, offsetof(itx_t, itx_lr) - + itx->itx_lr.lrc_reclen); + zil_itx_destroy(itx); } mutex_enter(&zilog->zl_lock);