87 lines
3.2 KiB
Diff
87 lines
3.2 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: lidongyang <gnaygnodil@gmail.com>
|
||
|
Date: Sat, 23 Dec 2017 05:19:51 +1100
|
||
|
Subject: [PATCH] Call commit callbacks from the tail of the list
|
||
|
MIME-Version: 1.0
|
||
|
Content-Type: text/plain; charset=UTF-8
|
||
|
Content-Transfer-Encoding: 8bit
|
||
|
|
||
|
Our ZFS-backed Lustre MDT had soft lockups while under heavy metadata
|
||
|
workloads while handling transaction callbacks from osd_zfs.
|
||
|
|
||
|
The problem is that ZFS is not taking advantage of the fast path in
|
||
|
Lustre's trans callback handling, where Lustre will skip the calls
|
||
|
to ptlrpc_commit_replies() when it already saw a higher transaction
|
||
|
number.
|
||
|
|
||
|
This patch corrects this; it also has a positive impact on metadata
|
||
|
performance on Lustre with osd_zfs, plus some cleanup in the headers.
|
||
|
|
||
|
A similar issue for ext4/ldiskfs is described on:
|
||
|
https://jira.hpdd.intel.com/browse/LU-6527
|
||
|
|
||
|
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
|
||
|
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||
|
Signed-off-by: Li Dongyang <dongyang.li@anu.edu.au>
|
||
|
Closes #6986
|
||
|
(cherry picked from commit 8d82a19def540bba43c8c7597142ff53f7a0b7e5)
|
||
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||
|
---
|
||
|
include/sys/dmu.h | 5 +++++
|
||
|
include/sys/dmu_tx.h | 4 ----
|
||
|
module/zfs/dmu_tx.c | 2 +-
|
||
|
3 files changed, 6 insertions(+), 5 deletions(-)
|
||
|
|
||
|
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
|
||
|
index d24615262..bcdf7d646 100644
|
||
|
--- a/include/sys/dmu.h
|
||
|
+++ b/include/sys/dmu.h
|
||
|
@@ -713,11 +713,16 @@ void dmu_tx_mark_netfree(dmu_tx_t *tx);
|
||
|
* to stable storage and will also be called if the dmu_tx is aborted.
|
||
|
* If there is any error which prevents the transaction from being committed to
|
||
|
* disk, the callback will be called with a value of error != 0.
|
||
|
+ *
|
||
|
+ * When multiple callbacks are registered to the transaction, the callbacks
|
||
|
+ * will be called in reverse order to let Lustre, the only user of commit
|
||
|
+ * callback currently, take the fast path of its commit callback handling.
|
||
|
*/
|
||
|
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
|
||
|
|
||
|
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||
|
void *dcb_data);
|
||
|
+void dmu_tx_do_callbacks(list_t *cb_list, int error);
|
||
|
|
||
|
/*
|
||
|
* Free up the data blocks for a defined range of a file. If size is
|
||
|
diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
|
||
|
index f16e1e858..d82a79310 100644
|
||
|
--- a/include/sys/dmu_tx.h
|
||
|
+++ b/include/sys/dmu_tx.h
|
||
|
@@ -145,10 +145,6 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||
|
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
|
||
|
void dmu_tx_wait(dmu_tx_t *tx);
|
||
|
|
||
|
-void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||
|
- void *dcb_data);
|
||
|
-void dmu_tx_do_callbacks(list_t *cb_list, int error);
|
||
|
-
|
||
|
/*
|
||
|
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
||
|
*/
|
||
|
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
|
||
|
index 097fa774a..c3cc03a69 100644
|
||
|
--- a/module/zfs/dmu_tx.c
|
||
|
+++ b/module/zfs/dmu_tx.c
|
||
|
@@ -1200,7 +1200,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
|
||
|
{
|
||
|
dmu_tx_callback_t *dcb;
|
||
|
|
||
|
- while ((dcb = list_head(cb_list)) != NULL) {
|
||
|
+ while ((dcb = list_tail(cb_list)) != NULL) {
|
||
|
list_remove(cb_list, dcb);
|
||
|
dcb->dcb_func(dcb->dcb_data, error);
|
||
|
kmem_free(dcb, sizeof (dmu_tx_callback_t));
|
||
|
--
|
||
|
2.14.2
|
||
|
|