From 53b1f5eac602b1d576907fa7409f91ac03d607f2 Mon Sep 17 00:00:00 2001 From: Prakash Surya Date: Tue, 23 Oct 2018 09:44:37 -0700 Subject: [PATCH] OpenZFS 9963 - Separate tunable for disabling ZIL vdev flush Porting Notes: * Add options to zfs-module-parameters(5) man page. * zfs_nocacheflush moved to vdev.c instead of vdev_disk.c, since the latter doesn't get built for user space. Authored by: Prakash Surya Reviewed by: Matt Ahrens Reviewed by: Brad Lewis Reviewed by: Patrick Mooney Reviewed by: Tom Caputi Reviewed by: George Melikov Approved by: Dan McDonald Ported-by: Signed-off-by: Brian Behlendorf OpenZFS-issue: https://www.illumos.org/issues/9963 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/f8fdf68125 Closes #8186 --- man/man5/zfs-module-parameters.5 | 20 ++++++++++++++++---- module/zfs/vdev.c | 10 ++++++++++ module/zfs/zil.c | 17 +++++++++-------- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 837a7d1c3..55115c266 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1846,8 +1846,9 @@ Use \fB1\fR for yes and \fB0\fR for no (default). \fBzfs_nocacheflush\fR (int) .ad .RS 12n -Disable cache flush operations on disks when writing. Beware, this may cause -corruption if disks re-order writes. +Disable cache flush operations on disks when writing. Setting this will +cause pool corruption on power loss if a volatile out-of-order write cache +is enabled. .sp Use \fB1\fR for yes and \fB0\fR for no (default). .RE @@ -1903,8 +1904,6 @@ A value of zero will disable this throttle. Default value: \fB30\fR and \fB0\fR to disable. .RE - - .sp .ne 2 .na @@ -2527,6 +2526,19 @@ value of 100% will create a maximum of one thread per cpu. Default value: \fB100\fR%. .RE +.sp +.ne 2 +.na +\fBzil_nocacheflush\fR (int) +.ad +.RS 12n +Disable the cache flush commands that are normally sent to the disk(s) by +the ZIL after an LWB write has completed. 
Setting this will cause ZIL +corruption on power loss if a volatile out-of-order write cache is enabled. +.sp +Use \fB1\fR for yes and \fB0\fR for no (default). +.RE + .sp .ne 2 .na diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index ff5a15365..8273e7907 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -99,6 +99,13 @@ int zfs_scan_ignore_errors = 0; */ int vdev_standard_sm_blksz = (1 << 17); +/* + * Tunable parameter for debugging or performance analysis. Setting this + * will cause pool corruption on power loss if a volatile out-of-order + * write cache is enabled. + */ +int zfs_nocacheflush = 0; + /*PRINTFLIKE2*/ void vdev_dbgmsg(vdev_t *vd, const char *fmt, ...) @@ -4650,5 +4657,8 @@ MODULE_PARM_DESC(zfs_scan_ignore_errors, module_param(vdev_validate_skip, int, 0644); MODULE_PARM_DESC(vdev_validate_skip, "Bypass vdev_validate()"); + +module_param(zfs_nocacheflush, int, 0644); +MODULE_PARM_DESC(zfs_nocacheflush, "Disable cache flushes"); /* END CSTYLED */ #endif diff --git a/module/zfs/zil.c b/module/zfs/zil.c index d5ebf7561..a453c26c3 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -118,11 +118,12 @@ static kstat_t *zil_ksp; int zil_replay_disable = 0; /* - * Tunable parameter for debugging or performance analysis. Setting - * zfs_nocacheflush will cause corruption on power loss if a volatile - * out-of-order write cache is enabled. + * Disable the DKIOCFLUSHWRITECACHE commands that are normally sent to + * the disk(s) by the ZIL after an LWB write has completed. Setting this + * will cause ZIL corruption on power loss if a volatile out-of-order + * write cache is enabled. */ -int zfs_nocacheflush = 0; +int zil_nocacheflush = 0; /* * Limit SLOG write size per commit executed with synchronous priority. 
@@ -1041,7 +1042,7 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp) int ndvas = BP_GET_NDVAS(bp); int i; - if (zfs_nocacheflush) + if (zil_nocacheflush) return; mutex_enter(&lwb->lwb_vdev_lock); @@ -1065,7 +1066,7 @@ zil_lwb_add_txg(lwb_t *lwb, uint64_t txg) /* * This function is a called after all VDEVs associated with a given lwb * write have completed their DKIOCFLUSHWRITECACHE command; or as soon - * as the lwb write completes, if "zfs_nocacheflush" is set. + * as the lwb write completes, if "zil_nocacheflush" is set. * * The intention is for this function to be called as soon as the * contents of an lwb are considered "stable" on disk, and will survive @@ -3513,8 +3514,8 @@ MODULE_PARM_DESC(zfs_commit_timeout_pct, "ZIL block open timeout percentage"); module_param(zil_replay_disable, int, 0644); MODULE_PARM_DESC(zil_replay_disable, "Disable intent logging replay"); -module_param(zfs_nocacheflush, int, 0644); -MODULE_PARM_DESC(zfs_nocacheflush, "Disable cache flushes"); +module_param(zil_nocacheflush, int, 0644); +MODULE_PARM_DESC(zil_nocacheflush, "Disable ZIL cache flushes"); module_param(zil_slog_bulk, ulong, 0644); MODULE_PARM_DESC(zil_slog_bulk, "Limit in bytes slog sync writes per commit");