From 66aca24730adfb2e3875e5148a03dd1fb435d438 Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Fri, 24 Mar 2017 17:28:38 -0400 Subject: [PATCH] SEEK_HOLE should not block on txg_wait_synced() Forced flushing of txgs can be painfully slow when competing for disk IO, since this is a process meant to execute asynchronously. Optimize this path by allowing data/hole seeking if the file is clean, but if it is dirty, fall back to the old logic. This is a compromise compared to disabling the feature entirely. Reviewed-by: Giuseppe Di Natale Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Signed-off-by: Debabrata Banerjee Closes #4306 Closes #5962 --- man/man5/zfs-module-parameters.5 | 14 +++++++++ module/zfs/dmu.c | 49 +++++++++++++++++++++++++++----- module/zfs/zfs_vnops.c | 4 +++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 1ab43cc06..72ff0686c 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1427,6 +1427,20 @@ Enable NOP writes Use \fB1\fR for yes (default) and \fB0\fR to disable. .RE +.sp +.ne 2 +.na +\fBzfs_dmu_offset_next_sync\fR (int) +.ad +.RS 12n +Enable forcing txg sync to find holes. When enabled, ZFS acts like prior +versions when SEEK_HOLE or SEEK_DATA is used: if the dnode is dirty, the +txgs are synced first so that holes and data can be located accurately. +When disabled, a dirty dnode is reported as having no holes. +.sp +Use \fB1\fR for yes and \fB0\fR to disable (default). +.RE + .sp .ne 2 .na diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 4e62e0435..4929ef9ab 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -67,6 +67,11 @@ int zfs_nopwrite_enabled = 1; */ unsigned long zfs_per_txg_dirty_frees_percent = 30; +/* + * Enable/disable forcing txg sync when dirty in dmu_offset_next. 
+ */ +int zfs_dmu_offset_next_sync = 0; + const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { { DMU_BSWAP_UINT8, TRUE, "unallocated" }, { DMU_BSWAP_ZAP, TRUE, "object directory" }, @@ -1989,24 +1994,43 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zp->zp_nopwrite = nopwrite; } +/* + * This function is only called from zfs_holey_common() for zpl_llseek() + * in order to determine the location of holes. In order to accurately + * report holes all dirty data must be synced to disk. This causes extremely + * poor performance when seeking for holes in a dirty file. As a compromise, + * only provide hole data when the dnode is clean. When a dnode is dirty + * report the dnode as having no holes which is always a safe thing to do. + */ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; int i, err; + boolean_t clean = B_TRUE; err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); + /* - * Sync any current changes before + * Check if dnode is dirty + */ + if (dn->dn_dirtyctx != DN_UNDIRTIED) { + for (i = 0; i < TXG_SIZE; i++) { + if (!list_is_empty(&dn->dn_dirty_records[i])) { + clean = B_FALSE; + break; + } + } + } + + /* + * If compatibility option is on, sync any current changes before * we go trundling through the block pointers. */ - for (i = 0; i < TXG_SIZE; i++) { - if (list_link_active(&dn->dn_dirty_link[i])) - break; - } - if (i != TXG_SIZE) { + if (!clean && zfs_dmu_offset_next_sync) { + clean = B_TRUE; dnode_rele(dn, FTAG); txg_wait_synced(dmu_objset_pool(os), 0); err = dnode_hold(os, object, FTAG, &dn); @@ -2014,7 +2038,12 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) return (err); } - err = dnode_next_offset(dn, (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); + if (clean) + err = dnode_next_offset(dn, + (hole ? 
DNODE_FIND_HOLE : 0), off, 1, 1, 0); + else + err = SET_ERROR(EBUSY); + dnode_rele(dn, FTAG); return (err); @@ -2238,5 +2267,11 @@ MODULE_PARM_DESC(zfs_nopwrite_enabled, "Enable NOP writes"); module_param(zfs_per_txg_dirty_frees_percent, ulong, 0644); MODULE_PARM_DESC(zfs_per_txg_dirty_frees_percent, "percentage of dirtied blocks from frees in one TXG"); + +module_param(zfs_dmu_offset_next_sync, int, 0644); +MODULE_PARM_DESC(zfs_dmu_offset_next_sync, + "Enable forcing txg sync to find holes"); + /* END CSTYLED */ + #endif diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 4afae6c36..72a3104c7 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -278,6 +278,10 @@ zfs_holey_common(struct inode *ip, int cmd, loff_t *off) if (error == ESRCH) return (SET_ERROR(ENXIO)); + /* file was dirty, so fall back to using file_sz logic */ + if (error == EBUSY) + error = 0; + /* * We could find a hole that begins after the logical end-of-file, * because dmu_offset_next() only works on whole blocks. If the