mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 19:28:53 +03:00
Linux: zfs_putpage: handle page writeback errors
Page writeback is considered completed when the associated itx callback completes. A syncing writeback will receive the error in its callback directly, but an in-flight async writeback that was promoted to sync by the ZIL may also receive an error. Writeback errors, even syncing writeback errors, are not especially serious on their own, because the error will ultimately be returned to the zil_commit() caller, either to zfs_fsync() for an explicit sync op (e.g. msync()) or to zfs_putpage() itself for a syncing (WB_SYNC_ALL) writeback (kernel housekeeping or sync_file_range(SYNC_FILE_RANGE_WAIT_AFTER)). The only thing we need to do when a page writeback fails is to re-mark the page dirty, since we don't know if it made it to disk yet. This will ensure that it gets written out again in the future, either by some scheduled async writeback or by another explicit syncing call. On the other side, we need to make sure that if a syncing op arrives, any changes on dirty pages are written back to the DMU and/or the ZIL first. We do this by starting an _async_ (WB_SYNC_NONE) writeback on the file mapping at the start of the sync op (fsync(), msync(), etc). An async op will get an async itx created and logged, ready for the followup zfs_fsync()->zil_commit() to find, while avoiding a zil_commit() call for every page in the range. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #17398
This commit is contained in:
committed by
Brian Behlendorf
parent
391e85f519
commit
3d6ee9a68c
@@ -107,6 +107,10 @@ zpl_iterate(struct file *filp, struct dir_context *ctx)
|
||||
return (error);
|
||||
}
|
||||
|
||||
static inline int
|
||||
zpl_write_cache_pages(struct address_space *mapping,
|
||||
struct writeback_control *wbc, void *data);
|
||||
|
||||
static int
|
||||
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
|
||||
{
|
||||
@@ -116,9 +120,38 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
|
||||
if (error)
|
||||
return (error);
|
||||
/*
|
||||
* Force dirty pages in the range out to the DMU and the log, ready
|
||||
* for zil_commit() to write down.
|
||||
*
|
||||
* We call write_cache_pages() directly to ensure that zpl_putpage() is
|
||||
* called with the flags we need. We need WB_SYNC_NONE to avoid a call
|
||||
* to zil_commit() (since we're doing this as a kind of pre-sync); but
|
||||
* we do need for_sync so that the pages remain in writeback until
|
||||
* they're on disk, and so that we get an error if the DMU write fails.
|
||||
*/
|
||||
if (filemap_range_has_page(inode->i_mapping, start, end)) {
|
||||
int for_sync = 1;
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = WB_SYNC_NONE,
|
||||
.nr_to_write = LONG_MAX,
|
||||
.range_start = start,
|
||||
.range_end = end,
|
||||
};
|
||||
error =
|
||||
zpl_write_cache_pages(inode->i_mapping, &wbc, &for_sync);
|
||||
if (error != 0) {
|
||||
/*
|
||||
* Unclear what state things are in. zfs_putpage() will
|
||||
* ensure the pages remain dirty if they haven't been
|
||||
* written down to the DMU, but because there may be
|
||||
* nothing logged, we can't assume that zfs_fsync() ->
|
||||
* zil_commit() will give us a useful error. It's
|
||||
* safest if we just error out here.
|
||||
*/
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
@@ -495,15 +528,25 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
||||
if (sync_mode != wbc->sync_mode) {
|
||||
if ((result = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
|
||||
return (result);
|
||||
if (zfsvfs->z_log != NULL)
|
||||
result = -zil_commit(zfsvfs->z_log, zp->z_id);
|
||||
|
||||
if (zfsvfs->z_log != NULL) {
|
||||
/*
|
||||
* We don't want to block here if the pool suspends,
|
||||
* because this is not a syncing op by itself, but
|
||||
* might be part of one that the caller will
|
||||
* coordinate.
|
||||
*/
|
||||
result = -zil_commit_flags(zfsvfs->z_log, zp->z_id,
|
||||
ZIL_COMMIT_NOW);
|
||||
}
|
||||
|
||||
zpl_exit(zfsvfs, FTAG);
|
||||
|
||||
/*
|
||||
* If zil_commit() failed, it's unclear what state things
|
||||
* are currently in. putpage() has written back out what
|
||||
* it can to the DMU, but it may not be on disk. We have
|
||||
* little choice but to escape.
|
||||
* If zil_commit_flags() failed, it's unclear what state things
|
||||
* are currently in. putpage() has written back out what it can
|
||||
* to the DMU, but it may not be on disk. We have little choice
|
||||
* but to escape.
|
||||
*/
|
||||
if (result != 0)
|
||||
return (result);
|
||||
|
||||
Reference in New Issue
Block a user