FreeBSD: zfs_putpages: don't undirty pages until after write completes

zfs_putpages() would put the entire range of pages onto the ZIL, then
return VM_PAGER_OK for each page to the kernel. However, an associated
zil_commit() or txg sync had not happened at this point, so the write
may not actually be on disk.

So, we rework it to use a ZIL commit callback, and do the post-write
work of undirtying the page and signaling completion there. We return
VM_PAGER_PEND to the kernel instead so it knows that we will take care
of it.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Mark Johnston <markj@FreeBSD.org>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #17445
This commit is contained in:
Rob Norris 2025-06-05 10:15:26 +10:00 committed by Brian Behlendorf
parent aa964ce61b
commit 238eab7dc1
3 changed files with 47 additions and 15 deletions

View File

@@ -35,6 +35,7 @@
 extern const int zfs_vm_pagerret_bad;
 extern const int zfs_vm_pagerret_error;
 extern const int zfs_vm_pagerret_ok;
+extern const int zfs_vm_pagerret_pend;
 extern const int zfs_vm_pagerput_sync;
 extern const int zfs_vm_pagerput_inval;

View File

@@ -43,6 +43,7 @@
 const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
 const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
 const int zfs_vm_pagerret_ok = VM_PAGER_OK;
+const int zfs_vm_pagerret_pend = VM_PAGER_PEND;
 const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC;
 const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL;

View File

@@ -25,6 +25,7 @@
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2025, Klara, Inc.
  */

 /* Portions Copyright 2007 Jeremy Teo */
@@ -4084,6 +4085,33 @@ zfs_freebsd_getpages(struct vop_getpages_args *ap)
 	    ap->a_rahead));
 }
/*
 * Argument block passed from zfs_putpages() to the ZIL commit callback.
 * pca_pages is a flexible array member holding the busy pages for one
 * putpages request; the struct is sized with offsetof() at allocation
 * and free time.
 */
typedef struct {
uint_t pca_npages;
vm_page_t pca_pages[];
} putpage_commit_arg_t;

/*
 * ZIL commit callback for zfs_putpages(): runs once the logged write is
 * durable. Undirties and un-shared-busies every page of the request,
 * drops the paging-in-progress references taken by the pager, and frees
 * the argument block.
 *
 * NOTE(review): derives the VM object from pca_pages[0], so this assumes
 * every page in the batch belongs to the same object — true for a single
 * putpages call, but worth confirming if batching ever changes.
 */
static void
zfs_putpage_commit_cb(void *arg)
{
putpage_commit_arg_t *pca = arg;
vm_object_t object = pca->pca_pages[0]->object;

/* All page-state transitions happen under the object write lock. */
zfs_vmobject_wlock(object);

for (uint_t i = 0; i < pca->pca_npages; i++) {
vm_page_t pp = pca->pca_pages[i];
vm_page_undirty(pp);
vm_page_sunbusy(pp);
}

/* Release the pip references as a single batch. */
vm_object_pip_wakeupn(object, pca->pca_npages);

zfs_vmobject_wunlock(object);

/* Size must mirror the kmem_alloc() in zfs_putpages(). */
kmem_free(pca,
offsetof(putpage_commit_arg_t, pca_pages[pca->pca_npages]));
}
 static int
 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
     int *rtvals)
@@ -4185,10 +4213,12 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
 	}

 	if (zp->z_blksz < PAGE_SIZE) {
-		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
-			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
+		vm_ooffset_t woff = off;
+		size_t wlen = len;
+		for (i = 0; wlen > 0; woff += tocopy, wlen -= tocopy, i++) {
+			tocopy = MIN(PAGE_SIZE, wlen);
 			va = zfs_map_page(ma[i], &sf);
-			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
+			dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx);
 			zfs_unmap_page(sf);
 		}
 	} else {
@@ -4209,19 +4239,19 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		ASSERT0(err);
-		/*
-		 * XXX we should be passing a callback to undirty
-		 * but that would make the locking messier
-		 */
-		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
-		    len, commit, B_FALSE, NULL, NULL);
-
-		zfs_vmobject_wlock(object);
-		for (i = 0; i < ncount; i++) {
-			rtvals[i] = zfs_vm_pagerret_ok;
-			vm_page_undirty(ma[i]);
-		}
-		zfs_vmobject_wunlock(object);
+
+		putpage_commit_arg_t *pca = kmem_alloc(
+		    offsetof(putpage_commit_arg_t, pca_pages[ncount]),
+		    KM_SLEEP);
+		pca->pca_npages = ncount;
+		memcpy(pca->pca_pages, ma, sizeof (vm_page_t) * ncount);
+
+		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp,
+		    off, len, commit, B_FALSE, zfs_putpage_commit_cb, pca);
+
+		for (i = 0; i < ncount; i++)
+			rtvals[i] = zfs_vm_pagerret_pend;
+
 		VM_CNT_INC(v_vnodeout);
 		VM_CNT_ADD(v_vnodepgsout, ncount);
 	}