Linux/vnops: implement STATX_DIOALIGN

This statx(2) mask returns the alignment restrictions for O_DIRECT
access on the given file.

We're expected to return both memory and IO alignment. For memory, it's
always PAGE_SIZE. For IO, we return the current block size for the file,
which is the required alignment for an arbitrary block, and for the
first block we'll fall back to the ARC when necessary, so it should
always work.

Sponsored-by: https://despairlabs.com/sponsor/
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <robn@despairlabs.com>
Closes #16972
This commit is contained in:
Rob Norris
2025-03-14 04:15:14 +11:00
committed by GitHub
parent 0433523ca2
commit 13ec35ce3b
11 changed files with 548 additions and 2 deletions
+39
View File
@@ -25,6 +25,7 @@
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek
* Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -1083,6 +1084,44 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
return (error);
}
/*
 * Report the alignment at which direct IO on this file can be issued without
 * paying a read-modify-write penalty on write.
 *
 * On success, stores the alignment in *alignp and returns 0. The value is
 * never smaller than PAGE_SIZE.
 *
 * If direct IO is switched off (zfs_dio_enabled is clear, or the objset's
 * os_direct is ZFS_DIRECT_DISABLED), returns EOPNOTSUPP and leaves *alignp
 * untouched.
 */
int
zfs_get_direct_alignment(znode_t *zp, uint64_t *alignp)
{
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	uint64_t blksz;

	if (!zfs_dio_enabled)
		return (SET_ERROR(EOPNOTSUPP));
	if (zfsvfs->z_os->os_direct == ZFS_DIRECT_DISABLED)
		return (SET_ERROR(EOPNOTSUPP));

	/*
	 * Once a file spans more than one block its block size can no longer
	 * change, so that block size is the ideal alignment.
	 *
	 * A file that still fits in a single block is different: its block
	 * size may yet grow, up to the dataset recordsize. For such a file we
	 * report the largest block size it could grow to, so that a program
	 * asking for the alignment right after creating the file gets an
	 * answer that stays valid once the file has grown into its second
	 * block and beyond.
	 *
	 * There is no cheap block count to consult here, so "single block
	 * that may still grow" is detected with two tests:
	 *   - the apparent file size does not exceed the current block size
	 *     (the file has not outgrown its current block), and
	 *   - the current block size is below the dataset maximum (growing
	 *     past the current block would increase the block size).
	 */
	if (zp->z_size <= zp->z_blksz && zp->z_blksz < zfsvfs->z_max_blksz)
		blksz = zfsvfs->z_max_blksz;
	else
		blksz = zp->z_blksz;

	/* Never report an alignment below the page size. */
	*alignp = MAX(blksz, PAGE_SIZE);

	return (0);
}
#ifdef ZFS_DEBUG
/*
 * File-scope flag that exists only in ZFS_DEBUG builds; starts out as 0.
 * NOTE(review): the name suggests a ZIL IO fault-injection knob, but its
 * users are outside this view -- confirm before relying on that.
 */
static int zil_fault_io = 0;
#endif