diff --git a/config/kernel-bdev-physical-size.m4 b/config/kernel-bdev-physical-size.m4 new file mode 100644 index 000000000..0a1fe8e26 --- /dev/null +++ b/config/kernel-bdev-physical-size.m4 @@ -0,0 +1,39 @@ +dnl # +dnl # 2.6.30 API change +dnl # +dnl # The bdev_physical_block_size() interface was added to provide a way +dnl # to determine the smallest write which can be performed without a +dnl # read-modify-write operation. From the kernel documentation: +dnl # +dnl # What: /sys/block//queue/physical_block_size +dnl # Date: May 2009 +dnl # Contact: Martin K. Petersen +dnl # Description: +dnl # This is the smallest unit the storage device can write +dnl # without resorting to read-modify-write operation. It is +dnl # usually the same as the logical block size but may be +dnl # bigger. One example is SATA drives with 4KB sectors +dnl # that expose a 512-byte logical block size to the +dnl # operating system. +dnl # +dnl # Unfortunately, this interface isn't entirely reliable because +dnl # drives are sometimes known to misreport this value. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE], [ + AC_MSG_CHECKING([whether bdev_physical_block_size() is available]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="-Wno-unused-but-set-variable" + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + struct block_device *bdev = NULL; + bdev_physical_block_size(bdev); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDEV_PHYSICAL_BLOCK_SIZE, 1, + [bdev_physical_block_size() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index d10c6e628..71b0161a8 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -14,6 +14,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE + ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE ZFS_AC_KERNEL_BIO_EMPTY_BARRIER ZFS_AC_KERNEL_BIO_FAILFAST ZFS_AC_KERNEL_BIO_FAILFAST_DTD diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index a5294ceba..1ff8eeaf3 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -394,13 +394,27 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags) /* * 2.6.30 API change - * Change to make it explicit there this is the logical block size. + * To ensure good performance preferentially use the physical block size + * for proper alignment. The physical size is supposed to be the internal + * sector size used by the device. This is often 4096 byte for AF devices, + * while a smaller 512 byte logical size is supported for compatibility. + * + * Unfortunately, many drives still misreport their physical sector size. + * For devices which are known to lie you may need to manually set this + * at pool creation time with 'zpool create -o ashift=12 ...'. + * + * When the physical block size interface isn't available, we fall back to + * the logical block size interface and then the older hard sector size. */ -#ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE -# define vdev_bdev_block_size(bdev) bdev_logical_block_size(bdev) +#ifdef HAVE_BDEV_PHYSICAL_BLOCK_SIZE +# define vdev_bdev_block_size(bdev) bdev_physical_block_size(bdev) #else -# define vdev_bdev_block_size(bdev) bdev_hardsect_size(bdev) -#endif +# ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE +# define vdev_bdev_block_size(bdev) bdev_logical_block_size(bdev) +# else +# define vdev_bdev_block_size(bdev) bdev_hardsect_size(bdev) +# endif /* HAVE_BDEV_LOGICAL_BLOCK_SIZE */ +#endif /* HAVE_BDEV_PHYSICAL_BLOCK_SIZE */ /* * 2.6.37 API change diff --git a/zfs_config.h.in b/zfs_config.h.in index 260bd5ab7..9ea8d2c53 100644 --- a/zfs_config.h.in +++ b/zfs_config.h.in @@ -24,6 +24,9 @@ /* bdev_logical_block_size() is available */ #undef HAVE_BDEV_LOGICAL_BLOCK_SIZE +/* bdev_physical_block_size() is available */ +#undef HAVE_BDEV_PHYSICAL_BLOCK_SIZE + /* struct super_block has s_bdi */ #undef HAVE_BDI