mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-01-25 10:12:13 +03:00
zvol: Fix blk-mq sync
The zvol blk-mq codepaths would erroneously send FLUSH and TRIM commands down the read codepath rather than the write codepath. This fixes the issue and updates the zvol_misc_fua test to verify that sync writes are actually happening.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #17761
Closes #17765
This commit is contained in:
parent
a9bcf4faf3
commit
9079f986ae
@ -542,24 +542,6 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id)
|
|||||||
}
|
}
|
||||||
#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
|
#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
|
||||||
|
|
||||||
/*
|
|
||||||
* All the io_*() helper functions below can operate on a bio, or a rq, but
|
|
||||||
* not both. The older submit_bio() codepath will pass a bio, and the
|
|
||||||
* newer blk-mq codepath will pass a rq.
|
|
||||||
*/
|
|
||||||
static inline int
|
|
||||||
io_data_dir(struct bio *bio, struct request *rq)
|
|
||||||
{
|
|
||||||
if (rq != NULL) {
|
|
||||||
if (op_is_write(req_op(rq))) {
|
|
||||||
return (WRITE);
|
|
||||||
} else {
|
|
||||||
return (READ);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (bio_data_dir(bio));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
io_is_flush(struct bio *bio, struct request *rq)
|
io_is_flush(struct bio *bio, struct request *rq)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -484,7 +484,28 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
|||||||
fstrans_cookie_t cookie = spl_fstrans_mark();
|
fstrans_cookie_t cookie = spl_fstrans_mark();
|
||||||
uint64_t offset = io_offset(bio, rq);
|
uint64_t offset = io_offset(bio, rq);
|
||||||
uint64_t size = io_size(bio, rq);
|
uint64_t size = io_size(bio, rq);
|
||||||
int rw = io_data_dir(bio, rq);
|
int rw;
|
||||||
|
|
||||||
|
if (rq != NULL) {
|
||||||
|
/*
|
||||||
|
* Flush & trim requests go down the zvol_write codepath. Or
|
||||||
|
* more specifically:
|
||||||
|
*
|
||||||
|
* If request is a write, or if it's op_is_sync() and not a
|
||||||
|
* read, or if it's a flush, or if it's a discard, then send the
|
||||||
|
* request down the write path.
|
||||||
|
*/
|
||||||
|
if (op_is_write(rq->cmd_flags) ||
|
||||||
|
(op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) ||
|
||||||
|
req_op(rq) == REQ_OP_FLUSH ||
|
||||||
|
op_is_discard(rq->cmd_flags)) {
|
||||||
|
rw = WRITE;
|
||||||
|
} else {
|
||||||
|
rw = READ;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rw = bio_data_dir(bio);
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(zv->zv_flags & ZVOL_REMOVING)) {
|
if (unlikely(zv->zv_flags & ZVOL_REMOVING)) {
|
||||||
zvol_end_io(bio, rq, SET_ERROR(ENXIO));
|
zvol_end_io(bio, rq, SET_ERROR(ENXIO));
|
||||||
|
|||||||
@ -50,17 +50,53 @@ fi
|
|||||||
|
|
||||||
typeset datafile1="$(mktemp -t zvol_misc_fua1.XXXXXX)"
|
typeset datafile1="$(mktemp -t zvol_misc_fua1.XXXXXX)"
|
||||||
typeset datafile2="$(mktemp -t zvol_misc_fua2.XXXXXX)"
|
typeset datafile2="$(mktemp -t zvol_misc_fua2.XXXXXX)"
|
||||||
|
typeset datafile3="$(mktemp -t zvol_misc_fua3_log.XXXXXX)"
|
||||||
typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL
|
typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL
|
||||||
|
|
||||||
|
typeset DISK1=${DISKS%% *}
|
||||||
function cleanup
|
function cleanup
|
||||||
{
|
{
|
||||||
rm "$datafile1" "$datafile2"
|
log_must zpool remove $TESTPOOL $datafile3
|
||||||
|
# Remove every temp file this test created, including the file-backed log vdev.
rm "$datafile1" "$datafile2" "$datafile3"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Prints the total number of sync writes for a vdev
|
||||||
|
# $1: vdev
|
||||||
|
function get_sync
|
||||||
|
{
|
||||||
|
zpool iostat -p -H -v -r $TESTPOOL $1 | \
|
||||||
|
awk '/[0-9]+$/{s+=$4+$5} END{print s}'
|
||||||
}
|
}
|
||||||
|
|
||||||
function do_test {
|
function do_test {
|
||||||
# Wait for udev to create symlinks to our zvol
|
# Wait for udev to create symlinks to our zvol
|
||||||
block_device_wait $zvolpath
|
block_device_wait $zvolpath
|
||||||
|
|
||||||
|
# Write using sync (creates FLUSH calls after writes, but not FUA)
|
||||||
|
old_vdev_writes=$(get_sync $DISK1)
|
||||||
|
old_log_writes=$(get_sync $datafile3)
|
||||||
|
|
||||||
|
log_must fio --name=write_iops --size=5M \
|
||||||
|
--ioengine=libaio --verify=0 --bs=4K \
|
||||||
|
--iodepth=1 --rw=randwrite --group_reporting=1 \
|
||||||
|
--filename=$zvolpath --sync=1
|
||||||
|
|
||||||
|
vdev_writes=$(( $(get_sync $DISK1) - $old_vdev_writes))
|
||||||
|
log_writes=$(( $(get_sync $datafile3) - $old_log_writes))
|
||||||
|
|
||||||
|
# When we're doing sync writes, we should see many more writes go to
|
||||||
|
# the log vs the first vdev. Experiments show anywhere from a 160-320x
|
||||||
|
# ratio of writes to the log vs the first vdev (due to some straggler
|
||||||
|
# writes to the first vdev).
|
||||||
|
#
|
||||||
|
# Check that we have a large ratio (100x) of sync writes going to the
|
||||||
|
# log device
|
||||||
|
ratio=$(($log_writes / $vdev_writes))
|
||||||
|
log_note "Got $log_writes log writes, $vdev_writes vdev writes."
|
||||||
|
if [ $ratio -lt 100 ] ; then
|
||||||
|
log_fail "Expected > 100x more log writes than vdev writes. "
|
||||||
|
fi
|
||||||
|
|
||||||
# Create a data file
|
# Create a data file
|
||||||
log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5
|
log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5
|
||||||
|
|
||||||
@ -81,6 +117,8 @@ log_assert "Verify that a ZFS volume can do Force Unit Access (FUA)"
|
|||||||
log_onexit cleanup
|
log_onexit cleanup
|
||||||
|
|
||||||
log_must zfs set compression=off $TESTPOOL/$TESTVOL
|
log_must zfs set compression=off $TESTPOOL/$TESTVOL
|
||||||
|
log_must truncate -s 100M $datafile3
|
||||||
|
log_must zpool add $TESTPOOL log $datafile3
|
||||||
|
|
||||||
log_note "Testing without blk-mq"
|
log_note "Testing without blk-mq"
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user