Fix volume WR_INDIRECT log replay (#6620)

The portion of the zvol_replay_write() handler responsible for
replaying indirect log records was never implemented.
As a result, indirect log records were not being correctly replayed.

This went largely unnoticed since the majority of zvol log records
were of the type WR_COPIED or WR_NEED_COPY prior to OpenZFS 7578.

This patch updates zvol_replay_write() to correctly handle these
log records and adds a new test case which verifies volume replay
to prevent any regression.  The existing test case which verified
replay on a filesystem was renamed to slog_replay_fs.ksh for clarity.

Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #6603
This commit is contained in:
Brian Behlendorf 2017-09-13 16:04:16 -07:00 committed by Tony Hutter
parent 45d1abc74d
commit a2a0440918
13 changed files with 223 additions and 43 deletions

View File

@ -599,26 +599,37 @@ static int
zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
{ {
objset_t *os = zv->zv_objset; objset_t *os = zv->zv_objset;
char *data = (char *)(lr + 1); /* data follows lr_write_t */ char *data = (char *)(lr + 1); /* data follows lr_write_t */
uint64_t off = lr->lr_offset; uint64_t offset, length;
uint64_t len = lr->lr_length;
dmu_tx_t *tx; dmu_tx_t *tx;
int error; int error;
if (byteswap) if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr)); byteswap_uint64_array(lr, sizeof (*lr));
offset = lr->lr_offset;
length = lr->lr_length;
/* If it's a dmu_sync() block, write the whole block */
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
if (length < blocksize) {
offset -= offset % blocksize;
length = blocksize;
}
}
tx = dmu_tx_create(os); tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
error = dmu_tx_assign(tx, TXG_WAIT); error = dmu_tx_assign(tx, TXG_WAIT);
if (error) { if (error) {
dmu_tx_abort(tx); dmu_tx_abort(tx);
} else { } else {
dmu_write(os, ZVOL_OBJ, off, len, data, tx); dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
dmu_tx_commit(tx); dmu_tx_commit(tx);
} }
return (SET_ERROR(error)); return (error);
} }
static int static int

View File

@ -224,8 +224,8 @@ constrain_path() {
# Exceptions # Exceptions
ln -fs "$STF_PATH/awk" "$STF_PATH/nawk" ln -fs "$STF_PATH/awk" "$STF_PATH/nawk"
ln -fs /sbin/fsck.ext2 "$STF_PATH/fsck" ln -fs /sbin/fsck.ext4 "$STF_PATH/fsck"
ln -fs /sbin/mkfs.ext2 "$STF_PATH/newfs" ln -fs /sbin/mkfs.ext4 "$STF_PATH/newfs"
ln -fs "$STF_PATH/gzip" "$STF_PATH/compress" ln -fs "$STF_PATH/gzip" "$STF_PATH/compress"
ln -fs "$STF_PATH/gunzip" "$STF_PATH/uncompress" ln -fs "$STF_PATH/gunzip" "$STF_PATH/uncompress"
ln -fs "$STF_PATH/exportfs" "$STF_PATH/share" ln -fs "$STF_PATH/exportfs" "$STF_PATH/share"

View File

@ -511,7 +511,7 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',
tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
'slog_013_pos', 'slog_014_pos', 'slog_015_pos'] 'slog_013_pos', 'slog_014_pos', 'slog_replay_fs', 'slog_replay_volume']
[tests/functional/snapshot] [tests/functional/snapshot]
tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',

View File

@ -36,6 +36,7 @@ export SYSTEM_FILES='arp
egrep egrep
exportfs exportfs
expr expr
fallocate
false false
fdisk fdisk
file file

View File

@ -140,7 +140,7 @@ function ismounted
[[ "$1" == "$dir" || "$1" == "$name" ]] && return 0 [[ "$1" == "$dir" || "$1" == "$name" ]] && return 0
;; ;;
ext2) ext*)
out=$(df -t $fstype $1 2>/dev/null) out=$(df -t $fstype $1 2>/dev/null)
return $? return $?
;; ;;

View File

@ -33,7 +33,7 @@
. $STF_SUITE/tests/functional/cli_root/zfs_copies/zfs_copies.cfg . $STF_SUITE/tests/functional/cli_root/zfs_copies/zfs_copies.cfg
# #
# umount the ufs|ext2 fs if there is timedout in the ufs|ext2 test # umount the ufs|ext fs if there is timedout in the ufs|ext test
# #
if ismounted $FS_MNTPOINT $NEWFS_DEFAULT_FS ; then if ismounted $FS_MNTPOINT $NEWFS_DEFAULT_FS ; then

View File

@ -73,7 +73,7 @@ function setup_snap_env
[[ $type == 'volume' ]]; then [[ $type == 'volume' ]]; then
# #
# At the first time, Make a UFS file system in volume and # At the first time, Make a UFS file system in volume and
# mount it. Otherwise, only check if this ufs|ext2 file system # mount it. Otherwise, only check if this ufs|ext file system
# was mounted. # was mounted.
# #
log_must eval "echo "y" | \ log_must eval "echo "y" | \
@ -81,8 +81,8 @@ function setup_snap_env
[[ ! -d $TESTDIR1 ]] && log_must mkdir $TESTDIR1 [[ ! -d $TESTDIR1 ]] && log_must mkdir $TESTDIR1
# Make sure the ufs|ext2 filesystem hasn't been mounted, # Make sure the ufs|ext filesystem hasn't been mounted,
# then mount the new ufs|ext2 filesystem. # then mount the new ufs|ext filesystem.
if ! ismounted $TESTDIR1 $NEWFS_DEFAULT_FS; then if ! ismounted $TESTDIR1 $NEWFS_DEFAULT_FS; then
log_must mount \ log_must mount \
$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL $TESTDIR1 $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL $TESTDIR1

View File

@ -53,11 +53,11 @@ function create_pool_test
} }
# #
# Create a ufs|ext2 file system and make a file within the file # Create a ufs|ext file system and make a file within the file
# system for storage pool vdev # system for storage pool vdev
# $1, file size # $1, file size
# $2, file name # $2, file name
# $3, disk name to create ufs|ext2 file system # $3, disk name to create ufs|ext file system
# #
function create_blockfile function create_blockfile
{ {
@ -83,7 +83,7 @@ function create_blockfile
} }
# #
# Umount the ext2|ufs filesystem and remove the mountpoint # Umount the ufs|ext filesystem and remove the mountpoint
# $1, the mount point # $1, the mount point
# #
function clean_blockfile function clean_blockfile

View File

@ -18,4 +18,5 @@ dist_pkgdata_SCRIPTS = \
slog_012_neg.ksh \ slog_012_neg.ksh \
slog_013_pos.ksh \ slog_013_pos.ksh \
slog_014_pos.ksh \ slog_014_pos.ksh \
slog_015_pos.ksh slog_replay_fs.ksh \
slog_replay_volume.ksh

View File

@ -45,6 +45,6 @@ if [[ -d $VDEV2 ]]; then
log_must rm -rf $VDIR2 log_must rm -rf $VDIR2
fi fi
log_must mkdir -p $VDIR $VDIR2 log_must mkdir -p $VDIR $VDIR2
log_must mkfile $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2
log_pass log_pass

View File

@ -48,18 +48,24 @@
# 1. Create an empty file system (TESTFS) # 1. Create an empty file system (TESTFS)
# 2. Freeze TESTFS # 2. Freeze TESTFS
# 3. Run various user commands that create files, directories and ACLs # 3. Run various user commands that create files, directories and ACLs
# 4. Copy TESTFS to temporary location (TESTDIR) # 4. Copy TESTFS to temporary location (TESTDIR/copy)
# 5. Unmount filesystem # 5. Unmount filesystem
# <at this stage TESTFS is empty again and unfrozen, and the # <at this stage TESTFS is empty again and unfrozen, and the
# intent log contains a complete set of deltas to replay it> # intent log contains a complete set of deltas to replay it>
# 6. Remount TESTFS <which replays the intent log> # 6. Remount TESTFS <which replays the intent log>
# 7. Compare TESTFS against the TESTDIR copy # 7. Compare TESTFS against the TESTDIR/copy
# #
verify_runnable "global" verify_runnable "global"
function cleanup_fs
{
rm -f $TESTDIR/checksum
cleanup
}
log_assert "Replay of intent log succeeds." log_assert "Replay of intent log succeeds."
log_onexit cleanup log_onexit cleanup_fs
# #
# 1. Create an empty file system (TESTFS) # 1. Create an empty file system (TESTFS)
@ -67,7 +73,6 @@ log_onexit cleanup
log_must zpool create $TESTPOOL $VDEV log mirror $LDEV log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
log_must zfs set compression=on $TESTPOOL log_must zfs set compression=on $TESTPOOL
log_must zfs create $TESTPOOL/$TESTFS log_must zfs create $TESTPOOL/$TESTFS
log_must mkdir -p $TESTDIR
# #
# This dd command works around an issue where ZIL records aren't created # This dd command works around an issue where ZIL records aren't created
@ -107,8 +112,9 @@ log_must mkdir /$TESTPOOL/$TESTFS/dir_to_delete
log_must rmdir /$TESTPOOL/$TESTFS/dir_to_delete log_must rmdir /$TESTPOOL/$TESTFS/dir_to_delete
# Create a simple validation payload # Create a simple validation payload
log_must mkdir -p $TESTDIR
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload bs=1k count=8 log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload bs=1k count=8
CHECKSUM_BEFORE=$(sha256sum -b /$TESTPOOL/$TESTFS/payload) log_must eval "sha256sum -b /$TESTPOOL/$TESTFS/payload >$TESTDIR/checksum"
# TX_WRITE (small file with ordering) # TX_WRITE (small file with ordering)
log_must mkfile 1k /$TESTPOOL/$TESTFS/small_file log_must mkfile 1k /$TESTPOOL/$TESTFS/small_file
@ -132,7 +138,7 @@ log_must truncate -s 0 /$TESTPOOL/$TESTFS/truncated_file
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/large \ log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/large \
bs=128k count=64 oflag=sync bs=128k count=64 oflag=sync
# Write zeroes, which compresss to holes, in the middle of a file # Write zeros, which compress to holes, in the middle of a file
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=8 log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=8
log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=2 log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=2
@ -155,15 +161,16 @@ log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file
log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file
# #
# 4. Copy TESTFS to temporary location (TESTDIR) # 4. Copy TESTFS to temporary location (TESTDIR/copy)
# #
log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR log_must mkdir -p $TESTDIR/copy
log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/
# #
# 5. Unmount filesystem and export the pool # 5. Unmount filesystem and export the pool
# #
# At this stage TESTFS is empty again and unfrozen, and the # At this stage TESTFS is empty again and frozen, the intent log contains
# intent log contains a complete set of deltas to replay it. # a complete set of deltas to replay.
# #
log_must zfs unmount /$TESTPOOL/$TESTFS log_must zfs unmount /$TESTPOOL/$TESTFS
@ -181,7 +188,7 @@ log_must zpool export $TESTPOOL
log_must zpool import -f -d $VDIR $TESTPOOL log_must zpool import -f -d $VDIR $TESTPOOL
# #
# 7. Compare TESTFS against the TESTDIR copy # 7. Compare TESTFS against the TESTDIR/copy
# #
log_note "Verify current block usage:" log_note "Verify current block usage:"
log_must zdb -bcv $TESTPOOL log_must zdb -bcv $TESTPOOL
@ -191,11 +198,9 @@ log_must attr -l /$TESTPOOL/$TESTFS/xattr.dir
log_must attr -l /$TESTPOOL/$TESTFS/xattr.file log_must attr -l /$TESTPOOL/$TESTFS/xattr.file
log_note "Verify working set diff:" log_note "Verify working set diff:"
log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR >/dev/null || \ log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy
diff -r /$TESTPOOL/$TESTFS $TESTDIR
log_note "Verify file checksum:" log_note "Verify file checksum:"
log_note "$CHECKSUM_BEFORE" log_must sha256sum -c $TESTDIR/checksum
log_must echo "$CHECKSUM_BEFORE" | sha256sum -c
log_pass "Replay of intent log succeeds." log_pass "Replay of intent log succeeds."

View File

@ -0,0 +1,164 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
# Use is subject to license terms.
#
. $STF_SUITE/tests/functional/slog/slog.kshlib
#
# DESCRIPTION:
# Verify slogs are replayed correctly for a volume.
#
# The general idea is to build up an intent log from a bunch of
# diverse user commands without actually committing them to the
# file system. Then generate checksums for files and volume,
# replay the intent log and verify the checksums.
#
# To enable this automated testing of the intent log some minimal
# support is required of the file system. In particular, a
# "freeze" command is required to flush the in-flight transactions;
# to stop the actual committing of transactions; and to ensure no
# deltas are discarded. All deltas past a freeze point are kept
# for replay and comparison later. Here is the flow:
#
# STRATEGY:
# 1. Create an empty volume (TESTVOL), set sync=always, and format
# it with an ext4 filesystem and mount it.
# 2. Freeze TESTVOL.
# 3. Create log records of various types to verify replay.
# 4. Generate checksums for all ext4 files.
# 5. Unmount filesystem and export the pool
# <at this stage TESTVOL is still frozen, and the
# intent log contains a complete set of deltas to replay>
# 6. Import TESTVOL <which replays the intent log> and mount it.
# 7. Verify the stored checksums
#
# NOTE(review): verify_runnable is defined outside this file; presumably it
# restricts where the test is allowed to run -- confirm against the library.
verify_runnable "global"
# Path of the test volume's device; this is what gets formatted and mounted.
VOLUME=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
# Directory on which the volume's ext4 filesystem is mounted.
MNTPNT=$TESTDIR/$TESTVOL
#
# Exit handler for this test: unmount the ext4 filesystem if it is still
# mounted and remove its mount point, delete the checksum file written by
# the test, then call cleanup (defined outside this file).
#
function cleanup_volume
{
	if ismounted $MNTPNT ext4; then
		log_must umount $MNTPNT
		rmdir $MNTPNT
	fi

	# The test stores its checksums in $TESTDIR/checksum, so that file
	# must be removed here; checksum.files is kept in the list so any
	# file left under the old name is still cleaned up.
	rm -f $TESTDIR/checksum $TESTDIR/checksum.files
	cleanup
}
log_assert "Replay of intent log succeeds."
log_onexit cleanup_volume
#
# 1. Create an empty volume (TESTVOL), set sync=always, and format
# it with an ext4 filesystem and mount it.
#
log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
log_must zfs create -V 128M $TESTPOOL/$TESTVOL
log_must zfs set compression=on $TESTPOOL/$TESTVOL
log_must zfs set sync=always $TESTPOOL/$TESTVOL
log_must mkdir -p $TESTDIR
log_must block_device_wait
# Run the format under log_must so a newfs failure is reported at this
# step instead of surfacing later as a mount error.  eval is needed
# because the command is a pipeline.
log_must eval "echo y | newfs -t ext4 -v $VOLUME"
log_must mkdir -p $MNTPNT
log_must mount -o discard $VOLUME $MNTPNT
log_must rmdir $MNTPNT/lost+found
log_must zpool sync
#
# 2. Freeze TESTVOL
#
log_must zpool freeze $TESTPOOL
#
# 3. Create log records of various types to verify replay.
#
# TX_WRITE
log_must dd if=/dev/urandom of=$MNTPNT/latency-8k bs=8k count=1 oflag=sync
log_must dd if=/dev/urandom of=$MNTPNT/latency-128k bs=128k count=1 oflag=sync
# TX_WRITE (WR_INDIRECT)
log_must zfs set logbias=throughput $TESTPOOL/$TESTVOL
log_must dd if=/dev/urandom of=$MNTPNT/throughput-8k bs=8k count=1
log_must dd if=/dev/urandom of=$MNTPNT/throughput-128k bs=128k count=1
# TX_WRITE (holes)
log_must dd if=/dev/urandom of=$MNTPNT/holes bs=128k count=8
log_must dd if=/dev/zero of=$MNTPNT/holes bs=128k count=2 seek=2 conv=notrunc
# TX_TRUNCATE
if fallocate --punch-hole 2>&1 | grep -q "unrecognized option"; then
log_note "fallocate(1) does not support --punch-hole"
else
log_must dd if=/dev/urandom of=$MNTPNT/discard bs=128k count=16
log_must fallocate --punch-hole -l 128K -o 512K $MNTPNT/discard
log_must fallocate --punch-hole -l 512K -o 1M $MNTPNT/discard
fi
#
# 4. Generate checksums for all ext4 files.
#
# The redirection is performed inside eval so that it applies to sha256sum
# itself rather than to log_must; this keeps anything log_must may print on
# stdout out of the checksum file that is later fed to "sha256sum -c".
log_must eval "sha256sum -b $MNTPNT/* >$TESTDIR/checksum"
#
# 5. Unmount filesystem and export the pool
#
# At this stage TESTVOL is initialized with the random data and frozen,
# the intent log contains a complete set of deltas to replay.
#
log_must umount $MNTPNT
log_note "Verify transactions to replay:"
log_must zdb -iv $TESTPOOL/$TESTVOL
log_must zpool export $TESTPOOL
#
# 6. Import TESTPOOL, the intent log is replayed during minor creation.
#
# Import the pool to unfreeze it and claim log blocks. It has to be
# `zpool import -f` because we can't write a frozen pool's labels!
#
log_must zpool import -f -d $VDIR $TESTPOOL
log_must block_device_wait
log_must mount $VOLUME $MNTPNT
#
# 7. Verify the stored checksums
#
log_note "Verify current block usage:"
log_must zdb -bcv $TESTPOOL
log_note "Verify checksums"
log_must sha256sum -c $TESTDIR/checksum
log_pass "Replay of intent log succeeds."

View File

@ -33,23 +33,21 @@
# #
# DESCRIPTION: # DESCRIPTION:
# #
# Creating files on ufs|ext2 and tmpfs, and copying those files to ZFS with # Creating files on ufs|ext and tmpfs, and copying those files to ZFS with
# appropriate cp flags, the xattrs will still be readable. # appropriate cp flags, the xattrs will still be readable.
# #
# STRATEGY: # STRATEGY:
# 1. Create files in ufs|ext2 and tmpfs with xattrs # 1. Create files in ufs|ext and tmpfs with xattrs
# 2. Copy those files to zfs # 2. Copy those files to zfs
# 3. Ensure the xattrs can be read and written # 3. Ensure the xattrs can be read and written
# 4. Do the same in reverse. # 4. Do the same in reverse.
# #
# we need to be able to create zvols to hold our test # we need to be able to create zvols to hold our test ufs|ext filesystem.
# ufs|ext2 filesystem.
verify_runnable "global" verify_runnable "global"
# Make sure we clean up properly # Make sure we clean up properly
function cleanup { function cleanup {
if ismounted /tmp/$NEWFS_DEFAULT_FS.$$ $NEWFS_DEFAULT_FS; then if ismounted /tmp/$NEWFS_DEFAULT_FS.$$ $NEWFS_DEFAULT_FS; then
log_must umount /tmp/$NEWFS_DEFAULT_FS.$$ log_must umount /tmp/$NEWFS_DEFAULT_FS.$$
log_must rm -rf /tmp/$NEWFS_DEFAULT_FS.$$ log_must rm -rf /tmp/$NEWFS_DEFAULT_FS.$$
@ -59,7 +57,7 @@ function cleanup {
log_assert "Files from $NEWFS_DEFAULT_FS,tmpfs with xattrs copied to zfs retain xattr info." log_assert "Files from $NEWFS_DEFAULT_FS,tmpfs with xattrs copied to zfs retain xattr info."
log_onexit cleanup log_onexit cleanup
# Create a UFS|EXT2 file system that we can work in # Create a ufs|ext file system that we can work in
log_must zfs create -V128m $TESTPOOL/$TESTFS/zvol log_must zfs create -V128m $TESTPOOL/$TESTFS/zvol
block_device_wait block_device_wait
log_must eval "echo y | newfs $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol > /dev/null 2>&1" log_must eval "echo y | newfs $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol > /dev/null 2>&1"
@ -69,8 +67,8 @@ if is_linux; then
log_must mount -o user_xattr \ log_must mount -o user_xattr \
$ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol /tmp/$NEWFS_DEFAULT_FS.$$ $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol /tmp/$NEWFS_DEFAULT_FS.$$
# Create files in ext2 and tmpfs, and set some xattrs on them. # Create files in ext and tmpfs, and set some xattrs on them.
# Use small values for xattrs for ext2 compatibility. # Use small values for xattrs for ext compatibility.
log_must touch /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$ log_must touch /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$
log_must touch /tmp/tmpfs-file.$$ log_must touch /tmp/tmpfs-file.$$