mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-11-17 18:11:00 +03:00
Fix range locking in ZIL commit codepath
Since OpenZFS 7578 (1b7c1e5
) if we have a ZVOL with logbias=throughput
we will force WR_INDIRECT itxs in zvol_log_write() setting itx->itx_lr
offset and length to the offset and length of the BIO from
zvol_write()->zvol_log_write(): these offset and length are later used
to take a range lock in zillog->zl_get_data function: zvol_get_data().
Now suppose we have a ZVOL with blocksize=8K and push 4K writes to
offset 0: we will only be range-locking 0-4096. This means the
ASSERTion we make in dbuf_unoverride() is no longer valid because now
dmu_sync() is called from zilog's get_data functions holding a partial
lock on the dbuf.
Fix this by taking a range lock on the whole block in zvol_get_data().
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #6238
Closes #6315
Closes #6356
Closes #6477
This commit is contained in:
parent
3468fdbd34
commit
ae5b4a05ff
@ -2145,6 +2145,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||||||
if (buf != NULL) { /* immediate write */
|
if (buf != NULL) { /* immediate write */
|
||||||
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
|
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
|
||||||
RL_READER);
|
RL_READER);
|
||||||
|
zgd->zgd_rl = zgd_private->z_rl->z_rl;
|
||||||
|
|
||||||
error = dmu_read(os, object, offset, size, buf,
|
error = dmu_read(os, object, offset, size, buf,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH);
|
||||||
@ -2160,6 +2161,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||||||
|
|
||||||
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
|
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
|
||||||
RL_READER);
|
RL_READER);
|
||||||
|
zgd->zgd_rl = zgd_private->z_rl->z_rl;
|
||||||
|
|
||||||
error = dmu_buf_hold(os, object, offset, zgd, &db,
|
error = dmu_buf_hold(os, object, offset, zgd, &db,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH);
|
||||||
|
@ -49,6 +49,7 @@
|
|||||||
#include <sys/zfeature.h>
|
#include <sys/zfeature.h>
|
||||||
#include <sys/abd.h>
|
#include <sys/abd.h>
|
||||||
#include <sys/trace_dmu.h>
|
#include <sys/trace_dmu.h>
|
||||||
|
#include <sys/zfs_rlock.h>
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#include <sys/vmsystm.h>
|
#include <sys/vmsystm.h>
|
||||||
#include <sys/zfs_znode.h>
|
#include <sys/zfs_znode.h>
|
||||||
@ -1715,6 +1716,11 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
|
|||||||
ASSERT(pio != NULL);
|
ASSERT(pio != NULL);
|
||||||
ASSERT(txg != 0);
|
ASSERT(txg != 0);
|
||||||
|
|
||||||
|
/* dbuf is within the locked range */
|
||||||
|
ASSERT3U(db->db.db_offset, >=, zgd->zgd_rl->r_off);
|
||||||
|
ASSERT3U(db->db.db_offset + db->db.db_size, <=,
|
||||||
|
zgd->zgd_rl->r_off + zgd->zgd_rl->r_len);
|
||||||
|
|
||||||
SET_BOOKMARK(&zb, ds->ds_object,
|
SET_BOOKMARK(&zb, ds->ds_object,
|
||||||
db->db.db_object, db->db_level, db->db_blkid);
|
db->db.db_object, db->db_level, db->db_blkid);
|
||||||
|
|
||||||
|
@ -1050,7 +1050,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||||||
} else { /* indirect write */
|
} else { /* indirect write */
|
||||||
/*
|
/*
|
||||||
* Have to lock the whole block to ensure when it's
|
* Have to lock the whole block to ensure when it's
|
||||||
* written out and it's checksum is being calculated
|
* written out and its checksum is being calculated
|
||||||
* that no one can change the data. We need to re-check
|
* that no one can change the data. We need to re-check
|
||||||
* blocksize after we get the lock in case it's changed!
|
* blocksize after we get the lock in case it's changed!
|
||||||
*/
|
*/
|
||||||
|
@ -821,6 +821,7 @@ zvol_discard(void *arg)
|
|||||||
uint64_t start = BIO_BI_SECTOR(bio) << 9;
|
uint64_t start = BIO_BI_SECTOR(bio) << 9;
|
||||||
uint64_t size = BIO_BI_SIZE(bio);
|
uint64_t size = BIO_BI_SIZE(bio);
|
||||||
uint64_t end = start + size;
|
uint64_t end = start + size;
|
||||||
|
boolean_t sync;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
unsigned long start_jif;
|
unsigned long start_jif;
|
||||||
@ -830,9 +831,11 @@ zvol_discard(void *arg)
|
|||||||
start_jif = jiffies;
|
start_jif = jiffies;
|
||||||
generic_start_io_acct(WRITE, bio_sectors(bio), &zv->zv_disk->part0);
|
generic_start_io_acct(WRITE, bio_sectors(bio), &zv->zv_disk->part0);
|
||||||
|
|
||||||
|
sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
|
||||||
|
|
||||||
if (end > zv->zv_volsize) {
|
if (end > zv->zv_volsize) {
|
||||||
error = SET_ERROR(EIO);
|
error = SET_ERROR(EIO);
|
||||||
goto out;
|
goto unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -848,7 +851,7 @@ zvol_discard(void *arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (start >= end)
|
if (start >= end)
|
||||||
goto out;
|
goto unlock;
|
||||||
|
|
||||||
tx = dmu_tx_create(zv->zv_objset);
|
tx = dmu_tx_create(zv->zv_objset);
|
||||||
dmu_tx_mark_netfree(tx);
|
dmu_tx_mark_netfree(tx);
|
||||||
@ -861,9 +864,11 @@ zvol_discard(void *arg)
|
|||||||
error = dmu_free_long_range(zv->zv_objset,
|
error = dmu_free_long_range(zv->zv_objset,
|
||||||
ZVOL_OBJ, start, size);
|
ZVOL_OBJ, start, size);
|
||||||
}
|
}
|
||||||
|
unlock:
|
||||||
out:
|
|
||||||
zfs_range_unlock(zvr->rl);
|
zfs_range_unlock(zvr->rl);
|
||||||
|
if (error == 0 && sync)
|
||||||
|
zil_commit(zv->zv_zilog, ZVOL_OBJ);
|
||||||
|
|
||||||
rw_exit(&zv->zv_suspend_lock);
|
rw_exit(&zv->zv_suspend_lock);
|
||||||
generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif);
|
generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif);
|
||||||
BIO_END_IO(bio, -error);
|
BIO_END_IO(bio, -error);
|
||||||
@ -933,6 +938,8 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rw == WRITE) {
|
if (rw == WRITE) {
|
||||||
|
boolean_t need_sync = B_FALSE;
|
||||||
|
|
||||||
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
||||||
BIO_END_IO(bio, -SET_ERROR(EROFS));
|
BIO_END_IO(bio, -SET_ERROR(EROFS));
|
||||||
goto out;
|
goto out;
|
||||||
@ -966,13 +973,24 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
|||||||
*/
|
*/
|
||||||
zvr->rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
|
zvr->rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
|
||||||
RL_WRITER);
|
RL_WRITER);
|
||||||
|
/*
|
||||||
|
* Sync writes and discards execute zil_commit() which may need
|
||||||
|
* to take a RL_READER lock on the whole block being modified
|
||||||
|
* via its zillog->zl_get_data(): to avoid circular dependency
|
||||||
|
* issues with taskq threads execute these requests
|
||||||
|
* synchronously here in zvol_request().
|
||||||
|
*/
|
||||||
|
need_sync = bio_is_fua(bio) ||
|
||||||
|
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
|
||||||
if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
|
if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
|
||||||
if (zvol_request_sync || taskq_dispatch(zvol_taskq,
|
if (zvol_request_sync || need_sync ||
|
||||||
zvol_discard, zvr, TQ_SLEEP) == TASKQID_INVALID)
|
taskq_dispatch(zvol_taskq, zvol_discard, zvr,
|
||||||
|
TQ_SLEEP) == TASKQID_INVALID)
|
||||||
zvol_discard(zvr);
|
zvol_discard(zvr);
|
||||||
} else {
|
} else {
|
||||||
if (zvol_request_sync || taskq_dispatch(zvol_taskq,
|
if (zvol_request_sync || need_sync ||
|
||||||
zvol_write, zvr, TQ_SLEEP) == TASKQID_INVALID)
|
taskq_dispatch(zvol_taskq, zvol_write, zvr,
|
||||||
|
TQ_SLEEP) == TASKQID_INVALID)
|
||||||
zvol_write(zvr);
|
zvol_write(zvr);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -1030,8 +1048,6 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||||||
|
|
||||||
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
|
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
|
||||||
zgd->zgd_zilog = zv->zv_zilog;
|
zgd->zgd_zilog = zv->zv_zilog;
|
||||||
zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
|
|
||||||
RL_READER);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Write records come in two flavors: immediate and indirect.
|
* Write records come in two flavors: immediate and indirect.
|
||||||
@ -1041,11 +1057,21 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||||||
* we don't have to write the data twice.
|
* we don't have to write the data twice.
|
||||||
*/
|
*/
|
||||||
if (buf != NULL) { /* immediate write */
|
if (buf != NULL) { /* immediate write */
|
||||||
|
zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
|
||||||
|
RL_READER);
|
||||||
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
|
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH);
|
||||||
} else {
|
} else { /* indirect write */
|
||||||
|
/*
|
||||||
|
* Have to lock the whole block to ensure when it's written out
|
||||||
|
* and its checksum is being calculated that no one can change
|
||||||
|
* the data. Contrarily to zfs_get_data we need not re-check
|
||||||
|
* blocksize after we get the lock because it cannot be changed.
|
||||||
|
*/
|
||||||
size = zv->zv_volblocksize;
|
size = zv->zv_volblocksize;
|
||||||
offset = P2ALIGN_TYPED(offset, size, uint64_t);
|
offset = P2ALIGN_TYPED(offset, size, uint64_t);
|
||||||
|
zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
|
||||||
|
RL_READER);
|
||||||
error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
|
error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH);
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
|
@ -575,7 +575,7 @@ tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg']
|
|||||||
[tests/functional/zvol/zvol_misc]
|
[tests/functional/zvol/zvol_misc]
|
||||||
tests = ['zvol_misc_001_neg', 'zvol_misc_002_pos', 'zvol_misc_003_neg',
|
tests = ['zvol_misc_001_neg', 'zvol_misc_002_pos', 'zvol_misc_003_neg',
|
||||||
'zvol_misc_004_pos', 'zvol_misc_005_neg', 'zvol_misc_006_pos',
|
'zvol_misc_004_pos', 'zvol_misc_005_neg', 'zvol_misc_006_pos',
|
||||||
'zvol_misc_snapdev', 'zvol_misc_volmode']
|
'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil']
|
||||||
|
|
||||||
[tests/functional/zvol/zvol_swap]
|
[tests/functional/zvol/zvol_swap]
|
||||||
tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_003_pos',
|
tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_003_pos',
|
||||||
|
@ -10,4 +10,5 @@ dist_pkgdata_SCRIPTS = \
|
|||||||
zvol_misc_005_neg.ksh \
|
zvol_misc_005_neg.ksh \
|
||||||
zvol_misc_006_pos.ksh \
|
zvol_misc_006_pos.ksh \
|
||||||
zvol_misc_snapdev.ksh \
|
zvol_misc_snapdev.ksh \
|
||||||
zvol_misc_volmode.ksh
|
zvol_misc_volmode.ksh \
|
||||||
|
zvol_misc_zil.ksh
|
||||||
|
74
tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_zil.ksh
Executable file
74
tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_zil.ksh
Executable file
@ -0,0 +1,74 @@
|
|||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or http://www.opensolaris.org/os/licensing.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/zvol/zvol_common.shlib
|
||||||
|
. $STF_SUITE/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Verify ZIL functionality on ZVOLs
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Create a ZVOLs with various combination of "logbias" and "sync" values
|
||||||
|
# 2. Write data to ZVOL device node
|
||||||
|
# 3. Verify we don't trigger any issue like the one reported in #6238
|
||||||
|
#
|
||||||
|
|
||||||
|
verify_runnable "global"
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
datasetexists $ZVOL && log_must_busy zfs destroy $ZVOL
|
||||||
|
udev_wait
|
||||||
|
}
|
||||||
|
|
||||||
|
log_assert "Verify ZIL functionality on ZVOLs"
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
ZVOL="$TESTPOOL/vol"
|
||||||
|
ZDEV="$ZVOL_DEVDIR/$ZVOL"
|
||||||
|
typeset -a logbias_prop_vals=('latency' 'throughput')
|
||||||
|
typeset -a sync_prop_vals=('standard' 'always' 'disabled')
|
||||||
|
|
||||||
|
for logbias in ${logbias_prop_vals[@]}; do
|
||||||
|
for sync in ${sync_prop_vals[@]}; do
|
||||||
|
# 1. Create a ZVOL with logbias=throughput and sync=always
|
||||||
|
log_must zfs create -V $VOLSIZE -b 128K -o sync=$sync \
|
||||||
|
-o logbias=$logbias $ZVOL
|
||||||
|
|
||||||
|
# 2. Write data to its device node
|
||||||
|
for i in {1..50}; do
|
||||||
|
dd if=/dev/zero of=$ZDEV bs=8k count=1 &
|
||||||
|
done
|
||||||
|
|
||||||
|
# 3. Verify we don't trigger any issue
|
||||||
|
log_must wait
|
||||||
|
log_must_busy zfs destroy $ZVOL
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
log_pass "ZIL functionality works on ZVOLs"
|
Loading…
Reference in New Issue
Block a user