OpenZFS 9689 - zfs range lock code should not be zpl-specific

The ZFS range locking code in zfs_rlock.c/h depends on ZPL-specific
data structures, specifically znode_t.  However, it's also used by
the ZVOL code, which uses a "dummy" znode_t to pass to the range
locking code.

We should clean this up so that the range locking code is generic
and can be used equally by ZPL and ZVOL, and also can be used by
future consumers that may need to run in userland (libzpool) as
well as the kernel.

Porting notes:
* Added missing sys/avl.h include to sys/zfs_rlock.h.
* Removed 'dbuf is within the locked range' ASSERTs from dmu_sync().
  This was needed because ztest does not yet use a locked_range_t.
* Removed "Approved by:" tag requirement from OpenZFS commit
  check to prevent needless warnings when integrating changes
  which have not been merged to illumos.
* Reverted free_list range lock changes which were originally
  needed to defer the cv_destroy() which was called immediately
  after cv_broadcast().  With d2733258 this should be safe but
  if not we may need to reintroduce this logic.
* Reverts: The following two commits were reverted and squashed
  into this change in order to make it easier to apply OpenZFS 9689.
  - d88895a0, which removed the dummy znode from zvol_state
  - e3a07cd0, which updated ztest to use range locks
* Preserved optimized rangelock comparison function.  Preserved the
  rangelock free list.  The cv_destroy() function will block waiting
  for all processes in cv_wait() to be scheduled and drop their
  reference.  This is done to ensure it's safe to free the condition
  variable.  However, blocking while holding the rl->rl_lock mutex
  can result in a deadlock on Linux.  A free list is introduced to
  defer the cv_destroy() and kmem_free() until after the mutex is
  released.

Authored by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>

OpenZFS-issue: https://illumos.org/issues/9689
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/680
External-issue: DLPX-58662
Closes #7980
This commit is contained in:
Matt Ahrens
2018-10-01 15:13:12 -07:00
committed by Brian Behlendorf
parent 50a343d85c
commit 5d43cc9a59
10 changed files with 484 additions and 595 deletions
+49 -22
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -91,6 +91,37 @@ static kmem_cache_t *znode_cache = NULL;
static kmem_cache_t *znode_hold_cache = NULL;
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
/*
 * Range lock callback registered for a znode's z_rangelock.  It runs when
 * an RL_WRITER or RL_APPEND lock is being acquired and rewrites the
 * requested range using znode-specific state:
 *
 *   - an RL_APPEND request becomes an RL_WRITER request that starts at the
 *     current file size;
 *   - a request that requires growing the block size is widened to cover
 *     the whole file.
 *
 * The caller holds the rangelock_t's rl_lock, which avoids races.
 *
 *	new	range being acquired; offset, length and type are updated
 *		in place
 *	arg	the znode_t that owns the range lock
 */
static void
zfs_rangelock_cb(locked_range_t *new, void *arg)
{
	znode_t *zp = arg;

	/*
	 * An append is simply a write positioned at the current end of
	 * file, so rewrite it as such before sizing the request below.
	 */
	if (new->lr_type == RL_APPEND) {
		new->lr_offset = zp->z_size;
		new->lr_type = RL_WRITER;
	}

	/*
	 * Work out how large the file would be once this range has been
	 * written.
	 */
	uint64_t range_end = new->lr_offset + new->lr_length;
	uint64_t end_size = MAX(zp->z_size, range_end);

	/* Everything still fits in the current block: nothing to widen. */
	if (end_size <= zp->z_blksz)
		return;

	/*
	 * The block size cannot grow any further once it is a power of two
	 * that has reached the filesystem's maximum block size.
	 */
	if (ISP2(zp->z_blksz) && zp->z_blksz >= ZTOZSB(zp)->z_max_blksz)
		return;

	/* The block size needs to grow, so lock the whole file range. */
	new->lr_offset = 0;
	new->lr_length = UINT64_MAX;
}
/*ARGSUSED*/
static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
@@ -106,7 +137,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
zfs_rlock_init(&zp->z_range_lock);
rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
zp->z_dirlocks = NULL;
zp->z_acl_cached = NULL;
@@ -128,7 +159,7 @@ zfs_znode_cache_destructor(void *buf, void *arg)
rw_destroy(&zp->z_name_lock);
mutex_destroy(&zp->z_acl_lock);
rw_destroy(&zp->z_xattr_lock);
zfs_rlock_destroy(&zp->z_range_lock);
rangelock_fini(&zp->z_rangelock);
ASSERT(zp->z_dirlocks == NULL);
ASSERT(zp->z_acl_cached == NULL);
@@ -577,9 +608,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_FALSE;
zp->z_is_stale = B_FALSE;
zp->z_range_lock.zr_size = &zp->z_size;
zp->z_range_lock.zr_blksz = &zp->z_blksz;
zp->z_range_lock.zr_max_blksz = &ZTOZSB(zp)->z_max_blksz;
zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
@@ -1475,20 +1503,20 @@ zfs_extend(znode_t *zp, uint64_t end)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
dmu_tx_t *tx;
rl_t *rl;
locked_range_t *lr;
uint64_t newblksz;
int error;
/*
* We will change z_size, so lock the whole file.
*/
rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
/*
* Nothing to do if file already at desired length.
*/
if (end <= zp->z_size) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (0);
}
tx = dmu_tx_create(zfsvfs->z_os);
@@ -1518,7 +1546,7 @@ zfs_extend(znode_t *zp, uint64_t end)
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
zfs_range_unlock(rl);
rangelock_exit(lr);
return (error);
}
@@ -1530,7 +1558,7 @@ zfs_extend(znode_t *zp, uint64_t end)
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
&zp->z_size, sizeof (zp->z_size), tx));
zfs_range_unlock(rl);
rangelock_exit(lr);
dmu_tx_commit(tx);
@@ -1593,19 +1621,19 @@ static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
rl_t *rl;
locked_range_t *lr;
int error;
/*
* Lock the range being freed.
*/
rl = zfs_range_lock(&zp->z_range_lock, off, len, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
/*
* Nothing to do if the range starts at or beyond the end of the file.
*/
if (off >= zp->z_size) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (0);
}
@@ -1655,7 +1683,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
page_len);
}
}
zfs_range_unlock(rl);
rangelock_exit(lr);
return (error);
}
@@ -1673,7 +1701,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
dmu_tx_t *tx;
rl_t *rl;
locked_range_t *lr;
int error;
sa_bulk_attr_t bulk[2];
int count = 0;
@@ -1681,20 +1709,20 @@ zfs_trunc(znode_t *zp, uint64_t end)
/*
* We will change z_size, so lock the whole file.
*/
rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
/*
* Nothing to do if file already at desired length.
*/
if (end >= zp->z_size) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (0);
}
error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
DMU_OBJECT_END);
if (error) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (error);
}
tx = dmu_tx_create(zfsvfs->z_os);
@@ -1704,7 +1732,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
zfs_range_unlock(rl);
rangelock_exit(lr);
return (error);
}
@@ -1720,8 +1748,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
dmu_tx_commit(tx);
zfs_range_unlock(rl);
rangelock_exit(lr);
return (0);
}