mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
zfs_vnops: make zfs_get_data OS-independent
Move zfs_get_data() in to platform-independent code. The only platform-specific aspect of it is the way we release an inode (Linux) / vnode_t (FreeBSD). I am not aware of a platform that could be supported by ZFS that couldn't implement zfs_rele_async itself. It's sibling zvol_get_data already is platform-independent. Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Christian Schwarz <me@cschwarz.com> Closes #10979
This commit is contained in:
committed by
GitHub
parent
09eb36ce3d
commit
ab8c935ea6
@@ -381,12 +381,6 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len,
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop a reference on the passed inode asynchronously. This ensures
|
||||
* that the caller will never drop the last reference on an inode in
|
||||
* the current context. Doing so while holding open a tx could result
|
||||
* in a deadlock if iput_final() re-enters the filesystem code.
|
||||
*/
|
||||
void
|
||||
zfs_zrele_async(znode_t *zp)
|
||||
{
|
||||
@@ -403,159 +397,6 @@ zfs_zrele_async(znode_t *zp)
|
||||
zrele(zp);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
zfs_get_done(zgd_t *zgd, int error)
|
||||
{
|
||||
znode_t *zp = zgd->zgd_private;
|
||||
|
||||
if (zgd->zgd_db)
|
||||
dmu_buf_rele(zgd->zgd_db, zgd);
|
||||
|
||||
zfs_rangelock_exit(zgd->zgd_lr);
|
||||
|
||||
/*
|
||||
* Release the vnode asynchronously as we currently have the
|
||||
* txg stopped from syncing.
|
||||
*/
|
||||
zfs_zrele_async(zp);
|
||||
|
||||
kmem_free(zgd, sizeof (zgd_t));
|
||||
}
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
static int zil_fault_io = 0;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Get data to generate a TX_WRITE intent log record.
|
||||
*/
|
||||
int
|
||||
zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = arg;
|
||||
objset_t *os = zfsvfs->z_os;
|
||||
znode_t *zp;
|
||||
uint64_t object = lr->lr_foid;
|
||||
uint64_t offset = lr->lr_offset;
|
||||
uint64_t size = lr->lr_length;
|
||||
dmu_buf_t *db;
|
||||
zgd_t *zgd;
|
||||
int error = 0;
|
||||
|
||||
ASSERT3P(lwb, !=, NULL);
|
||||
ASSERT3P(zio, !=, NULL);
|
||||
ASSERT3U(size, !=, 0);
|
||||
|
||||
/*
|
||||
* Nothing to do if the file has been removed
|
||||
*/
|
||||
if (zfs_zget(zfsvfs, object, &zp) != 0)
|
||||
return (SET_ERROR(ENOENT));
|
||||
if (zp->z_unlinked) {
|
||||
/*
|
||||
* Release the vnode asynchronously as we currently have the
|
||||
* txg stopped from syncing.
|
||||
*/
|
||||
zfs_zrele_async(zp);
|
||||
return (SET_ERROR(ENOENT));
|
||||
}
|
||||
|
||||
zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
|
||||
zgd->zgd_lwb = lwb;
|
||||
zgd->zgd_private = zp;
|
||||
|
||||
/*
|
||||
* Write records come in two flavors: immediate and indirect.
|
||||
* For small writes it's cheaper to store the data with the
|
||||
* log record (immediate); for large writes it's cheaper to
|
||||
* sync the data and get a pointer to it (indirect) so that
|
||||
* we don't have to write the data twice.
|
||||
*/
|
||||
if (buf != NULL) { /* immediate write */
|
||||
zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock,
|
||||
offset, size, RL_READER);
|
||||
/* test for truncation needs to be done while range locked */
|
||||
if (offset >= zp->z_size) {
|
||||
error = SET_ERROR(ENOENT);
|
||||
} else {
|
||||
error = dmu_read(os, object, offset, size, buf,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
}
|
||||
ASSERT(error == 0 || error == ENOENT);
|
||||
} else { /* indirect write */
|
||||
/*
|
||||
* Have to lock the whole block to ensure when it's
|
||||
* written out and its checksum is being calculated
|
||||
* that no one can change the data. We need to re-check
|
||||
* blocksize after we get the lock in case it's changed!
|
||||
*/
|
||||
for (;;) {
|
||||
uint64_t blkoff;
|
||||
size = zp->z_blksz;
|
||||
blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
|
||||
offset -= blkoff;
|
||||
zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock,
|
||||
offset, size, RL_READER);
|
||||
if (zp->z_blksz == size)
|
||||
break;
|
||||
offset += blkoff;
|
||||
zfs_rangelock_exit(zgd->zgd_lr);
|
||||
}
|
||||
/* test for truncation needs to be done while range locked */
|
||||
if (lr->lr_offset >= zp->z_size)
|
||||
error = SET_ERROR(ENOENT);
|
||||
#ifdef ZFS_DEBUG
|
||||
if (zil_fault_io) {
|
||||
error = SET_ERROR(EIO);
|
||||
zil_fault_io = 0;
|
||||
}
|
||||
#endif
|
||||
if (error == 0)
|
||||
error = dmu_buf_hold(os, object, offset, zgd, &db,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
|
||||
if (error == 0) {
|
||||
blkptr_t *bp = &lr->lr_blkptr;
|
||||
|
||||
zgd->zgd_db = db;
|
||||
zgd->zgd_bp = bp;
|
||||
|
||||
ASSERT(db->db_offset == offset);
|
||||
ASSERT(db->db_size == size);
|
||||
|
||||
error = dmu_sync(zio, lr->lr_common.lrc_txg,
|
||||
zfs_get_done, zgd);
|
||||
ASSERT(error || lr->lr_length <= size);
|
||||
|
||||
/*
|
||||
* On success, we need to wait for the write I/O
|
||||
* initiated by dmu_sync() to complete before we can
|
||||
* release this dbuf. We will finish everything up
|
||||
* in the zfs_get_done() callback.
|
||||
*/
|
||||
if (error == 0)
|
||||
return (0);
|
||||
|
||||
if (error == EALREADY) {
|
||||
lr->lr_common.lrc_txtype = TX_WRITE2;
|
||||
/*
|
||||
* TX_WRITE2 relies on the data previously
|
||||
* written by the TX_WRITE that caused
|
||||
* EALREADY. We zero out the BP because
|
||||
* it is the old, currently-on-disk BP.
|
||||
*/
|
||||
zgd->zgd_bp = NULL;
|
||||
BP_ZERO(bp);
|
||||
error = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
zfs_get_done(zgd, error);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup an entry in a directory, or an extended attribute directory.
|
||||
|
||||
Reference in New Issue
Block a user