Log zvol truncate/discard operations

As the comment in zvol_discard() suggested, the discard operation
can be logged to the ZIL.  This is a port of the relevant code from
Nexenta, as it was added in "701 UNMAP support for COMSTAR", and the
change has been attributed to the author of that commit.

References:
  https://github.com/Nexenta/illumos-nexenta/commit/b77b923
  https://github.com/zfsonlinux/zfs/blob/089fa91b/module/zfs/zvol.c#L637

Ported-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Dan McDonald 2015-08-02 08:01:14 -05:00 committed by Brian Behlendorf
parent d93b45aefc
commit 460a021391

View File

@ -33,6 +33,8 @@
* *
* Volumes are persistent through reboot and module load. No user command * Volumes are persistent through reboot and module load. No user command
* needs to be run before opening and using a device. * needs to be run before opening and using a device.
*
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/ */
#include <sys/dbuf.h> #include <sys/dbuf.h>
@ -42,6 +44,7 @@
#include <sys/zap.h> #include <sys/zap.h>
#include <sys/zfeature.h> #include <sys/zfeature.h>
#include <sys/zil_impl.h> #include <sys/zil_impl.h>
#include <sys/dmu_tx.h>
#include <sys/zio.h> #include <sys/zio.h>
#include <sys/zfs_rlock.h> #include <sys/zfs_rlock.h>
#include <sys/zfs_znode.h> #include <sys/zfs_znode.h>
@ -446,6 +449,24 @@ out:
return (SET_ERROR(error)); return (SET_ERROR(error));
} }
/*
 * Replay handler for TX_TRUNCATE ZIL records.  TX_TRUNCATE is the record
 * type used for DKIOCFREE/free-long-range, so replaying one frees the
 * range [lr_offset, lr_offset + lr_length) of ZVOL_OBJ again.
 *
 * When byteswap is set, the record is first run through
 * byteswap_uint64_array().  The return value is whatever
 * dmu_free_long_range() returns.
 */
static int
zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
{
	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	/* Offset and length are read only after the optional byteswap. */
	return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
	    lr->lr_offset, lr->lr_length));
}
/* /*
* Replay a TX_WRITE ZIL transaction that didn't get committed * Replay a TX_WRITE ZIL transaction that didn't get committed
* after a system failure * after a system failure
@ -484,7 +505,7 @@ zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
/* /*
* Callback vectors for replaying records. * Callback vectors for replaying records.
* Only TX_WRITE is needed for zvol. * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
*/ */
zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE] = { zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE] = {
(zil_replay_func_t)zvol_replay_err, /* no such transaction type */ (zil_replay_func_t)zvol_replay_err, /* no such transaction type */
@ -497,7 +518,7 @@ zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE] = {
(zil_replay_func_t)zvol_replay_err, /* TX_LINK */ (zil_replay_func_t)zvol_replay_err, /* TX_LINK */
(zil_replay_func_t)zvol_replay_err, /* TX_RENAME */ (zil_replay_func_t)zvol_replay_err, /* TX_RENAME */
(zil_replay_func_t)zvol_replay_write, /* TX_WRITE */ (zil_replay_func_t)zvol_replay_write, /* TX_WRITE */
(zil_replay_func_t)zvol_replay_err, /* TX_TRUNCATE */ (zil_replay_func_t)zvol_replay_truncate, /* TX_TRUNCATE */
(zil_replay_func_t)zvol_replay_err, /* TX_SETATTR */ (zil_replay_func_t)zvol_replay_err, /* TX_SETATTR */
(zil_replay_func_t)zvol_replay_err, /* TX_ACL */ (zil_replay_func_t)zvol_replay_err, /* TX_ACL */
}; };
@ -636,6 +657,30 @@ out:
return (error); return (error);
} }
/*
 * Record a DKIOCFREE/free-long-range request in the ZIL as a TX_TRUNCATE
 * itx describing the range that starts at off and spans len.
 *
 *	zv	volume whose intent log (zv_zilog) receives the record
 *	tx	transaction the itx is assigned to
 *	off	stored in the record as lr_offset
 *	len	stored in the record as lr_length
 *	sync	stored in the itx as itx_sync
 *
 * Nothing is logged when zil_replaying() reports true for this tx.
 */
static void
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
    boolean_t sync)
{
	zilog_t *log = zv->zv_zilog;
	lr_truncate_t *rec;
	itx_t *entry;

	if (zil_replaying(log, tx))
		return;

	entry = zil_itx_create(TX_TRUNCATE, sizeof (*rec));
	entry->itx_sync = sync;

	/* The log record lives inside the itx; fill it in place. */
	rec = (lr_truncate_t *)&entry->itx_lr;
	rec->lr_foid = ZVOL_OBJ;
	rec->lr_offset = off;
	rec->lr_length = len;

	zil_itx_assign(log, entry, tx);
}
static int static int
zvol_discard(struct bio *bio) zvol_discard(struct bio *bio)
{ {
@ -645,6 +690,7 @@ zvol_discard(struct bio *bio)
uint64_t end = start + size; uint64_t end = start + size;
int error; int error;
rl_t *rl; rl_t *rl;
dmu_tx_t *tx;
if (end > zv->zv_volsize) if (end > zv->zv_volsize)
return (SET_ERROR(EIO)); return (SET_ERROR(EIO));
@ -669,12 +715,17 @@ zvol_discard(struct bio *bio)
return (0); return (0);
rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER); rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER);
tx = dmu_tx_create(zv->zv_objset);
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, size); dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
/* if (error != 0) {
* TODO: maybe we should add the operation to the log. dmu_tx_abort(tx);
*/ } else {
zvol_log_truncate(zv, tx, start, size, B_TRUE);
dmu_tx_commit(tx);
error = dmu_free_long_range(zv->zv_objset,
ZVOL_OBJ, start, size);
}
zfs_range_unlock(rl); zfs_range_unlock(rl);