mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
OpenZFS 8997 - ztest assertion failure in zil_lwb_write_issue
PROBLEM
=======
When `dmu_tx_assign` is called from `zil_lwb_write_issue`, it's possible
for either `ERESTART` or `EIO` to be returned.
If `ERESTART` is returned, this will cause an assertion to fail directly
in `zil_lwb_write_issue`, where the code assumes the return value is
`EIO` if `dmu_tx_assign` returns a non-zero value. This can occur if the
SPA is suspended when `dmu_tx_assign` is called, and most often occurs
when running `zloop`.
If `EIO` is returned, this can cause assertions to fail elsewhere in the
ZIL code. For example, `zil_commit_waiter_timeout` contains the
following logic:
lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
In this case, if `dmu_tx_assign` returned `EIO` from within
`zil_lwb_write_issue`, the `lwb` variable passed in will not be issued
to disk. Thus, it's `lwb_state` field will remain `LWB_STATE_OPENED` and
this assertion will fail. `zil_commit_waiter_timeout` assumes that after
it calls `zil_lwb_write_issue`, the `lwb` will be issued to disk, and
doesn't handle the case where this is not true; i.e. it doesn't handle
the case where `dmu_tx_assign` returns `EIO`.
SOLUTION
========
This change modifies the `dmu_tx_assign` function such that `txg_how` is
a bitmask, rather than of the `txg_how_t` enum type. Now, the previous
`TXG_WAITED` semantics can be used via `TXG_NOTHROTTLE`, along with
specifying either `TXG_NOWAIT` or `TXG_WAIT` semantics.
Previously, when `TXG_WAITED` was specified, `TXG_NOWAIT` semantics was
automatically invoked. This was not ideal when using `TXG_WAITED` within
`zil_lwb_write_issued`, leading the problem described above. Rather, we
want to achieve the semantics of `TXG_WAIT`, while also preventing the
`tx` from being penalized via the dirty delay throttling.
With this change, `zil_lwb_write_issued` can acheive the semtantics that
it requires by passing in the value `TXG_WAIT | TXG_NOTHROTTLE` to
`dmu_tx_assign`.
Further, consumers of `dmu_tx_assign` wishing to achieve the old
`TXG_WAITED` semantics can pass in the value `TXG_NOWAIT | TXG_NOTHROTTLE`.
Authored by: Prakash Surya <prakash.surya@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Andriy Gapon <avg@FreeBSD.org>
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
Porting Notes:
- Additionally updated `zfs_tmpfile` to use `TXG_NOTHROTTLE`
OpenZFS-issue: https://www.illumos.org/issues/8997
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/19ea6cb0f9
Closes #7084
This commit is contained in:
committed by
Brian Behlendorf
parent
522db29275
commit
0735ecb334
+9
-6
@@ -245,11 +245,14 @@ typedef enum dmu_object_type {
|
||||
DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE),
|
||||
} dmu_object_type_t;
|
||||
|
||||
typedef enum txg_how {
|
||||
TXG_WAIT = 1,
|
||||
TXG_NOWAIT,
|
||||
TXG_WAITED,
|
||||
} txg_how_t;
|
||||
/*
|
||||
* These flags are intended to be used to specify the "txg_how"
|
||||
* parameter when calling the dmu_tx_assign() function. See the comment
|
||||
* above dmu_tx_assign() for more details on the meaning of these flags.
|
||||
*/
|
||||
#define TXG_NOWAIT (0ULL)
|
||||
#define TXG_WAIT (1ULL<<0)
|
||||
#define TXG_NOTHROTTLE (1ULL<<1)
|
||||
|
||||
void byteswap_uint64_array(void *buf, size_t size);
|
||||
void byteswap_uint32_array(void *buf, size_t size);
|
||||
@@ -729,7 +732,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
|
||||
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
void dmu_tx_mark_netfree(dmu_tx_t *tx);
|
||||
|
||||
@@ -67,9 +67,6 @@ struct dmu_tx {
|
||||
/* placeholder for syncing context, doesn't need specific holds */
|
||||
boolean_t tx_anyobj;
|
||||
|
||||
/* has this transaction already been delayed? */
|
||||
boolean_t tx_waited;
|
||||
|
||||
/* transaction is marked as being a "net free" of space */
|
||||
boolean_t tx_netfree;
|
||||
|
||||
@@ -79,6 +76,9 @@ struct dmu_tx {
|
||||
/* need to wait for sufficient dirty space */
|
||||
boolean_t tx_wait_dirty;
|
||||
|
||||
/* has this transaction already been delayed? */
|
||||
boolean_t tx_dirty_delayed;
|
||||
|
||||
int tx_err;
|
||||
};
|
||||
|
||||
@@ -138,7 +138,7 @@ extern dmu_tx_stats_t dmu_tx_stats;
|
||||
* These routines are defined in dmu.h, and are called by the user.
|
||||
*/
|
||||
dmu_tx_t *dmu_tx_create(objset_t *dd);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
|
||||
Reference in New Issue
Block a user