mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-31 11:14:09 +03:00
OpenZFS 9425 - channel programs can be interrupted
Problem Statement
=================
ZFS Channel program scripts currently require a timeout, so that hung or long-running scripts return a timeout error instead of causing ZFS to get wedged. This limit can currently be set up to 100 million Lua instructions. Even with a limit in place, it would be desirable for a sysadmin (support engineer) to be able to cancel a script that is taking a long time.

Proposed Solution
=================
Make it possible to abort a channel program by sending an interrupt signal. In the underlying txg_wait_synced function, switch the cv_wait to a cv_wait_sig to catch the signal. Once a signal is encountered, the dsl_sync_task function can install a Lua hook that will get called before the Lua interpreter executes a new line of code. The dsl_sync_task can resume with a standard txg_wait_synced call and wait for the txg to complete. Meanwhile, the hook will abort the script and indicate that the channel program was canceled. The kernel returns an EINTR to indicate that the channel program run was canceled.

Porting notes: Added missing return value from cv_wait_sig()

Authored by: Don Brady <don.brady@delphix.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Sara Hartse <sara.hartse@delphix.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Robert Mustacchi <rm@joyent.com>
Ported-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Don Brady <don.brady@delphix.com>
OpenZFS-issue: https://www.illumos.org/issues/9425
OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/d0cb1fb926
Closes #8904
This commit is contained in:
+33
-3
@@ -675,8 +675,8 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution)
|
||||
mutex_exit(&tx->tx_sync_lock);
|
||||
}
|
||||
|
||||
void
|
||||
txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
|
||||
static boolean_t
|
||||
txg_wait_synced_impl(dsl_pool_t *dp, uint64_t txg, boolean_t wait_sig)
|
||||
{
|
||||
tx_state_t *tx = &dp->dp_tx;
|
||||
|
||||
@@ -695,9 +695,39 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
|
||||
"tx_synced=%llu waiting=%llu dp=%p\n",
|
||||
tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
|
||||
cv_broadcast(&tx->tx_sync_more_cv);
|
||||
cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock);
|
||||
if (wait_sig) {
|
||||
/*
|
||||
* Condition wait here but stop if the thread receives a
|
||||
* signal. The caller may call txg_wait_synced*() again
|
||||
* to resume waiting for this txg.
|
||||
*/
|
||||
if (cv_wait_io_sig(&tx->tx_sync_done_cv,
|
||||
&tx->tx_sync_lock) == 0) {
|
||||
mutex_exit(&tx->tx_sync_lock);
|
||||
return (B_TRUE);
|
||||
}
|
||||
} else {
|
||||
cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock);
|
||||
}
|
||||
}
|
||||
mutex_exit(&tx->tx_sync_lock);
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
 * Non-interruptible wait: calls txg_wait_synced_impl() with wait_sig set to
 * B_FALSE, so the wait uses cv_wait_io() rather than cv_wait_io_sig().
 * The implementation only returns B_TRUE from its wait_sig path, which is
 * why the result is checked with VERIFY0().
 */
void
|
||||
txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
|
||||
{
|
||||
VERIFY0(txg_wait_synced_impl(dp, txg, B_FALSE));
|
||||
}
|
||||
|
||||
/*
 * Similar to txg_wait_synced(), but the wait can be interrupted by a signal.
 * Passes wait_sig = B_TRUE to txg_wait_synced_impl(), which then waits with
 * cv_wait_io_sig() instead of cv_wait_io().
 *
 * Returns B_TRUE if the thread was signaled while waiting, B_FALSE
 * otherwise.  After a B_TRUE return the caller may call txg_wait_synced*()
 * again to resume waiting for the same txg.
 */
|
||||
boolean_t
|
||||
txg_wait_synced_sig(dsl_pool_t *dp, uint64_t txg)
|
||||
{
|
||||
return (txg_wait_synced_impl(dp, txg, B_TRUE));
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user