zinject: count matches and injections for each handler

When building tests with zinject, it can be quite difficult to work out
if you're producing the right kind of IO to match the rules you've set
up.

So, here we extend injection records to count the number of times a
handler matched the operation, and how often an error was actually
injected (i.e. after frequency and other exclusions are applied).

Then, display those counts in the `zinject` output.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #16938
This commit is contained in:
Rob Norris
2025-01-14 00:33:31 +11:00
committed by GitHub
parent fae4c664a4
commit 2aa3fbe761
6 changed files with 236 additions and 39 deletions
+49 -11
View File
@@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2024, Klara Inc.
* Copyright (c) 2024-2025, Klara, Inc.
*/
/*
@@ -129,6 +129,9 @@ static boolean_t
zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva,
zinject_record_t *record, int error)
{
boolean_t matched = B_FALSE;
boolean_t injected = B_FALSE;
/*
* Check for a match against the MOS, which is based on type
*/
@@ -137,9 +140,8 @@ zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva,
record->zi_object == DMU_META_DNODE_OBJECT) {
if (record->zi_type == DMU_OT_NONE ||
type == record->zi_type)
return (freq_triggered(record->zi_freq));
else
return (B_FALSE);
matched = B_TRUE;
goto done;
}
/*
@@ -153,10 +155,20 @@ zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva,
(record->zi_dvas == 0 ||
(dva != ZI_NO_DVA && (record->zi_dvas & (1ULL << dva)))) &&
error == record->zi_error) {
return (freq_triggered(record->zi_freq));
matched = B_TRUE;
goto done;
}
return (B_FALSE);
done:
if (matched) {
record->zi_match_count++;
injected = freq_triggered(record->zi_freq);
}
if (injected)
record->zi_inject_count++;
return (injected);
}
/*
@@ -177,8 +189,11 @@ zio_handle_panic_injection(spa_t *spa, const char *tag, uint64_t type)
continue;
if (handler->zi_record.zi_type == type &&
strcmp(tag, handler->zi_record.zi_func) == 0)
strcmp(tag, handler->zi_record.zi_func) == 0) {
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
panic("Panic requested in function %s\n", tag);
}
}
rw_exit(&inject_lock);
@@ -336,6 +351,8 @@ zio_handle_label_injection(zio_t *zio, int error)
if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
(offset >= start && offset <= end)) {
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
ret = error;
break;
}
@@ -400,12 +417,16 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2)
if (handler->zi_record.zi_error == err1 ||
handler->zi_record.zi_error == err2) {
handler->zi_record.zi_match_count++;
/*
* limit error injection if requested
*/
if (!freq_triggered(handler->zi_record.zi_freq))
continue;
handler->zi_record.zi_inject_count++;
/*
* For a failed open, pretend like the device
* has gone away.
@@ -441,6 +462,8 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2)
break;
}
if (handler->zi_record.zi_error == ENXIO) {
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
ret = SET_ERROR(EIO);
break;
}
@@ -483,6 +506,8 @@ zio_handle_ignored_writes(zio_t *zio)
handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
continue;
handler->zi_record.zi_match_count++;
/*
* Positive duration implies # of seconds, negative
* a number of txgs
@@ -495,8 +520,10 @@ zio_handle_ignored_writes(zio_t *zio)
}
/* Have a "problem" writing 60% of the time */
if (random_in_range(100) < 60)
if (random_in_range(100) < 60) {
handler->zi_record.zi_inject_count++;
zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
}
break;
}
@@ -520,6 +547,9 @@ spa_handle_ignored_writes(spa_t *spa)
handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
continue;
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
if (handler->zi_record.zi_duration > 0) {
VERIFY(handler->zi_record.zi_timer == 0 ||
ddi_time_after64(
@@ -601,9 +631,6 @@ zio_handle_io_delay(zio_t *zio)
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
continue;
if (!freq_triggered(handler->zi_record.zi_freq))
continue;
if (vd->vdev_guid != handler->zi_record.zi_guid)
continue;
@@ -628,6 +655,12 @@ zio_handle_io_delay(zio_t *zio)
ASSERT3U(handler->zi_record.zi_nlanes, >,
handler->zi_next_lane);
handler->zi_record.zi_match_count++;
/* Limit the use of this handler if requested */
if (!freq_triggered(handler->zi_record.zi_freq))
continue;
/*
* We want to issue this IO to the lane that will become
* idle the soonest, so we compare the soonest this
@@ -699,6 +732,9 @@ zio_handle_io_delay(zio_t *zio)
*/
min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) %
min_handler->zi_record.zi_nlanes;
min_handler->zi_record.zi_inject_count++;
}
mutex_exit(&inject_delay_mtx);
@@ -721,9 +757,11 @@ zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command)
handler = list_next(&inject_handlers, handler)) {
ASSERT3P(handler->zi_spa_name, !=, NULL);
if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) {
handler->zi_record.zi_match_count++;
uint64_t pause =
SEC2NSEC(handler->zi_record.zi_duration);
if (pause > elapsed) {
handler->zi_record.zi_inject_count++;
delay = pause - elapsed;
}
id = handler->zi_id;