zinject: count matches and injections for each handler

When building tests with zinject, it can be quite difficult to work out
if you're producing the right kind of IO to match the rules you've set
up.

So, here we extend injection records to count the number of times a
handler matched the operation, and how often an error was actually
injected (ie after frequency and other exclusions are applied).

Then, display those counts in the `zinject` output.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #16938
This commit is contained in:
Rob Norris 2025-01-14 00:33:31 +11:00 committed by GitHub
parent fae4c664a4
commit 2aa3fbe761
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 236 additions and 39 deletions

View File

@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation. * Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2023-2024, Klara Inc. * Copyright (c) 2023-2025, Klara, Inc.
*/ */
/* /*
@ -404,27 +404,30 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
if (*count == 0) { if (*count == 0) {
(void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s " (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s "
"%-15s\n", "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "%-15s %-6s %-15s\n", "ID", "POOL", "OBJSET", "OBJECT",
"LVL", "DVAs", "RANGE"); "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT");
(void) printf("--- --------------- ------ " (void) printf("--- --------------- ------ "
"------ -------- --- ---- ---------------\n"); "------ -------- --- ---- --------------- "
"------ ------\n");
} }
*count += 1; *count += 1;
(void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x ", char rangebuf[32];
id, pool, (u_longlong_t)record->zi_objset, if (record->zi_start == 0 && record->zi_end == -1ULL)
(u_longlong_t)record->zi_object, type_to_name(record->zi_type), snprintf(rangebuf, sizeof (rangebuf), "all");
record->zi_level, record->zi_dvas);
if (record->zi_start == 0 &&
record->zi_end == -1ULL)
(void) printf("all\n");
else else
(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]",
(u_longlong_t)record->zi_start,
(u_longlong_t)record->zi_end); (u_longlong_t)record->zi_end);
(void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x %-15s "
"%6lu %6lu\n", id, pool, (u_longlong_t)record->zi_objset,
(u_longlong_t)record->zi_object, type_to_name(record->zi_type),
record->zi_level, record->zi_dvas, rangebuf,
record->zi_match_count, record->zi_inject_count);
return (0); return (0);
} }
@ -445,11 +448,14 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
return (0); return (0);
if (*count == 0) { if (*count == 0) {
(void) printf("%3s %-15s %-16s %-5s %-10s %-9s\n", (void) printf("%3s %-15s %-16s %-5s %-10s %-9s "
"ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ"); "%-6s %-6s\n",
"ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ",
"MATCH", "INJECT");
(void) printf( (void) printf(
"--- --------------- ---------------- " "--- --------------- ---------------- "
"----- ---------- ---------\n"); "----- ---------- --------- "
"------ ------\n");
} }
*count += 1; *count += 1;
@ -457,9 +463,10 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
double freq = record->zi_freq == 0 ? 100.0f : double freq = record->zi_freq == 0 ? 100.0f :
(((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f; (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
(void) printf("%3d %-15s %llx %-5s %-10s %8.4f%%\n", id, pool, (void) printf("%3d %-15s %llx %-5s %-10s %8.4f%% "
(u_longlong_t)record->zi_guid, iotypestr[record->zi_iotype], "%6lu %6lu\n", id, pool, (u_longlong_t)record->zi_guid,
err_to_str(record->zi_error), freq); iotypestr[record->zi_iotype], err_to_str(record->zi_error),
freq, record->zi_match_count, record->zi_inject_count);
return (0); return (0);
} }
@ -477,18 +484,25 @@ print_delay_handler(int id, const char *pool, zinject_record_t *record,
return (0); return (0);
if (*count == 0) { if (*count == 0) {
(void) printf("%3s %-15s %-15s %-15s %s\n", (void) printf("%3s %-15s %-16s %-10s %-5s %-9s "
"ID", "POOL", "DELAY (ms)", "LANES", "GUID"); "%-6s %-6s\n",
(void) printf("--- --------------- --------------- " "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ",
"--------------- ----------------\n"); "MATCH", "INJECT");
(void) printf("--- --------------- ---------------- "
"---------- ----- --------- "
"------ ------\n");
} }
*count += 1; *count += 1;
(void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool, double freq = record->zi_freq == 0 ? 100.0f :
(((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
(void) printf("%3d %-15s %llx %10llu %5llu %8.4f%% "
"%6lu %6lu\n", id, pool, (u_longlong_t)record->zi_guid,
(u_longlong_t)NSEC2MSEC(record->zi_timer), (u_longlong_t)NSEC2MSEC(record->zi_timer),
(u_longlong_t)record->zi_nlanes, (u_longlong_t)record->zi_nlanes,
(u_longlong_t)record->zi_guid); freq, record->zi_match_count, record->zi_inject_count);
return (0); return (0);
} }

View File

@ -23,7 +23,7 @@
* Copyright (c) 2012, 2024 by Delphix. All rights reserved. * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems. * Copyright 2016 RackTop Systems.
* Copyright (c) 2017, Intel Corporation. * Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2024, Klara, Inc. * Copyright (c) 2024-2025, Klara, Inc.
*/ */
#ifndef _SYS_ZFS_IOCTL_H #ifndef _SYS_ZFS_IOCTL_H
@ -421,6 +421,8 @@ typedef struct zinject_record {
uint64_t zi_nlanes; uint64_t zi_nlanes;
uint32_t zi_cmd; uint32_t zi_cmd;
uint32_t zi_dvas; uint32_t zi_dvas;
uint64_t zi_match_count; /* count of times matched */
uint64_t zi_inject_count; /* count of times injected */
} zinject_record_t; } zinject_record_t;
#define ZINJECT_NULL 0x1 #define ZINJECT_NULL 0x1

View File

@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation. * Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2024, Klara Inc. * Copyright (c) 2024-2025, Klara, Inc.
*/ */
/* /*
@ -129,6 +129,9 @@ static boolean_t
zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva, zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva,
zinject_record_t *record, int error) zinject_record_t *record, int error)
{ {
boolean_t matched = B_FALSE;
boolean_t injected = B_FALSE;
/* /*
* Check for a match against the MOS, which is based on type * Check for a match against the MOS, which is based on type
*/ */
@ -137,9 +140,8 @@ zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva,
record->zi_object == DMU_META_DNODE_OBJECT) { record->zi_object == DMU_META_DNODE_OBJECT) {
if (record->zi_type == DMU_OT_NONE || if (record->zi_type == DMU_OT_NONE ||
type == record->zi_type) type == record->zi_type)
return (freq_triggered(record->zi_freq)); matched = B_TRUE;
else goto done;
return (B_FALSE);
} }
/* /*
@ -153,10 +155,20 @@ zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva,
(record->zi_dvas == 0 || (record->zi_dvas == 0 ||
(dva != ZI_NO_DVA && (record->zi_dvas & (1ULL << dva)))) && (dva != ZI_NO_DVA && (record->zi_dvas & (1ULL << dva)))) &&
error == record->zi_error) { error == record->zi_error) {
return (freq_triggered(record->zi_freq)); matched = B_TRUE;
goto done;
} }
return (B_FALSE); done:
if (matched) {
record->zi_match_count++;
injected = freq_triggered(record->zi_freq);
}
if (injected)
record->zi_inject_count++;
return (injected);
} }
/* /*
@ -177,9 +189,12 @@ zio_handle_panic_injection(spa_t *spa, const char *tag, uint64_t type)
continue; continue;
if (handler->zi_record.zi_type == type && if (handler->zi_record.zi_type == type &&
strcmp(tag, handler->zi_record.zi_func) == 0) strcmp(tag, handler->zi_record.zi_func) == 0) {
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
panic("Panic requested in function %s\n", tag); panic("Panic requested in function %s\n", tag);
} }
}
rw_exit(&inject_lock); rw_exit(&inject_lock);
} }
@ -336,6 +351,8 @@ zio_handle_label_injection(zio_t *zio, int error)
if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
(offset >= start && offset <= end)) { (offset >= start && offset <= end)) {
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
ret = error; ret = error;
break; break;
} }
@ -400,12 +417,16 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2)
if (handler->zi_record.zi_error == err1 || if (handler->zi_record.zi_error == err1 ||
handler->zi_record.zi_error == err2) { handler->zi_record.zi_error == err2) {
handler->zi_record.zi_match_count++;
/* /*
* limit error injection if requested * limit error injection if requested
*/ */
if (!freq_triggered(handler->zi_record.zi_freq)) if (!freq_triggered(handler->zi_record.zi_freq))
continue; continue;
handler->zi_record.zi_inject_count++;
/* /*
* For a failed open, pretend like the device * For a failed open, pretend like the device
* has gone away. * has gone away.
@ -441,6 +462,8 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2)
break; break;
} }
if (handler->zi_record.zi_error == ENXIO) { if (handler->zi_record.zi_error == ENXIO) {
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
ret = SET_ERROR(EIO); ret = SET_ERROR(EIO);
break; break;
} }
@ -483,6 +506,8 @@ zio_handle_ignored_writes(zio_t *zio)
handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
continue; continue;
handler->zi_record.zi_match_count++;
/* /*
* Positive duration implies # of seconds, negative * Positive duration implies # of seconds, negative
* a number of txgs * a number of txgs
@ -495,8 +520,10 @@ zio_handle_ignored_writes(zio_t *zio)
} }
/* Have a "problem" writing 60% of the time */ /* Have a "problem" writing 60% of the time */
if (random_in_range(100) < 60) if (random_in_range(100) < 60) {
handler->zi_record.zi_inject_count++;
zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
}
break; break;
} }
@ -520,6 +547,9 @@ spa_handle_ignored_writes(spa_t *spa)
handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
continue; continue;
handler->zi_record.zi_match_count++;
handler->zi_record.zi_inject_count++;
if (handler->zi_record.zi_duration > 0) { if (handler->zi_record.zi_duration > 0) {
VERIFY(handler->zi_record.zi_timer == 0 || VERIFY(handler->zi_record.zi_timer == 0 ||
ddi_time_after64( ddi_time_after64(
@ -601,9 +631,6 @@ zio_handle_io_delay(zio_t *zio)
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO) if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
continue; continue;
if (!freq_triggered(handler->zi_record.zi_freq))
continue;
if (vd->vdev_guid != handler->zi_record.zi_guid) if (vd->vdev_guid != handler->zi_record.zi_guid)
continue; continue;
@ -628,6 +655,12 @@ zio_handle_io_delay(zio_t *zio)
ASSERT3U(handler->zi_record.zi_nlanes, >, ASSERT3U(handler->zi_record.zi_nlanes, >,
handler->zi_next_lane); handler->zi_next_lane);
handler->zi_record.zi_match_count++;
/* Limit the use of this handler if requested */
if (!freq_triggered(handler->zi_record.zi_freq))
continue;
/* /*
* We want to issue this IO to the lane that will become * We want to issue this IO to the lane that will become
* idle the soonest, so we compare the soonest this * idle the soonest, so we compare the soonest this
@ -699,6 +732,9 @@ zio_handle_io_delay(zio_t *zio)
*/ */
min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) % min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) %
min_handler->zi_record.zi_nlanes; min_handler->zi_record.zi_nlanes;
min_handler->zi_record.zi_inject_count++;
} }
mutex_exit(&inject_delay_mtx); mutex_exit(&inject_delay_mtx);
@ -721,9 +757,11 @@ zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command)
handler = list_next(&inject_handlers, handler)) { handler = list_next(&inject_handlers, handler)) {
ASSERT3P(handler->zi_spa_name, !=, NULL); ASSERT3P(handler->zi_spa_name, !=, NULL);
if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) { if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) {
handler->zi_record.zi_match_count++;
uint64_t pause = uint64_t pause =
SEC2NSEC(handler->zi_record.zi_duration); SEC2NSEC(handler->zi_record.zi_duration);
if (pause > elapsed) { if (pause > elapsed) {
handler->zi_record.zi_inject_count++;
delay = pause - elapsed; delay = pause - elapsed;
} }
id = handler->zi_id; id = handler->zi_id;

View File

@ -159,7 +159,7 @@ tests = ['json_sanity']
tags = ['functional', 'cli_root', 'json'] tags = ['functional', 'cli_root', 'json']
[tests/functional/cli_root/zinject] [tests/functional/cli_root/zinject]
tests = ['zinject_args'] tests = ['zinject_args', 'zinject_counts']
pre = pre =
post = post =
tags = ['functional', 'cli_root', 'zinject'] tags = ['functional', 'cli_root', 'zinject']

View File

@ -615,6 +615,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/json/setup.ksh \ functional/cli_root/json/setup.ksh \
functional/cli_root/json/json_sanity.ksh \ functional/cli_root/json/json_sanity.ksh \
functional/cli_root/zinject/zinject_args.ksh \ functional/cli_root/zinject/zinject_args.ksh \
functional/cli_root/zinject/zinject_counts.ksh \
functional/cli_root/zdb/zdb_002_pos.ksh \ functional/cli_root/zdb/zdb_002_pos.ksh \
functional/cli_root/zdb/zdb_003_pos.ksh \ functional/cli_root/zdb/zdb_003_pos.ksh \
functional/cli_root/zdb/zdb_004_pos.ksh \ functional/cli_root/zdb/zdb_004_pos.ksh \

View File

@ -0,0 +1,142 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025, Klara, Inc.
#
#
# This test sets various injections, does some IO to trigger them. and then
# checks the "match" and "inject" counters on the injection records to ensure
# that they're being counted properly.
#
# Note that this is a test of the counters, not injection generally. We're
# usually only looking for the counters moving at all, not caring too much
# about their actual values.
. $STF_SUITE/include/libtest.shlib
verify_runnable "global"
log_assert "Check zinject counts are displayed and advanced as expected."
DISK1=${DISKS%% *}
function cleanup
{
zinject -c all
default_cleanup_noexit
}
log_onexit cleanup
default_mirror_setup_noexit $DISKS
# Call zinject, get the match and inject counts, and make sure they look
# plausible for the requested frequency.
function check_count_freq
{
typeset -i freq=$1
# assuming a single rule, with the match and inject counts in the
# last two columns
typeset rule=$(zinject | grep -m 1 -oE '^ *[0-9].*[0-9]$')
log_note "check_count_freq: using rule: $rule"
typeset -a record=($(echo $rule | grep -oE ' [0-9]+ +[0-9]+$'))
typeset -i match=${record[0]}
typeset -i inject=${record[1]}
log_note "check_count_freq: freq=$freq match=$match inject=$inject"
# equality check, for 100% frequency, or if we've never matched the rule
if [[ $match -eq 0 || $freq -eq 100 ]] ; then
return [[ $match -eq 0 $inject ]]
fi
# Compute the expected injection count, and compare. Because we're
# not testing the fine details here, it's considered good-enough for
# the injection account to be within +/- 10% of the expected count.
typeset -i expect=$(($match * $freq / 100))
typeset -i diff=$((($expect - $inject) / 10))
return [[ $diff -ge -1 && $diff -le 1 ]]
}
# Test device IO injections by injecting write errors, doing some writes,
# and making sure the count moved
function test_device_injection
{
for freq in 100 50 ; do
log_must zinject -d $DISK1 -e io -T write -f $freq $TESTPOOL
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=1
log_must zpool sync
log_must check_count_freq $freq
log_must zinject -c all
done
}
# Test object injections by writing a file, injecting checksum errors and
# trying to read it back
function test_object_injection
{
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=1
zpool sync
for freq in 100 50 ; do
log_must zinject -t data -e checksum -f $freq /$TESTPOOL/file
cat /tank/file > /dev/null || true
log_must check_count_freq $freq
log_must zinject -c all
done
}
# Test delay injections, by injecting delays and writing
function test_delay_injection
{
for freq in 100 50 ; do
log_must zinject -d $DISK1 -D 50:1 -f $freq $TESTPOOL
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=1
zpool sync
log_must check_count_freq $freq
log_must zinject -c all
done
}
# Disable cache, to ensure reads induce IO
log_must zfs set primarycache=none $TESTPOOL
# Test 'em all.
log_must test_device_injection
log_must test_object_injection
log_must test_delay_injection
log_pass "zinject counts are displayed and advanced as expected."