OpenZFS 6531 - Provide mechanism to artificially limit disk performance

Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
Ported by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>

OpenZFS-issue: https://www.illumos.org/issues/6531
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/97e8130

Porting notes:
- Added new IO delay tracepoints, and moved common ZIO tracepoint macros
  to a new trace_common.h file.
- Used zio_delay_taskq() in place of OpenZFS's timeout_generic() function.
- Updated zinject man page
- Updated zpool_scrub test files
This commit is contained in:
Tony Hutter
2016-05-23 10:41:29 -07:00
committed by Brian Behlendorf
parent 7e945072d1
commit 26ef0cc7db
18 changed files with 681 additions and 121 deletions
+105 -3
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
/*
@@ -239,6 +239,38 @@ usage(void)
"\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
"\t\tPerform a specific action on a particular device.\n"
"\n"
"\tzinject -d device -D latency:lanes pool\n"
"\n"
"\t\tAdd an artificial delay to IO requests on a particular\n"
"\t\tdevice, such that the requests take a minimum of 'latency'\n"
"\t\tmilliseconds to complete. Each delay has an associated\n"
"\t\tnumber of 'lanes' which defines the number of concurrent\n"
"\t\tIO requests that can be processed.\n"
"\n"
"\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
"\t\tthe device will only be able to service a single IO request\n"
"\t\tat a time with each request taking 10 ms to complete. So,\n"
"\t\tif only a single request is submitted every 10 ms, the\n"
"\t\taverage latency will be 10 ms; but if more than one request\n"
"\t\tis submitted every 10 ms, the average latency will be more\n"
"\t\tthan 10 ms.\n"
"\n"
"\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
"\t\tlanes (-D 10:2), then the device will be able to service\n"
"\t\ttwo requests at a time, each with a minimum latency of\n"
"\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
"\t\tthe average latency will be 10 ms; but if more than two\n"
"\t\trequests are submitted every 10 ms, the average latency\n"
"\t\twill be more than 10 ms.\n"
"\n"
"\t\tAlso note, these delays are additive. So two invocations\n"
"\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
"\t\tof '-D 10:2'. This also means, one can specify multiple\n"
"\t\tlanes with differing target latencies. For example, an\n"
"\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
"\t\tcreate 3 lanes on the device; one lane with a latency\n"
"\t\tof 10 ms and two lanes with a 25 ms latency.\n"
"\n"
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
"\t\tCause the pool to stop writing blocks yet not\n"
"\t\treport errors for a duration. Simulates buggy hardware\n"
@@ -353,6 +385,9 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
return (0);
if (record->zi_cmd == ZINJECT_DELAY_IO)
return (0);
if (*count == 0) {
(void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
(void) printf("--- --------------- ----------------\n");
@@ -366,6 +401,35 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
return (0);
}
static int
print_delay_handler(int id, const char *pool, zinject_record_t *record,
void *data)
{
int *count = data;
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
return (0);
if (record->zi_cmd != ZINJECT_DELAY_IO)
return (0);
if (*count == 0) {
(void) printf("%3s %-15s %-15s %-15s %s\n",
"ID", "POOL", "DELAY (ms)", "LANES", "GUID");
(void) printf("--- --------------- --------------- "
"--------------- ----------------\n");
}
*count += 1;
(void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool,
(u_longlong_t)NSEC2MSEC(record->zi_timer),
(u_longlong_t)record->zi_nlanes,
(u_longlong_t)record->zi_guid);
return (0);
}
static int
print_panic_handler(int id, const char *pool, zinject_record_t *record,
void *data)
@@ -403,6 +467,13 @@ print_all_handlers(void)
count = 0;
}
(void) iter_handlers(print_delay_handler, &count);
if (count > 0) {
total += count;
(void) printf("\n");
count = 0;
}
(void) iter_handlers(print_data_handler, &count);
if (count > 0) {
total += count;
@@ -545,6 +616,35 @@ perform_action(const char *pool, zinject_record_t *record, int cmd)
return (1);
}
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
unsigned long scan_delay;
unsigned long scan_nlanes;
if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2)
return (1);
/*
* We explicitly disallow a delay of zero here, because we key
* off this value being non-zero in translate_device(), to
* determine if the fault is a ZINJECT_DELAY_IO fault or not.
*/
if (scan_delay == 0)
return (1);
/*
* The units for the CLI delay parameter is milliseconds, but
* the data passed to the kernel is interpreted as nanoseconds.
* Thus we scale the milliseconds to nanoseconds here, and this
* nanosecond value is used to pass the delay to the kernel.
*/
*delay = MSEC2NSEC(scan_delay);
*nlanes = scan_nlanes;
return (0);
}
int
main(int argc, char **argv)
{
@@ -628,8 +728,10 @@ main(int argc, char **argv)
break;
case 'D':
errno = 0;
record.zi_timer = strtoull(optarg, &end, 10);
if (errno != 0 || *end != '\0') {
ret = parse_delay(optarg, &record.zi_timer,
&record.zi_nlanes);
if (ret != 0) {
(void) fprintf(stderr, "invalid i/o delay "
"value: '%s'\n", optarg);
usage();