From 302ef1517e5769cbe6a12d94c89f44a90721bfd4 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 26 Aug 2010 11:58:00 -0700 Subject: [PATCH] Add linux zpios support Linux kernel implementation of PIOS test app. Signed-off-by: Brian Behlendorf --- cmd/Makefile.am | 2 +- cmd/zpios/.gitignore | 1 + cmd/zpios/Makefile.am | 12 + cmd/zpios/zpios.h | 121 ++ cmd/zpios/zpios_main.c | 629 ++++++++ cmd/zpios/zpios_util.c | 454 ++++++ config/zfs-build.m4 | 9 + configure.ac | 2 + module/Makefile.in | 1 + module/zpios/Makefile.in | 11 + module/zpios/include/zpios-ctl.h | 198 +++ module/zpios/include/zpios-internal.h | 138 ++ module/zpios/zpios.c | 1331 +++++++++++++++++ scripts/Makefile.am | 14 + scripts/common.sh.in | 5 + scripts/zpios-profile/zpios-profile-disk.sh | 129 ++ scripts/zpios-profile/zpios-profile-pids.sh | 131 ++ scripts/zpios-profile/zpios-profile-post.sh | 129 ++ scripts/zpios-profile/zpios-profile-pre.sh | 184 +++ scripts/zpios-profile/zpios-profile.sh | 226 +++ scripts/zpios-sanity.sh | 158 ++ scripts/zpios-survey.sh | 215 +++ .../zpios-test/16th-8192rc-4rs-1cs-4off.sh | 65 + scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh | 66 + .../1x256th-65536rc-4rs-1cs-4off.sh | 65 + .../zpios-test/256th-65536rc-4rs-1cs-4off.sh | 65 + scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh | 65 + scripts/zpios-test/large-thread-survey.sh | 1 + scripts/zpios-test/large.sh | 1 + scripts/zpios-test/medium.sh | 1 + scripts/zpios-test/small.sh | 1 + scripts/zpios-test/tiny.sh | 1 + scripts/zpios.sh | 272 ++++ 33 files changed, 4702 insertions(+), 1 deletion(-) create mode 100644 cmd/zpios/.gitignore create mode 100644 cmd/zpios/Makefile.am create mode 100644 cmd/zpios/zpios.h create mode 100644 cmd/zpios/zpios_main.c create mode 100644 cmd/zpios/zpios_util.c create mode 100644 module/zpios/Makefile.in create mode 100644 module/zpios/include/zpios-ctl.h create mode 100644 module/zpios/include/zpios-internal.h create mode 100644 module/zpios/zpios.c create mode 100755 scripts/zpios-profile/zpios-profile-disk.sh create mode 100755 scripts/zpios-profile/zpios-profile-pids.sh create mode 100755 scripts/zpios-profile/zpios-profile-post.sh create mode 100755 scripts/zpios-profile/zpios-profile-pre.sh create mode 100755 scripts/zpios-profile/zpios-profile.sh create mode 100755 scripts/zpios-sanity.sh create mode 100755 scripts/zpios-survey.sh create mode 100755 scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh create mode 100755 scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh create mode 100755 scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh create mode 100755 scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh create mode 100755 scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh create mode 120000 scripts/zpios-test/large-thread-survey.sh create mode 120000 scripts/zpios-test/large.sh create mode 120000 scripts/zpios-test/medium.sh create mode 120000 scripts/zpios-test/small.sh create mode 120000 scripts/zpios-test/tiny.sh create mode 100755 scripts/zpios.sh diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 954f8565c..760854744 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1 +1 @@ -SUBDIRS = zfs zpool zpool_id zpool_layout zdb zinject ztest +SUBDIRS = zfs zpool zpool_id zpool_layout zdb zinject ztest zpios diff --git a/cmd/zpios/.gitignore b/cmd/zpios/.gitignore new file mode 100644 index 000000000..b83e1d094 --- /dev/null +++ b/cmd/zpios/.gitignore @@ -0,0 +1 @@ +/zpios diff --git a/cmd/zpios/Makefile.am b/cmd/zpios/Makefile.am new file mode 100644 index 000000000..4e13a76c9 --- /dev/null +++ b/cmd/zpios/Makefile.am @@ -0,0 +1,12 @@ +include $(top_srcdir)/config/Rules.am + +DEFAULT_INCLUDES += \ + -I${top_srcdir}/module/zpios/include + +sbin_PROGRAMS = zpios + +zpios_SOURCES = \ + $(top_srcdir)/cmd/zpios/zpios_main.c \ + $(top_srcdir)/cmd/zpios/zpios_util.c \ + $(top_srcdir)/cmd/zpios/zpios.h + diff --git a/cmd/zpios/zpios.h b/cmd/zpios/zpios.h new file mode 100644 index 000000000..ed979459a --- /dev/null +++ b/cmd/zpios/zpios.h @@ -0,0 +1,121 @@ +/*****************************************************************************\ + * ZPIOS is a heavily modified version of the original PIOS test code. + * It is designed to have the test code running in the Linux kernel + * against ZFS while still being flexibly controled from user space. + * + * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * LLNL-CODE-403049 + * + * Original PIOS Test Code + * Copyright (C) 2004 Cluster File Systems, Inc. + * Written by Peter Braam + * Atul Vidwansa + * Milind Dumbare + * + * This file is part of ZFS on Linux. + * For details, see . + * + * ZPIOS is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ZPIOS is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with ZPIOS. If not, see . +\*****************************************************************************/ + +#ifndef _ZPIOS_H +#define _ZPIOS_H + +#include + +#define VERSION_SIZE 64 + +/* Regular expressions */ +#define REGEX_NUMBERS "^[0-9]*[0-9]$" +#define REGEX_NUMBERS_COMMA "^([0-9]+,)*[0-9]+$" +#define REGEX_SIZE "^[0-9][0-9]*[kmgt]$" +#define REGEX_SIZE_COMMA "^([0-9][0-9]*[kmgt]+,)*[0-9][0-9]*[kmgt]$" + +/* Flags for low, high, incr */ +#define FLAG_SET 0x01 +#define FLAG_LOW 0x02 +#define FLAG_HIGH 0x04 +#define FLAG_INCR 0x08 + +#define TRUE 1 +#define FALSE 0 + +#define KB (1024) +#define MB (KB * 1024) +#define GB (MB * 1024) +#define TB (GB * 1024) + +#define KMGT_SIZE 16 + +/* All offsets, sizes and counts can be passed to the application in + * multiple ways. + * 1. a value (stored in val[0], val_count will be 1) + * 2. a comma separated list of values (stored in val[], using val_count) + * 3. a range and block sizes, low, high, factor (val_count must be 0) + */ +typedef struct pios_range_repeat { + uint64_t val[32]; /* Comma sep array, or low, high, inc */ + uint64_t val_count; /* Num of values */ + uint64_t val_low; + uint64_t val_high; + uint64_t val_inc_perc; + uint64_t next_val; /* Used for multiple runs in get_next() */ +} range_repeat_t; + +typedef struct cmd_args { + range_repeat_t T; /* Thread count */ + range_repeat_t N; /* Region count */ + range_repeat_t O; /* Offset count */ + range_repeat_t C; /* Chunksize */ + range_repeat_t S; /* Regionsize */ + + const char *pool; /* Pool */ + const char *name; /* Name */ + uint32_t flags; /* Flags */ + uint32_t io_type; /* DMUIO only */ + uint32_t verbose; /* Verbose */ + uint32_t human_readable; /* Human readable output */ + + uint64_t regionnoise; /* Region noise */ + uint64_t chunknoise; /* Chunk noise */ + uint64_t thread_delay; /* Thread delay */ + + char pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */ + char post[ZPIOS_PATH_SIZE]; /* Post-exec hook */ + char log[ZPIOS_PATH_SIZE]; /* Requested log dir */ + + /* Control */ + int current_id; + uint64_t current_T; + uint64_t current_N; + uint64_t current_C; + uint64_t current_S; + uint64_t current_O; + + uint32_t rc; +} cmd_args_t; + +int set_count(char *pattern1, char *pattern2, range_repeat_t *range, + char *optarg, uint32_t *flags, char *arg); +int set_lhi(char *pattern, range_repeat_t *range, char *optarg, + int flag, uint32_t *flag_thread, char *arg); +int set_noise(uint64_t *noise, char *optarg, char *arg); +int set_load_params(cmd_args_t *args, char *optarg); +int check_mutual_exclusive_command_lines(uint32_t flag, char *arg); +void print_stats_header(cmd_args_t *args); +void print_stats(cmd_args_t *args, zpios_cmd_t *cmd); + +#endif /* _ZPIOS_H */ diff --git a/cmd/zpios/zpios_main.c b/cmd/zpios/zpios_main.c new file mode 100644 index 000000000..14c37e7e3 --- /dev/null +++ b/cmd/zpios/zpios_main.c @@ -0,0 +1,629 @@ +/*****************************************************************************\ + * ZPIOS is a heavily modified version of the original PIOS test code. + * It is designed to have the test code running in the Linux kernel + * against ZFS while still being flexibly controled from user space. + * + * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * LLNL-CODE-403049 + * + * Original PIOS Test Code + * Copyright (C) 2004 Cluster File Systems, Inc. + * Written by Peter Braam + * Atul Vidwansa + * Milind Dumbare + * + * This file is part of ZFS on Linux. + * For details, see . + * + * ZPIOS is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ZPIOS is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with ZPIOS. If not, see . +\*****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zpios.h" + +static const char short_opt[] = "t:l:h:e:n:i:j:k:o:m:q:r:c:a:b:g:s:A:B:C:" + "L:p:M:xP:R:G:I:N:T:VzOfHv?"; +static const struct option long_opt[] = { + {"threadcount", required_argument, 0, 't' }, + {"threadcount_low", required_argument, 0, 'l' }, + {"threadcount_high", required_argument, 0, 'h' }, + {"threadcount_incr", required_argument, 0, 'e' }, + {"regioncount", required_argument, 0, 'n' }, + {"regioncount_low", required_argument, 0, 'i' }, + {"regioncount_high", required_argument, 0, 'j' }, + {"regioncount_incr", required_argument, 0, 'k' }, + {"offset", required_argument, 0, 'o' }, + {"offset_low", required_argument, 0, 'm' }, + {"offset_high", required_argument, 0, 'q' }, + {"offset_incr", required_argument, 0, 'r' }, + {"chunksize", required_argument, 0, 'c' }, + {"chunksize_low", required_argument, 0, 'a' }, + {"chunksize_high", required_argument, 0, 'b' }, + {"chunksize_incr", required_argument, 0, 'g' }, + {"regionsize", required_argument, 0, 's' }, + {"regionsize_low", required_argument, 0, 'A' }, + {"regionsize_high", required_argument, 0, 'B' }, + {"regionsize_incr", required_argument, 0, 'C' }, + {"load", required_argument, 0, 'L' }, + {"pool", required_argument, 0, 'p' }, + {"name", required_argument, 0, 'M' }, + {"cleanup", no_argument, 0, 'x' }, + {"prerun", required_argument, 0, 'P' }, + {"postrun", required_argument, 0, 'R' }, + {"log", required_argument, 0, 'G' }, + {"regionnoise", required_argument, 0, 'I' }, + {"chunknoise", required_argument, 0, 'N' }, + {"threaddelay", required_argument, 0, 'T' }, + {"verify", no_argument, 0, 'V' }, + {"zerocopy", no_argument, 0, 'z' }, + {"nowait", no_argument, 0, 'O' }, + {"noprefetch", no_argument, 0, 'f' }, + {"human-readable", no_argument, 0, 'H' }, + {"verbose", no_argument, 0, 'v' }, + {"help", no_argument, 0, '?' }, + { 0, 0, 0, 0 }, +}; + +static int zpiosctl_fd; /* Control file descriptor */ +static char zpios_version[VERSION_SIZE]; /* Kernel version string */ +static char *zpios_buffer = NULL; /* Scratch space area */ +static int zpios_buffer_size = 0; /* Scratch space size */ + +static int +usage(void) +{ + fprintf(stderr, "Usage: zpios\n"); + fprintf(stderr, + " --threadcount -t =values\n" + " --threadcount_low -l =value\n" + " --threadcount_high -h =value\n" + " --threadcount_incr -e =value\n" + " --regioncount -n =values\n" + " --regioncount_low -i =value\n" + " --regioncount_high -j =value\n" + " --regioncount_incr -k =value\n" + " --offset -o =values\n" + " --offset_low -m =value\n" + " --offset_high -q =value\n" + " --offset_incr -r =value\n" + " --chunksize -c =values\n" + " --chunksize_low -a =value\n" + " --chunksize_high -b =value\n" + " --chunksize_incr -g =value\n" + " --regionsize -s =values\n" + " --regionsize_low -A =value\n" + " --regionsize_high -B =value\n" + " --regionsize_incr -C =value\n" + " --load -L =dmuio|ssf|fpp\n" + " --pool -p =pool name\n" + " --name -M =test name\n" + " --cleanup -x\n" + " --prerun -P =pre-command\n" + " --postrun -R =post-command\n" + " --log -G =log directory\n" + " --regionnoise -I =shift\n" + " --chunknoise -N =bytes\n" + " --threaddelay -T =jiffies\n" + " --verify -V\n" + " --zerocopy -z\n" + " --nowait -O\n" + " --noprefetch -f\n" + " --human-readable -H\n" + " --verbose -v =increase verbosity\n" + " --help -? =this help\n\n"); + + return 0; +} + +static void args_fini(cmd_args_t *args) +{ + assert(args != NULL); + free(args); +} + +static cmd_args_t * +args_init(int argc, char **argv) +{ + cmd_args_t *args; + uint32_t fl_th = 0; + uint32_t fl_rc = 0; + uint32_t fl_of = 0; + uint32_t fl_rs = 0; + uint32_t fl_cs = 0; + int c, rc; + + if (argc == 1) { + usage(); + return (cmd_args_t *)NULL; + } + + /* Configure and populate the args structures */ + args = malloc(sizeof(*args)); + if (args == NULL) + return NULL; + + memset(args, 0, sizeof(*args)); + + while ((c=getopt_long(argc, argv, short_opt, long_opt, NULL)) != -1) { + rc = 0; + + switch (c) { + case 't': /* --thread count */ + rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA, + &args->T, optarg, &fl_th, "threadcount"); + break; + case 'l': /* --threadcount_low */ + rc = set_lhi(REGEX_NUMBERS, &args->T, optarg, + FLAG_LOW, &fl_th, "threadcount_low"); + break; + case 'h': /* --threadcount_high */ + rc = set_lhi(REGEX_NUMBERS, &args->T, optarg, + FLAG_HIGH, &fl_th, "threadcount_high"); + break; + case 'e': /* --threadcount_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->T, optarg, + FLAG_INCR, &fl_th, "threadcount_incr"); + break; + case 'n': /* --regioncount */ + rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA, + &args->N, optarg, &fl_rc, "regioncount"); + break; + case 'i': /* --regioncount_low */ + rc = set_lhi(REGEX_NUMBERS, &args->N, optarg, + FLAG_LOW, &fl_rc, "regioncount_low"); + break; + case 'j': /* --regioncount_high */ + rc = set_lhi(REGEX_NUMBERS, &args->N, optarg, + FLAG_HIGH, &fl_rc, "regioncount_high"); + break; + case 'k': /* --regioncount_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->N, optarg, + FLAG_INCR, &fl_rc, "regioncount_incr"); + break; + case 'o': /* --offset */ + rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA, + &args->O, optarg, &fl_of, "offset"); + break; + case 'm': /* --offset_low */ + rc = set_lhi(REGEX_SIZE, &args->O, optarg, + FLAG_LOW, &fl_of, "offset_low"); + break; + case 'q': /* --offset_high */ + rc = set_lhi(REGEX_SIZE, &args->O, optarg, + FLAG_HIGH, &fl_of, "offset_high"); + break; + case 'r': /* --offset_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->O, optarg, + FLAG_INCR, &fl_of, "offset_incr"); + break; + case 'c': /* --chunksize */ + rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA, + &args->C, optarg, &fl_cs, "chunksize"); + break; + case 'a': /* --chunksize_low */ + rc = set_lhi(REGEX_SIZE, &args->C, optarg, + FLAG_LOW, &fl_cs, "chunksize_low"); + break; + case 'b': /* --chunksize_high */ + rc = set_lhi(REGEX_SIZE, &args->C, optarg, + FLAG_HIGH, &fl_cs, "chunksize_high"); + break; + case 'g': /* --chunksize_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->C, optarg, + FLAG_INCR, &fl_cs, "chunksize_incr"); + break; + case 's': /* --regionsize */ + rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA, + &args->S, optarg, &fl_rs, "regionsize"); + break; + case 'A': /* --regionsize_low */ + rc = set_lhi(REGEX_SIZE, &args->S, optarg, + FLAG_LOW, &fl_rs, "regionsize_low"); + break; + case 'B': /* --regionsize_high */ + rc = set_lhi(REGEX_SIZE, &args->S, optarg, + FLAG_HIGH, &fl_rs, "regionsize_high"); + break; + case 'C': /* --regionsize_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->S, optarg, + FLAG_INCR, &fl_rs, "regionsize_incr"); + break; + case 'L': /* --load */ + rc = set_load_params(args, optarg); + break; + case 'p': /* --pool */ + args->pool = optarg; + break; + case 'M': + args->name = optarg; + break; + case 'x': /* --cleanup */ + args->flags |= DMU_REMOVE; + break; + case 'P': /* --prerun */ + strncpy(args->pre, optarg, ZPIOS_PATH_SIZE - 1); + break; + case 'R': /* --postrun */ + strncpy(args->post, optarg, ZPIOS_PATH_SIZE - 1); + break; + case 'G': /* --log */ + strncpy(args->log, optarg, ZPIOS_PATH_SIZE - 1); + break; + case 'I': /* --regionnoise */ + rc = set_noise(&args->regionnoise, optarg, "regionnoise"); + break; + case 'N': /* --chunknoise */ + rc = set_noise(&args->chunknoise, optarg, "chunknoise"); + break; + case 'T': /* --threaddelay */ + rc = set_noise(&args->thread_delay, optarg, "threaddelay"); + break; + case 'V': /* --verify */ + args->flags |= DMU_VERIFY; + break; + case 'z': /* --zerocopy */ + args->flags |= (DMU_WRITE_ZC | DMU_READ_ZC); + break; + case 'O': /* --nowait */ + args->flags |= DMU_WRITE_NOWAIT; + break; + case 'f': /* --noprefetch */ + args->flags |= DMU_READ_NOPF; + break; + case 'H': /* --human-readable */ + args->human_readable = 1; + break; + case 'v': /* --verbose */ + args->verbose++; + break; + case '?': + rc = 1; + break; + default: + fprintf(stderr,"Unknown option '%s'\n",argv[optind-1]); + rc = EINVAL; + break; + } + + if (rc) { + usage(); + args_fini(args); + return NULL; + } + } + + check_mutual_exclusive_command_lines(fl_th, "threadcount"); + check_mutual_exclusive_command_lines(fl_rc, "regioncount"); + check_mutual_exclusive_command_lines(fl_of, "offset"); + check_mutual_exclusive_command_lines(fl_rs, "regionsize"); + check_mutual_exclusive_command_lines(fl_cs, "chunksize"); + + if (args->pool == NULL) { + fprintf(stderr, "Error: Pool not specificed\n"); + usage(); + args_fini(args); + return NULL; + } + + if ((args->flags & (DMU_WRITE_ZC | DMU_READ_ZC)) && + (args->flags & DMU_VERIFY)) { + fprintf(stderr, "Error, --zerocopy incompatible --verify, " + "used for performance analysis only\n"); + usage(); + args_fini(args); + return NULL; + } + + return args; +} + +static int +dev_clear(void) +{ + zpios_cfg_t cfg; + int rc; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cfg_magic = ZPIOS_CFG_MAGIC; + cfg.cfg_cmd = ZPIOS_CFG_BUFFER_CLEAR; + cfg.cfg_arg1 = 0; + + rc = ioctl(zpiosctl_fd, ZPIOS_CFG, &cfg); + if (rc) + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) ZPIOS_CFG, cfg.cfg_cmd, errno); + + lseek(zpiosctl_fd, 0, SEEK_SET); + + return rc; +} + +/* Passing a size of zero simply results in querying the current size */ +static int +dev_size(int size) +{ + zpios_cfg_t cfg; + int rc; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cfg_magic = ZPIOS_CFG_MAGIC; + cfg.cfg_cmd = ZPIOS_CFG_BUFFER_SIZE; + cfg.cfg_arg1 = size; + + rc = ioctl(zpiosctl_fd, ZPIOS_CFG, &cfg); + if (rc) { + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) ZPIOS_CFG, cfg.cfg_cmd, errno); + return rc; + } + + return cfg.cfg_rc1; +} + +static void +dev_fini(void) +{ + if (zpios_buffer) + free(zpios_buffer); + + if (zpiosctl_fd != -1) { + if (close(zpiosctl_fd) == -1) { + fprintf(stderr, "Unable to close %s: %d\n", + ZPIOS_DEV, errno); + } + } +} + +static int +dev_init(void) +{ + int rc; + + zpiosctl_fd = open(ZPIOS_DEV, O_RDONLY); + if (zpiosctl_fd == -1) { + fprintf(stderr, "Unable to open %s: %d\n" + "Is the zpios module loaded?\n", ZPIOS_DEV, errno); + rc = errno; + goto error; + } + + if ((rc = dev_clear())) + goto error; + + if ((rc = dev_size(0)) < 0) + goto error; + + zpios_buffer_size = rc; + zpios_buffer = (char *)malloc(zpios_buffer_size); + if (zpios_buffer == NULL) { + rc = ENOMEM; + goto error; + } + + memset(zpios_buffer, 0, zpios_buffer_size); + return 0; +error: + if (zpiosctl_fd != -1) { + if (close(zpiosctl_fd) == -1) { + fprintf(stderr, "Unable to close %s: %d\n", + ZPIOS_DEV, errno); + } + } + + return rc; +} + +static int +get_next(uint64_t *val, range_repeat_t *range) +{ + /* if low, incr, high is given */ + if (range->val_count == 0) { + *val = (range->val_low) + + (range->val_low * range->next_val / 100); + + if (*val > range->val_high) + return 0; /* No more values, limit exceeded */ + + if (!range->next_val) + range->next_val = range->val_inc_perc; + else + range->next_val = range->next_val+range->val_inc_perc; + + return 1; /* more values to come */ + + /* if only one val is given */ + } else if (range->val_count == 1) { + if (range->next_val) + return 0; /* No more values, we only have one */ + + *val = range->val[0]; + range->next_val = 1; + return 1; /* more values to come */ + + /* if comma separated values are given */ + } else if (range->val_count > 1) { + if (range->next_val > range->val_count - 1) + return 0; /* No more values, limit exceeded */ + + *val = range->val[range->next_val]; + range->next_val++; + return 1; /* more values to come */ + } + + return 0; +} + +static int +run_one(cmd_args_t *args, uint32_t id, uint32_t T, uint32_t N, + uint64_t C, uint64_t S, uint64_t O) +{ + zpios_cmd_t *cmd; + int rc, rc2, cmd_size; + + dev_clear(); + + cmd_size = sizeof(zpios_cmd_t) + ((T + N + 1) * sizeof(zpios_stats_t)); + cmd = (zpios_cmd_t *)malloc(cmd_size); + if (cmd == NULL) + return ENOMEM; + + memset(cmd, 0, cmd_size); + cmd->cmd_magic = ZPIOS_CMD_MAGIC; + strncpy(cmd->cmd_pool, args->pool, ZPIOS_NAME_SIZE - 1); + strncpy(cmd->cmd_pre, args->pre, ZPIOS_PATH_SIZE - 1); + strncpy(cmd->cmd_post, args->post, ZPIOS_PATH_SIZE - 1); + strncpy(cmd->cmd_log, args->log, ZPIOS_PATH_SIZE - 1); + cmd->cmd_id = id; + cmd->cmd_chunk_size = C; + cmd->cmd_thread_count = T; + cmd->cmd_region_count = N; + cmd->cmd_region_size = S; + cmd->cmd_offset = O; + cmd->cmd_region_noise = args->regionnoise; + cmd->cmd_chunk_noise = args->chunknoise; + cmd->cmd_thread_delay = args->thread_delay; + cmd->cmd_flags = args->flags; + cmd->cmd_data_size = (T + N + 1) * sizeof(zpios_stats_t); + + rc = ioctl(zpiosctl_fd, ZPIOS_CMD, cmd); + if (rc) + args->rc = errno; + + print_stats(args, cmd); + + if (args->verbose) { + rc2 = read(zpiosctl_fd, zpios_buffer, zpios_buffer_size - 1); + if (rc2 < 0) { + fprintf(stdout, "Error reading results: %d\n", rc2); + } else if ((rc2 > 0) && (strlen(zpios_buffer) > 0)) { + fprintf(stdout, "\n%s\n", zpios_buffer); + fflush(stdout); + } + } + + free(cmd); + + return rc; +} + +static int +run_offsets(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next(&args->current_O, &args->O)) { + rc = run_one(args, args->current_id, + args->current_T, args->current_N, args->current_C, + args->current_S, args->current_O); + args->current_id++; + } + + args->O.next_val = 0; + return rc; +} + +static int +run_region_counts(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next((uint64_t *)&args->current_N, &args->N)) + rc = run_offsets(args); + + args->N.next_val = 0; + return rc; +} + +static int +run_region_sizes(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next(&args->current_S, &args->S)) { + if (args->current_S < args->current_C) { + fprintf(stderr, "Error: in any run chunksize can " + "not be smaller than regionsize.\n"); + return EINVAL; + } + + rc = run_region_counts(args); + } + + args->S.next_val = 0; + return rc; +} + +static int +run_chunk_sizes(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next(&args->current_C, &args->C)) { + rc = run_region_sizes(args); + } + + args->C.next_val = 0; + return rc; +} + +static int +run_thread_counts(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next((uint64_t *)&args->current_T, &args->T)) + rc = run_chunk_sizes(args); + + return rc; +} + +int +main(int argc, char **argv) +{ + cmd_args_t *args; + int rc = 0; + + /* Argument init and parsing */ + if ((args = args_init(argc, argv)) == NULL) { + rc = -1; + goto out; + } + + /* Device specific init */ + if ((rc = dev_init())) + goto out; + + /* Generic kernel version string */ + if (args->verbose) + fprintf(stdout, "%s", zpios_version); + + print_stats_header(args); + rc = run_thread_counts(args); +out: + if (args != NULL) + args_fini(args); + + dev_fini(); + return rc; +} diff --git a/cmd/zpios/zpios_util.c b/cmd/zpios/zpios_util.c new file mode 100644 index 000000000..48a0a469f --- /dev/null +++ b/cmd/zpios/zpios_util.c @@ -0,0 +1,454 @@ +/*****************************************************************************\ + * ZPIOS is a heavily modified version of the original PIOS test code. + * It is designed to have the test code running in the Linux kernel + * against ZFS while still being flexibly controled from user space. + * + * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * LLNL-CODE-403049 + * + * Original PIOS Test Code + * Copyright (C) 2004 Cluster File Systems, Inc. + * Written by Peter Braam + * Atul Vidwansa + * Milind Dumbare + * + * This file is part of ZFS on Linux. + * For details, see . + * + * ZPIOS is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ZPIOS is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with ZPIOS. If not, see . +\*****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include "zpios.h" + +/* extracts an unsigned int (64) and K,M,G,T from the string */ +/* and returns a 64 bit value converted to the proper units */ +static int +kmgt_to_uint64(const char *str, uint64_t *val) +{ + char *endptr; + int rc = 0; + + *val = strtoll(str, &endptr, 0); + if ((str == endptr) && (*val == 0)) + return EINVAL; + + switch (endptr[0]) { + case 'k': case 'K': + *val = (*val) << 10; + break; + case 'm': case 'M': + *val = (*val) << 20; + break; + case 'g': case 'G': + *val = (*val) << 30; + break; + case 't': case 'T': + *val = (*val) << 40; + break; + case '\0': + break; + default: + rc = EINVAL; + } + + return rc; +} + +static char * +uint64_to_kmgt(char *str, uint64_t val) +{ + char postfix[] = "kmgt"; + int i = -1; + + while ((val >= KB) && (i < 4)) { + val = (val >> 10); + i++; + } + + if (i >= 4) + (void)snprintf(str, KMGT_SIZE-1, "inf"); + else + (void)snprintf(str, KMGT_SIZE-1, "%lu%c", (unsigned long)val, + (i == -1) ? '\0' : postfix[i]); + + return str; +} + +static char * +kmgt_per_sec(char *str, uint64_t v, double t) +{ + char postfix[] = "kmgt"; + double val = ((double)v) / t; + int i = -1; + + while ((val >= (double)KB) && (i < 4)) { + val /= (double)KB; + i++; + } + + if (i >= 4) + (void)snprintf(str, KMGT_SIZE-1, "inf"); + else + (void)snprintf(str, KMGT_SIZE-1, "%.2f%c", val, + (i == -1) ? '\0' : postfix[i]); + + return str; +} + +static char * +print_flags(char *str, uint32_t flags) +{ + str[0] = (flags & DMU_WRITE) ? 'w' : '-'; + str[1] = (flags & DMU_READ) ? 'r' : '-'; + str[2] = (flags & DMU_VERIFY) ? 'v' : '-'; + str[3] = (flags & DMU_REMOVE) ? 'c' : '-'; + str[4] = (flags & DMU_FPP) ? 'p' : 's'; + str[5] = (flags & (DMU_WRITE_ZC | DMU_READ_ZC)) ? 'z' : '-'; + str[6] = (flags & DMU_WRITE_NOWAIT) ? 'O' : '-'; + str[7] = '\0'; + + return str; +} + +static int +regex_match(const char *string, char *pattern) +{ + regex_t re = { 0 }; + int rc; + + rc = regcomp(&re, pattern, REG_EXTENDED | REG_NOSUB | REG_ICASE); + if (rc) { + fprintf(stderr, "Error: Couldn't do regcomp, %d\n", rc); + return rc; + } + + rc = regexec(&re, string, (size_t) 0, NULL, 0); + regfree(&re); + + return rc; +} + +/* fills the pios_range_repeat structure of comma separated values */ +static int +split_string(const char *optarg, char *pattern, range_repeat_t *range) +{ + const char comma[] = ","; + char *cp, *token[32]; + int rc, i = 0; + + if ((rc = regex_match(optarg, pattern))) + return rc; + + cp = strdup(optarg); + if (cp == NULL) + return ENOMEM; + + do { + /* STRTOK(3) Each subsequent call, with a null pointer as the + * value of the * first argument, starts searching from the + * saved pointer and behaves as described above. + */ + token[i] = strtok(cp, comma); + cp = NULL; + } while ((token[i++] != NULL) && (i < 32)); + + range->val_count = i - 1; + + for (i = 0; i < range->val_count; i++) + kmgt_to_uint64(token[i], &range->val[i]); + + free(cp); + return 0; +} + +int +set_count(char *pattern1, char *pattern2, range_repeat_t *range, + char *optarg, uint32_t *flags, char *arg) +{ + if (flags) + *flags |= FLAG_SET; + + range->next_val = 0; + + if (regex_match(optarg, pattern1) == 0) { + kmgt_to_uint64(optarg, &range->val[0]); + range->val_count = 1; + } else if (split_string(optarg, pattern2, range) < 0) { + fprintf(stderr, "Error: Incorrect pattern for %s, '%s'\n", + arg, optarg); + return EINVAL; + } + + return 0; +} + +/* validates the value with regular expression and sets low, high, incr + * according to value at which flag will be set. Sets the flag after. */ +int +set_lhi(char *pattern, range_repeat_t *range, char *optarg, + int flag, uint32_t *flag_thread, char *arg) +{ + int rc; + + if ((rc = regex_match(optarg, pattern))) { + fprintf(stderr, "Error: Wrong pattern in %s, '%s'\n", + arg, optarg); + return rc; + } + + switch (flag) { + case FLAG_LOW: + kmgt_to_uint64(optarg, &range->val_low); + break; + case FLAG_HIGH: + kmgt_to_uint64(optarg, &range->val_high); + break; + case FLAG_INCR: + kmgt_to_uint64(optarg, &range->val_inc_perc); + break; + default: + assert(0); + } + + *flag_thread |= flag; + + return 0; +} + +int +set_noise(uint64_t *noise, char *optarg, char *arg) +{ + if (regex_match(optarg, REGEX_NUMBERS) == 0) { + kmgt_to_uint64(optarg, noise); + } else { + fprintf(stderr, "Error: Incorrect pattern for %s\n", arg); + return EINVAL; + } + + return 0; +} + +int +set_load_params(cmd_args_t *args, char *optarg) +{ + char *param, *search, comma[] = ","; + int rc = 0; + + search = strdup(optarg); + if (search == NULL) + return ENOMEM; + + while ((param = strtok(search, comma)) != NULL) { + search = NULL; + + if (strcmp("fpp", param) == 0) { + args->flags |= DMU_FPP; /* File Per Process/Thread */ + } else if (strcmp("ssf", param) == 0) { + args->flags &= ~DMU_FPP; /* Single Shared File */ + } else if (strcmp("dmuio", param) == 0) { + args->io_type |= DMU_IO; + args->flags |= (DMU_WRITE | DMU_READ); + } else { + fprintf(stderr, "Invalid load: %s\n", param); + rc = EINVAL; + } + } + + free(search); + + return rc; +} + + +/* checks the low, high, increment values against the single value for + * mutual exclusion, for e.g threadcount is mutually exclusive to + * threadcount_low, ..._high, ..._incr */ +int +check_mutual_exclusive_command_lines(uint32_t flag, char *arg) +{ + if ((flag & FLAG_SET) && (flag & (FLAG_LOW | FLAG_HIGH | FLAG_INCR))) { + fprintf(stderr, "Error: --%s can not be given with --%s_low, " + "--%s_high or --%s_incr.\n", arg, arg, arg, arg); + return 0; + } + + if ((flag & (FLAG_LOW | FLAG_HIGH | FLAG_INCR)) && !(flag & FLAG_SET)){ + if (flag != (FLAG_LOW | FLAG_HIGH | FLAG_INCR)) { + fprintf(stderr, "Error: One or more values missing " + "from --%s_low, --%s_high, --%s_incr.\n", + arg, arg, arg); + return 0; + } + } + + return 1; +} + +void +print_stats_header(cmd_args_t *args) +{ + if (args->verbose) { + printf("status name id\tth-cnt\trg-cnt\trg-sz\t" + "ch-sz\toffset\trg-no\tch-no\tth-dly\tflags\ttime\t" + "cr-time\trm-time\twr-time\trd-time\twr-data\twr-ch\t" + "wr-bw\trd-data\trd-ch\trd-bw\n"); + printf("------------------------------------------------" + "------------------------------------------------" + "------------------------------------------------" + "----------------------------------------------\n"); + } else { + printf("status name id\t" + "wr-data\twr-ch\twr-bw\t" + "rd-data\trd-ch\trd-bw\n"); + printf("-----------------------------------------" + "--------------------------------------\n"); + } +} + +static void +print_stats_human_readable(cmd_args_t *args, zpios_cmd_t *cmd) +{ + zpios_stats_t *summary_stats; + double t_time, wr_time, rd_time, cr_time, rm_time; + char str[KMGT_SIZE]; + + if (args->rc) + printf("FAIL: %3d ", args->rc); + else + printf("PASS: "); + + printf("%-12s", args->name ? args->name : ZPIOS_NAME); + printf("%2u\t", cmd->cmd_id); + + if (args->verbose) { + printf("%u\t", cmd->cmd_thread_count); + printf("%u\t", cmd->cmd_region_count); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_region_size)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_chunk_size)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_offset)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_region_noise)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_chunk_noise)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_thread_delay)); + printf("%s\t", print_flags(str, cmd->cmd_flags)); + } + + if (args->rc) { + printf("\n"); + return; + } + + summary_stats = (zpios_stats_t *)cmd->cmd_data_str; + t_time = zpios_timespec_to_double(summary_stats->total_time.delta); + wr_time = zpios_timespec_to_double(summary_stats->wr_time.delta); + rd_time = zpios_timespec_to_double(summary_stats->rd_time.delta); + cr_time = zpios_timespec_to_double(summary_stats->cr_time.delta); + rm_time = zpios_timespec_to_double(summary_stats->rm_time.delta); + + if (args->verbose) { + printf("%.2f\t", t_time); + printf("%.3f\t", cr_time); + printf("%.3f\t", rm_time); + printf("%.2f\t", wr_time); + printf("%.2f\t", rd_time); + } + + printf("%s\t", uint64_to_kmgt(str, summary_stats->wr_data)); + printf("%s\t", uint64_to_kmgt(str, summary_stats->wr_chunks)); + printf("%s\t", kmgt_per_sec(str, summary_stats->wr_data, wr_time)); + + printf("%s\t", uint64_to_kmgt(str, summary_stats->rd_data)); + printf("%s\t", uint64_to_kmgt(str, summary_stats->rd_chunks)); + printf("%s\n", kmgt_per_sec(str, summary_stats->rd_data, rd_time)); + fflush(stdout); +} + +static void +print_stats_table(cmd_args_t *args, zpios_cmd_t *cmd) +{ + zpios_stats_t *summary_stats; + double wr_time, rd_time; + + if (args->rc) + printf("FAIL: %3d ", args->rc); + else + printf("PASS: "); + + printf("%-12s", args->name ? args->name : ZPIOS_NAME); + printf("%2u\t", cmd->cmd_id); + + if (args->verbose) { + printf("%u\t", cmd->cmd_thread_count); + printf("%u\t", cmd->cmd_region_count); + printf("%llu\t", (long long unsigned)cmd->cmd_region_size); + printf("%llu\t", (long long unsigned)cmd->cmd_chunk_size); + printf("%llu\t", (long long unsigned)cmd->cmd_offset); + printf("%u\t", cmd->cmd_region_noise); + printf("%u\t", cmd->cmd_chunk_noise); + printf("%u\t", cmd->cmd_thread_delay); + printf("0x%x\t", cmd->cmd_flags); + } + + if (args->rc) { + printf("\n"); + return; + } + + summary_stats = (zpios_stats_t *)cmd->cmd_data_str; + wr_time = zpios_timespec_to_double(summary_stats->wr_time.delta); + rd_time = zpios_timespec_to_double(summary_stats->rd_time.delta); + + if (args->verbose) { + printf("%ld.%02ld\t", + (long)summary_stats->total_time.delta.ts_sec, + (long)summary_stats->total_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->cr_time.delta.ts_sec, + (long)summary_stats->cr_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->rm_time.delta.ts_sec, + (long)summary_stats->rm_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->wr_time.delta.ts_sec, + (long)summary_stats->wr_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->rd_time.delta.ts_sec, + (long)summary_stats->rd_time.delta.ts_nsec); + } + + printf("%lld\t", (long long unsigned)summary_stats->wr_data); + printf("%lld\t", (long long unsigned)summary_stats->wr_chunks); + printf("%.4f\t", (double)summary_stats->wr_data / wr_time); + + printf("%lld\t", (long long unsigned)summary_stats->rd_data); + printf("%lld\t", (long long unsigned)summary_stats->rd_chunks); + printf("%.4f\n", (double)summary_stats->rd_data / rd_time); + fflush(stdout); +} + +void +print_stats(cmd_args_t *args, zpios_cmd_t *cmd) +{ + if (args->human_readable) + print_stats_human_readable(args, cmd); + else + print_stats_table(args, cmd); +} diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index b88003585..fbaffcaa3 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -53,6 +53,8 @@ SCRIPTDIR=${SCRIPTDIR} ETCDIR=\${TOPDIR}/etc DEVDIR=\${TOPDIR}/dev ZPOOLDIR=\${TOPDIR}/scripts/zpool-config +ZPIOSDIR=\${TOPDIR}/scripts/zpios-test +ZPIOSPROFILEDIR=\${TOPDIR}/scripts/zpios-profile ZDB=\${CMDDIR}/zdb/zdb ZFS=\${CMDDIR}/zfs/zfs @@ -60,10 +62,13 @@ ZINJECT=\${CMDDIR}/zinject/zinject ZPOOL=\${CMDDIR}/zpool/zpool ZPOOL_ID=\${CMDDIR}/zpool_id/zpool_id ZTEST=\${CMDDIR}/ztest/ztest +ZPIOS=\${CMDDIR}/zpios/zpios COMMON_SH=\${SCRIPTDIR}/common.sh ZFS_SH=\${SCRIPTDIR}/zfs.sh ZPOOL_CREATE_SH=\${SCRIPTDIR}/zpool-create.sh +ZPIOS_SH=\${SCRIPTDIR}/zpios.sh +ZPIOS_SURVEY_SH=\${SCRIPTDIR}/zpios-survey.sh INTREE=1 LDMOD=/sbin/insmod @@ -85,6 +90,10 @@ ZFS_MODULES=( \\ \${MODDIR}/zfs/zfs.ko \\ ) +ZPIOS_MODULES=( \\ + \${MODDIR}/zpios/zpios.ko \\ +) + MODULES=( \\ \${KERNEL_MODULES[[*]]} \\ \${SPL_MODULES[[*]]} \\ diff --git a/configure.ac b/configure.ac index 0b7ea81f7..3963d231c 100644 --- a/configure.ac +++ b/configure.ac @@ -81,12 +81,14 @@ AC_CONFIG_FILES([ cmd/zpool_id/Makefile cmd/zpool_layout/Makefile cmd/ztest/Makefile + cmd/zpios/Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile module/unicode/Makefile module/zcommon/Makefile module/zfs/Makefile + module/zpios/Makefile scripts/Makefile scripts/common.sh zfs.spec diff --git a/module/Makefile.in b/module/Makefile.in index 489119ab3..f59f04479 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -3,6 +3,7 @@ subdir-m += nvpair subdir-m += unicode subdir-m += zcommon subdir-m += zfs +subdir-m += zpios modules: # Make the exported SPL symbols available to these modules. diff --git a/module/zpios/Makefile.in b/module/zpios/Makefile.in new file mode 100644 index 000000000..4924082a1 --- /dev/null +++ b/module/zpios/Makefile.in @@ -0,0 +1,11 @@ +MODULE := zpios + +EXTRA_CFLAGS = -I@MODDIR@/zfs/include +EXTRA_CFLAGS += -I@MODDIR@/zcommon/include +EXTRA_CFLAGS += -I@MODDIR@/avl/include +EXTRA_CFLAGS += -I@MODDIR@/nvpair/include +EXTRA_CFLAGS += -I@MODDIR@/unicode/include +EXTRA_CFLAGS += -I@MODDIR@/zpios/include +EXTRA_CFLAGS += @KERNELCPPFLAGS@ + +obj-m := ${MODULE}.o diff --git a/module/zpios/include/zpios-ctl.h b/module/zpios/include/zpios-ctl.h new file mode 100644 index 000000000..234e96c11 --- /dev/null +++ b/module/zpios/include/zpios-ctl.h @@ -0,0 +1,198 @@ +/*****************************************************************************\ + * ZPIOS is a heavily modified version of the original PIOS test code. + * It is designed to have the test code running in the Linux kernel + * against ZFS while still being flexibly controled from user space. + * + * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * LLNL-CODE-403049 + * + * Original PIOS Test Code + * Copyright (C) 2004 Cluster File Systems, Inc. + * Written by Peter Braam + * Atul Vidwansa + * Milind Dumbare + * + * This file is part of ZFS on Linux. + * For details, see . + * + * ZPIOS is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ZPIOS is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with ZPIOS. If not, see . +\*****************************************************************************/ + +#ifndef _ZPIOS_CTL_H +#define _ZPIOS_CTL_H + +/* Contains shared definitions which both the userspace + * and kernelspace portions of zpios must agree on. + */ +#ifndef _KERNEL +#include +#endif + +#define ZPIOS_MAJOR 232 /* XXX - Arbitrary */ +#define ZPIOS_MINORS 1 +#define ZPIOS_NAME "zpios" +#define ZPIOS_DEV "/dev/zpios" + +#define DMU_IO 0x01 + +#define DMU_WRITE 0x0001 +#define DMU_READ 0x0002 +#define DMU_VERIFY 0x0004 +#define DMU_REMOVE 0x0008 +#define DMU_FPP 0x0010 +#define DMU_WRITE_ZC 0x0020 /* Incompatible w/DMU_VERIFY */ +#define DMU_READ_ZC 0x0040 /* Incompatible w/DMU_VERIFY */ +#define DMU_WRITE_NOWAIT 0x0080 +#define DMU_READ_NOPF 0x0100 + +#define ZPIOS_NAME_SIZE 16 +#define ZPIOS_PATH_SIZE 128 + +#define PHASE_PRE_RUN "pre-run" +#define PHASE_PRE_CREATE "pre-create" +#define PHASE_PRE_WRITE "pre-write" +#define PHASE_PRE_READ "pre-read" +#define PHASE_PRE_REMOVE "pre-remove" +#define PHASE_POST_RUN "post-run" +#define PHASE_POST_CREATE "post-create" +#define PHASE_POST_WRITE "post-write" +#define PHASE_POST_READ "post-read" +#define PHASE_POST_REMOVE "post-remove" + +#define ZPIOS_CFG_MAGIC 0x87237190U +typedef struct zpios_cfg { + uint32_t cfg_magic; /* Unique magic */ + int32_t cfg_cmd; /* Config command */ + int32_t cfg_arg1; /* Config command arg 1 */ + int32_t cfg_rc1; /* Config response 1 */ +} zpios_cfg_t; + +typedef struct zpios_timespec { + uint32_t ts_sec; + uint32_t ts_nsec; +} zpios_timespec_t; + +typedef struct zpios_time { + zpios_timespec_t start; + zpios_timespec_t stop; + zpios_timespec_t delta; +} zpios_time_t; + +typedef struct zpios_stats { + zpios_time_t total_time; + zpios_time_t cr_time; + zpios_time_t rm_time; + zpios_time_t wr_time; + zpios_time_t rd_time; + uint64_t wr_data; + uint64_t wr_chunks; + uint64_t rd_data; + uint64_t rd_chunks; +} zpios_stats_t; + +#define ZPIOS_CMD_MAGIC 0x49715385U +typedef struct zpios_cmd { + uint32_t cmd_magic; /* Unique magic */ + uint32_t cmd_id; /* Run ID */ + char cmd_pool[ZPIOS_NAME_SIZE]; /* Pool name */ + uint64_t cmd_chunk_size; /* Chunk size */ + uint32_t cmd_thread_count; /* Thread count */ + uint32_t cmd_region_count; /* Region count */ + uint64_t cmd_region_size; /* Region size */ + uint64_t cmd_offset; /* Region offset */ + uint32_t cmd_region_noise; /* Region noise */ + uint32_t cmd_chunk_noise; /* Chunk noise */ + uint32_t cmd_thread_delay; /* Thread delay */ + uint32_t cmd_flags; /* Test flags */ + char cmd_pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */ + char cmd_post[ZPIOS_PATH_SIZE]; /* Post-exec hook */ + char cmd_log[ZPIOS_PATH_SIZE]; /* Requested log dir */ + uint64_t cmd_data_size; /* Opaque data size */ + char cmd_data_str[0]; /* Opaque data region */ +} zpios_cmd_t; + +/* Valid ioctls */ +#define ZPIOS_CFG _IOWR('f', 101, zpios_cfg_t) +#define ZPIOS_CMD _IOWR('f', 102, zpios_cmd_t) + +/* Valid configuration commands */ +#define ZPIOS_CFG_BUFFER_CLEAR 0x001 /* Clear text buffer */ +#define ZPIOS_CFG_BUFFER_SIZE 0x002 /* Resize text buffer */ + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC 1000000000L +#endif + +static inline +void zpios_timespec_normalize(zpios_timespec_t *ts, uint32_t sec, uint32_t nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + sec++; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + sec--; + } + ts->ts_sec = sec; + ts->ts_nsec = nsec; +} + +static inline +zpios_timespec_t zpios_timespec_add(zpios_timespec_t lhs, zpios_timespec_t rhs) +{ + zpios_timespec_t ts_delta; + zpios_timespec_normalize(&ts_delta, lhs.ts_sec + rhs.ts_sec, + lhs.ts_nsec + rhs.ts_nsec); + return ts_delta; +} + +static inline +zpios_timespec_t zpios_timespec_sub(zpios_timespec_t lhs, zpios_timespec_t rhs) +{ + zpios_timespec_t ts_delta; + zpios_timespec_normalize(&ts_delta, lhs.ts_sec - rhs.ts_sec, + lhs.ts_nsec - rhs.ts_nsec); + return ts_delta; +} + +#ifdef _KERNEL + +static inline +zpios_timespec_t zpios_timespec_now(void) +{ + zpios_timespec_t zts_now; + struct timespec ts_now; + + ts_now = current_kernel_time(); + zts_now.ts_sec = ts_now.tv_sec; + zts_now.ts_nsec = ts_now.tv_nsec; + + return zts_now; +} + +#else + +static inline +double zpios_timespec_to_double(zpios_timespec_t ts) +{ + return ((double)(ts.ts_sec) + + ((double)(ts.ts_nsec) / (double)(NSEC_PER_SEC))); +} + +#endif /* _KERNEL */ + +#endif /* _ZPIOS_CTL_H */ diff --git a/module/zpios/include/zpios-internal.h b/module/zpios/include/zpios-internal.h new file mode 100644 index 000000000..c9b6e0092 --- /dev/null +++ b/module/zpios/include/zpios-internal.h @@ -0,0 +1,138 @@ +/*****************************************************************************\ + * ZPIOS is a heavily modified version of the original PIOS test code. + * It is designed to have the test code running in the Linux kernel + * against ZFS while still being flexibly controled from user space. + * + * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * LLNL-CODE-403049 + * + * Original PIOS Test Code + * Copyright (C) 2004 Cluster File Systems, Inc. + * Written by Peter Braam + * Atul Vidwansa + * Milind Dumbare + * + * This file is part of ZFS on Linux. + * For details, see . + * + * ZPIOS is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ZPIOS is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with ZPIOS. If not, see . +\*****************************************************************************/ + +#ifndef _ZPIOS_INTERNAL_H +#define _ZPIOS_INTERNAL_H + +#include "zpios-ctl.h" + +#define OBJ_SIZE 64 + +struct run_args; + +typedef struct dmu_obj { + objset_t *os; + uint64_t obj; +} dmu_obj_t; + +/* thread doing the IO data */ +typedef struct thread_data { + struct run_args *run_args; + int thread_no; + int rc; + zpios_stats_t stats; + kmutex_t lock; +} thread_data_t; + +/* region for IO data */ +typedef struct zpios_region { + __u64 wr_offset; + __u64 rd_offset; + __u64 init_offset; + __u64 max_offset; + dmu_obj_t obj; + zpios_stats_t stats; + kmutex_t lock; +} zpios_region_t; + +/* arguments for one run */ +typedef struct run_args { + /* Config args */ + int id; + char pool[ZPIOS_NAME_SIZE]; + __u64 chunk_size; + __u32 thread_count; + __u32 region_count; + __u64 region_size; + __u64 offset; + __u32 region_noise; + __u32 chunk_noise; + __u32 thread_delay; + __u32 flags; + char pre[ZPIOS_PATH_SIZE]; + char post[ZPIOS_PATH_SIZE]; + char log[ZPIOS_PATH_SIZE]; + + /* Control data */ + objset_t *os; + wait_queue_head_t waitq; + volatile uint64_t threads_done; + kmutex_t lock_work; + kmutex_t lock_ctl; + __u32 region_next; + + /* Results data */ + struct file *file; + zpios_stats_t stats; + + thread_data_t **threads; + zpios_region_t regions[0]; /* Must be last element */ +} run_args_t; + +#define ZPIOS_INFO_BUFFER_SIZE 65536 +#define ZPIOS_INFO_BUFFER_REDZONE 1024 + +typedef struct zpios_info { + spinlock_t info_lock; + int info_size; + char *info_buffer; + char *info_head; /* Internal kernel use only */ +} zpios_info_t; + +#define zpios_print(file, format, args...) \ +({ zpios_info_t *_info_ = (zpios_info_t *)file->private_data; \ + int _rc_; \ + \ + ASSERT(_info_); \ + ASSERT(_info_->info_buffer); \ + \ + spin_lock(&_info_->info_lock); \ + \ + /* Don't allow the kernel to start a write in the red zone */ \ + if ((int)(_info_->info_head - _info_->info_buffer) > \ + (_info_->info_size - ZPIOS_INFO_BUFFER_REDZONE)) { \ + _rc_ = -EOVERFLOW; \ + } else { \ + _rc_ = sprintf(_info_->info_head, format, args); \ + if (_rc_ >= 0) \ + _info_->info_head += _rc_; \ + } \ + \ + spin_unlock(&_info_->info_lock); \ + _rc_; \ +}) + +#define zpios_vprint(file, test, format, args...) \ + zpios_print(file, "%*s: " format, ZPIOS_NAME_SIZE, test, args) + +#endif /* _ZPIOS_INTERNAL_H */ diff --git a/module/zpios/zpios.c b/module/zpios/zpios.c new file mode 100644 index 000000000..3edc16105 --- /dev/null +++ b/module/zpios/zpios.c @@ -0,0 +1,1331 @@ +/*****************************************************************************\ + * ZPIOS is a heavily modified version of the original PIOS test code. + * It is designed to have the test code running in the Linux kernel + * against ZFS while still being flexibly controled from user space. + * + * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * LLNL-CODE-403049 + * + * Original PIOS Test Code + * Copyright (C) 2004 Cluster File Systems, Inc. + * Written by Peter Braam + * Atul Vidwansa + * Milind Dumbare + * + * This file is part of ZFS on Linux. + * For details, see . + * + * ZPIOS is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ZPIOS is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with ZPIOS. If not, see . +\*****************************************************************************/ + +#include +#include +#include +#include +#include "zpios-internal.h" + + +static spl_class *zpios_class; +static spl_device *zpios_device; +static char *zpios_tag = "zpios_tag"; + +static +int zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc) +{ + /* This is stack heavy but it should be OK since we are only + * making the upcall between tests when the stack is shallow. + */ + char id[16], chunk_size[16], region_size[16], thread_count[16]; + char region_count[16], offset[16], region_noise[16], chunk_noise[16]; + char thread_delay[16], flags[16], result[8]; + char *argv[16], *envp[4]; + + if ((path == NULL) || (strlen(path) == 0)) + return -ENOENT; + + snprintf(id, 15, "%d", run_args->id); + snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size); + snprintf(region_size, 15, "%lu",(long unsigned) run_args->region_size); + snprintf(thread_count, 15, "%u", run_args->thread_count); + snprintf(region_count, 15, "%u", run_args->region_count); + snprintf(offset, 15, "%lu", (long unsigned)run_args->offset); + snprintf(region_noise, 15, "%u", run_args->region_noise); + snprintf(chunk_noise, 15, "%u", run_args->chunk_noise); + snprintf(thread_delay, 15, "%u", run_args->thread_delay); + snprintf(flags, 15, "0x%x", run_args->flags); + snprintf(result, 7, "%d", rc); + + /* Passing 15 args to registered pre/post upcall */ + argv[0] = path; + argv[1] = phase; + argv[2] = strlen(run_args->log) ? run_args->log : ""; + argv[3] = id; + argv[4] = run_args->pool; + argv[5] = chunk_size; + argv[6] = region_size; + argv[7] = thread_count; + argv[8] = region_count; + argv[9] = offset; + argv[10] = region_noise; + argv[11] = chunk_noise; + argv[12] = thread_delay; + argv[13] = flags; + argv[14] = result; + argv[15] = NULL; + + /* Passing environment for user space upcall */ + envp[0] = "HOME=/"; + envp[1] = "TERM=linux"; + envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin"; + envp[3] = NULL; + + return call_usermodehelper(path, argv, envp, 1); +} + +static uint64_t +zpios_dmu_object_create(run_args_t *run_args, objset_t *os) +{ + struct dmu_tx *tx; + uint64_t obj = 0ULL; + int rc; + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE); + rc = dmu_tx_assign(tx, TXG_WAIT); + if (rc) { + zpios_print(run_args->file, + "dmu_tx_assign() failed: %d\n", rc); + dmu_tx_abort(tx); + return obj; + } + + obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx); + if (rc) { + zpios_print(run_args->file, + "dmu_object_set_blocksize() failed: %d\n", rc); + dmu_tx_abort(tx); + return obj; + } + + dmu_tx_commit(tx); + + return obj; +} + +static int +zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj) +{ + struct dmu_tx *tx; + int rc; + + tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); + rc = dmu_tx_assign(tx, TXG_WAIT); + if (rc) { + zpios_print(run_args->file, + "dmu_tx_assign() failed: %d\n", rc); + dmu_tx_abort(tx); + return rc; + } + + rc = dmu_object_free(os, obj, tx); + if (rc) { + zpios_print(run_args->file, + "dmu_object_free() failed: %d\n", rc); + dmu_tx_abort(tx); + return rc; + } + + dmu_tx_commit(tx); + + return 0; +} + +static int +zpios_dmu_setup(run_args_t *run_args) +{ + zpios_time_t *t = &(run_args->stats.cr_time); + objset_t *os; + char name[32]; + uint64_t obj = 0ULL; + int i, rc = 0, rc2; + + (void)zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0); + t->start = zpios_timespec_now(); + + (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); + rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL); + if (rc) { + zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) " + "failed: %d\n", name, rc); + goto out; + } + + rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os); + if (rc) { + zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) " + "failed: %d\n", name, rc); + goto out_destroy; + } + + if (!(run_args->flags & DMU_FPP)) { + obj = zpios_dmu_object_create(run_args, os); + if (obj == 0) { + rc = -EBADF; + zpios_print(run_args->file, "Error zpios_dmu_" + "object_create() failed, %d\n", rc); + goto out_destroy; + } + } + + for (i = 0; i < run_args->region_count; i++) { + zpios_region_t *region; + + region = &run_args->regions[i]; + mutex_init(®ion->lock, NULL, MUTEX_DEFAULT, NULL); + + if (run_args->flags & DMU_FPP) { + /* File per process */ + region->obj.os = os; + region->obj.obj = zpios_dmu_object_create(run_args, os); + ASSERT(region->obj.obj > 0); /* XXX - Handle this */ + region->wr_offset = run_args->offset; + region->rd_offset = run_args->offset; + region->init_offset = run_args->offset; + region->max_offset = run_args->offset + + run_args->region_size; + } else { + /* Single shared file */ + region->obj.os = os; + region->obj.obj = obj; + region->wr_offset = run_args->offset * i; + region->rd_offset = run_args->offset * i; + region->init_offset = run_args->offset * i; + region->max_offset = run_args->offset * + i + run_args->region_size; + } + } + + run_args->os = os; +out_destroy: + if (rc) { + rc2 = dmu_objset_destroy(name, B_FALSE); + if (rc2) + zpios_print(run_args->file, "Error dmu_objset_destroy" + "(%s, ...) failed: %d\n", name, rc2); + } +out: + t->stop = zpios_timespec_now(); + t->delta = zpios_timespec_sub(t->stop, t->start); + (void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc); + + return rc; +} + +static int +zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file) +{ + run_args_t *ra; + int rc, size; + + size = sizeof(*ra) + kcmd->cmd_region_count * sizeof(zpios_region_t); + + ra = vmem_zalloc(size, KM_SLEEP); + if (ra == NULL) { + zpios_print(file, "Unable to vmem_zalloc() %d bytes " + "for regions\n", size); + return -ENOMEM; + } + + *run_args = ra; + strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1); + strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1); + strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1); + strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1); + ra->id = kcmd->cmd_id; + ra->chunk_size = kcmd->cmd_chunk_size; + ra->thread_count = kcmd->cmd_thread_count; + ra->region_count = kcmd->cmd_region_count; + ra->region_size = kcmd->cmd_region_size; + ra->offset = kcmd->cmd_offset; + ra->region_noise = kcmd->cmd_region_noise; + ra->chunk_noise = kcmd->cmd_chunk_noise; + ra->thread_delay = kcmd->cmd_thread_delay; + ra->flags = kcmd->cmd_flags; + ra->stats.wr_data = 0; + ra->stats.wr_chunks = 0; + ra->stats.rd_data = 0; + ra->stats.rd_chunks = 0; + ra->region_next = 0; + ra->file = file; + mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL); + + (void)zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0); + + rc = zpios_dmu_setup(ra); + if (rc) { + mutex_destroy(&ra->lock_ctl); + mutex_destroy(&ra->lock_work); + vmem_free(ra, size); + *run_args = NULL; + } + + return rc; +} + +static int +zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset, + __u32 *chunk_size, zpios_region_t **region, __u32 flags) +{ + int i, j, count = 0; + unsigned int random_int; + + get_random_bytes(&random_int, sizeof(unsigned int)); + + mutex_enter(&run_args->lock_work); + i = run_args->region_next; + + /* XXX: I don't much care for this chunk selection mechansim + * there's the potential to burn a lot of time here doing nothing + * useful while holding the global lock. This could give some + * misleading performance results. I'll fix it latter. + */ + while (count < run_args->region_count) { + __u64 *rw_offset; + zpios_time_t *rw_time; + + j = i % run_args->region_count; + *region = &(run_args->regions[j]); + + if (flags & DMU_WRITE) { + rw_offset = &((*region)->wr_offset); + rw_time = &((*region)->stats.wr_time); + } else { + rw_offset = &((*region)->rd_offset); + rw_time = &((*region)->stats.rd_time); + } + + /* test if region is fully written */ + if (*rw_offset + *chunk_size > (*region)->max_offset) { + i++; + count++; + + if (unlikely(rw_time->stop.ts_sec == 0) && + unlikely(rw_time->stop.ts_nsec == 0)) + rw_time->stop = zpios_timespec_now(); + + continue; + } + + *offset = *rw_offset; + *obj = (*region)->obj; + *rw_offset += *chunk_size; + + /* update ctl structure */ + if (run_args->region_noise) { + get_random_bytes(&random_int, sizeof(unsigned int)); + run_args->region_next += random_int % run_args->region_noise; + } else { + run_args->region_next++; + } + + mutex_exit(&run_args->lock_work); + return 1; + } + + /* nothing left to do */ + mutex_exit(&run_args->lock_work); + + return 0; +} + +static void +zpios_remove_objset(run_args_t *run_args) +{ + zpios_time_t *t = &(run_args->stats.rm_time); + zpios_region_t *region; + char name[32]; + int rc = 0, i; + + (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0); + t->start = zpios_timespec_now(); + + (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); + + if (run_args->flags & DMU_REMOVE) { + if (run_args->flags & DMU_FPP) { + for (i = 0; i < run_args->region_count; i++) { + region = &run_args->regions[i]; + rc = zpios_dmu_object_free(run_args, + region->obj.os, + region->obj.obj); + if (rc) + zpios_print(run_args->file, "Error " + "removing object %d, %d\n", + (int)region->obj.obj, rc); + } + } else { + region = &run_args->regions[0]; + rc = zpios_dmu_object_free(run_args, + region->obj.os, + region->obj.obj); + if (rc) + zpios_print(run_args->file, "Error " + "removing object %d, %d\n", + (int)region->obj.obj, rc); + } + } + + dmu_objset_disown(run_args->os, zpios_tag); + + if (run_args->flags & DMU_REMOVE) { + rc = dmu_objset_destroy(name, B_FALSE); + if (rc) + zpios_print(run_args->file, "Error dmu_objset_destroy" + "(%s, ...) failed: %d\n", name, rc); + } + + t->stop = zpios_timespec_now(); + t->delta = zpios_timespec_sub(t->stop, t->start); + (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc); +} + +static void +zpios_cleanup_run(run_args_t *run_args) +{ + int i, size = 0; + + if (run_args == NULL) + return; + + if (run_args->threads != NULL) { + for (i = 0; i < run_args->thread_count; i++) { + if (run_args->threads[i]) { + mutex_destroy(&run_args->threads[i]->lock); + kmem_free(run_args->threads[i], + sizeof(thread_data_t)); + } + } + + kmem_free(run_args->threads, + sizeof(thread_data_t *) * run_args->thread_count); + } + + for (i = 0; i < run_args->region_count; i++) + mutex_destroy(&run_args->regions[i].lock); + + mutex_destroy(&run_args->lock_work); + mutex_destroy(&run_args->lock_ctl); + size = run_args->region_count * sizeof(zpios_region_t); + + vmem_free(run_args, sizeof(*run_args) + size); +} + +static int +zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object, + uint64_t offset, uint64_t size, const void *buf) +{ + struct dmu_tx *tx; + int rc, how = TXG_WAIT; +// int flags = 0; + + if (run_args->flags & DMU_WRITE_NOWAIT) + how = TXG_NOWAIT; + + while (1) { + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, object, offset, size); + rc = dmu_tx_assign(tx, how); + + if (rc) { + if (rc == ERESTART && how == TXG_NOWAIT) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + continue; + } + zpios_print(run_args->file, + "Error in dmu_tx_assign(), %d", rc); + dmu_tx_abort(tx); + return rc; + } + break; + } + +// if (run_args->flags & DMU_WRITE_ZC) +// flags |= DMU_WRITE_ZEROCOPY; + + dmu_write(os, object, offset, size, buf, tx); + dmu_tx_commit(tx); + + return 0; +} + +static int +zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object, + uint64_t offset, uint64_t size, void *buf) +{ + int flags = 0; + +// if (run_args->flags & DMU_READ_ZC) +// flags |= DMU_READ_ZEROCOPY; + + if (run_args->flags & DMU_READ_NOPF) + flags |= DMU_READ_NO_PREFETCH; + + return dmu_read(os, object, offset, size, buf, flags); +} + +static int +zpios_thread_main(void *data) +{ + thread_data_t *thr = (thread_data_t *)data; + run_args_t *run_args = thr->run_args; + zpios_time_t t; + dmu_obj_t obj; + __u64 offset; + __u32 chunk_size; + zpios_region_t *region; + char *buf; + unsigned int random_int; + int chunk_noise = run_args->chunk_noise; + int chunk_noise_tmp = 0; + int thread_delay = run_args->thread_delay; + int thread_delay_tmp = 0; + int i, rc = 0; + + if (chunk_noise) { + get_random_bytes(&random_int, sizeof(unsigned int)); + chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise; + } + + /* It's OK to vmem_alloc() this memory because it will be copied + * in to the slab and pointers to the slab copy will be setup in + * the bio when the IO is submitted. This of course is not ideal + * since we want a zero-copy IO path if possible. It would be nice + * to have direct access to those slab entries. + */ + chunk_size = run_args->chunk_size + chunk_noise_tmp; + buf = (char *)vmem_alloc(chunk_size, KM_SLEEP); + ASSERT(buf); + + /* Trivial data verification pattern for now. */ + if (run_args->flags & DMU_VERIFY) + memset(buf, 'z', chunk_size); + + /* Write phase */ + mutex_enter(&thr->lock); + thr->stats.wr_time.start = zpios_timespec_now(); + mutex_exit(&thr->lock); + + while (zpios_get_work_item(run_args, &obj, &offset, + &chunk_size, ®ion, DMU_WRITE)) { + if (thread_delay) { + get_random_bytes(&random_int, sizeof(unsigned int)); + thread_delay_tmp = random_int % thread_delay; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(thread_delay_tmp); /* In jiffies */ + } + + t.start = zpios_timespec_now(); + rc = zpios_dmu_write(run_args, obj.os, obj.obj, + offset, chunk_size, buf); + t.stop = zpios_timespec_now(); + t.delta = zpios_timespec_sub(t.stop, t.start); + + if (rc) { + zpios_print(run_args->file, "IO error while doing " + "dmu_write(): %d\n", rc); + break; + } + + mutex_enter(&thr->lock); + thr->stats.wr_data += chunk_size; + thr->stats.wr_chunks++; + thr->stats.wr_time.delta = zpios_timespec_add( + thr->stats.wr_time.delta, t.delta); + mutex_exit(&thr->lock); + + mutex_enter(®ion->lock); + region->stats.wr_data += chunk_size; + region->stats.wr_chunks++; + region->stats.wr_time.delta = zpios_timespec_add( + region->stats.wr_time.delta, t.delta); + + /* First time region was accessed */ + if (region->init_offset == offset) + region->stats.wr_time.start = t.start; + + mutex_exit(®ion->lock); + } + + mutex_enter(&run_args->lock_ctl); + run_args->threads_done++; + mutex_exit(&run_args->lock_ctl); + + mutex_enter(&thr->lock); + thr->rc = rc; + thr->stats.wr_time.stop = zpios_timespec_now(); + mutex_exit(&thr->lock); + wake_up(&run_args->waitq); + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + + /* Check if we should exit */ + mutex_enter(&thr->lock); + rc = thr->rc; + mutex_exit(&thr->lock); + if (rc) + goto out; + + /* Read phase */ + mutex_enter(&thr->lock); + thr->stats.rd_time.start = zpios_timespec_now(); + mutex_exit(&thr->lock); + + while (zpios_get_work_item(run_args, &obj, &offset, + &chunk_size, ®ion, DMU_READ)) { + if (thread_delay) { + get_random_bytes(&random_int, sizeof(unsigned int)); + thread_delay_tmp = random_int % thread_delay; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(thread_delay_tmp); /* In jiffies */ + } + + if (run_args->flags & DMU_VERIFY) + memset(buf, 0, chunk_size); + + t.start = zpios_timespec_now(); + rc = zpios_dmu_read(run_args, obj.os, obj.obj, + offset, chunk_size, buf); + t.stop = zpios_timespec_now(); + t.delta = zpios_timespec_sub(t.stop, t.start); + + if (rc) { + zpios_print(run_args->file, "IO error while doing " + "dmu_read(): %d\n", rc); + break; + } + + /* Trivial data verification, expensive! */ + if (run_args->flags & DMU_VERIFY) { + for (i = 0; i < chunk_size; i++) { + if (buf[i] != 'z') { + zpios_print(run_args->file, + "IO verify error: %d/%d/%d\n", + (int)obj.obj, (int)offset, + (int)chunk_size); + break; + } + } + } + + mutex_enter(&thr->lock); + thr->stats.rd_data += chunk_size; + thr->stats.rd_chunks++; + thr->stats.rd_time.delta = zpios_timespec_add( + thr->stats.rd_time.delta, t.delta); + mutex_exit(&thr->lock); + + mutex_enter(®ion->lock); + region->stats.rd_data += chunk_size; + region->stats.rd_chunks++; + region->stats.rd_time.delta = zpios_timespec_add( + region->stats.rd_time.delta, t.delta); + + /* First time region was accessed */ + if (region->init_offset == offset) + region->stats.rd_time.start = t.start; + + mutex_exit(®ion->lock); + } + + mutex_enter(&run_args->lock_ctl); + run_args->threads_done++; + mutex_exit(&run_args->lock_ctl); + + mutex_enter(&thr->lock); + thr->rc = rc; + thr->stats.rd_time.stop = zpios_timespec_now(); + mutex_exit(&thr->lock); + wake_up(&run_args->waitq); + +out: + vmem_free(buf, chunk_size); + do_exit(0); + + return rc; /* Unreachable, due to do_exit() */ +} + +static int +zpios_thread_done(run_args_t *run_args) +{ + ASSERT(run_args->threads_done <= run_args->thread_count); + return (run_args->threads_done == run_args->thread_count); +} + +static int +zpios_threads_run(run_args_t *run_args) +{ + struct task_struct *tsk, **tsks; + thread_data_t *thr = NULL; + zpios_time_t *tt = &(run_args->stats.total_time); + zpios_time_t *tw = &(run_args->stats.wr_time); + zpios_time_t *tr = &(run_args->stats.rd_time); + int i, rc = 0, tc = run_args->thread_count; + + tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP); + if (tsks == NULL) { + rc = -ENOMEM; + goto cleanup2; + } + + run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP); + if (run_args->threads == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + init_waitqueue_head(&run_args->waitq); + run_args->threads_done = 0; + + /* Create all the needed threads which will sleep until awoken */ + for (i = 0; i < tc; i++) { + thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP); + if (thr == NULL) { + rc = -ENOMEM; + goto taskerr; + } + + thr->thread_no = i; + thr->run_args = run_args; + thr->rc = 0; + mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL); + run_args->threads[i] = thr; + + tsk = kthread_create(zpios_thread_main, (void *)thr, + "%s/%d", "zpios_io", i); + if (IS_ERR(tsk)) { + rc = -EINVAL; + goto taskerr; + } + + tsks[i] = tsk; + } + + tt->start = zpios_timespec_now(); + + /* Wake up all threads for write phase */ + (void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0); + for (i = 0; i < tc; i++) + wake_up_process(tsks[i]); + + /* Wait for write phase to complete */ + tw->start = zpios_timespec_now(); + wait_event(run_args->waitq, zpios_thread_done(run_args)); + tw->stop = zpios_timespec_now(); + (void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc); + + for (i = 0; i < tc; i++) { + thr = run_args->threads[i]; + + mutex_enter(&thr->lock); + + if (!rc && thr->rc) + rc = thr->rc; + + run_args->stats.wr_data += thr->stats.wr_data; + run_args->stats.wr_chunks += thr->stats.wr_chunks; + mutex_exit(&thr->lock); + } + + if (rc) { + /* Wake up all threads and tell them to exit */ + for (i = 0; i < tc; i++) { + mutex_enter(&thr->lock); + thr->rc = rc; + mutex_exit(&thr->lock); + + wake_up_process(tsks[i]); + } + goto out; + } + + mutex_enter(&run_args->lock_ctl); + ASSERT(run_args->threads_done == run_args->thread_count); + run_args->threads_done = 0; + mutex_exit(&run_args->lock_ctl); + + /* Wake up all threads for read phase */ + (void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0); + for (i = 0; i < tc; i++) + wake_up_process(tsks[i]); + + /* Wait for read phase to complete */ + tr->start = zpios_timespec_now(); + wait_event(run_args->waitq, zpios_thread_done(run_args)); + tr->stop = zpios_timespec_now(); + (void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc); + + for (i = 0; i < tc; i++) { + thr = run_args->threads[i]; + + mutex_enter(&thr->lock); + + if (!rc && thr->rc) + rc = thr->rc; + + run_args->stats.rd_data += thr->stats.rd_data; + run_args->stats.rd_chunks += thr->stats.rd_chunks; + mutex_exit(&thr->lock); + } +out: + tt->stop = zpios_timespec_now(); + tt->delta = zpios_timespec_sub(tt->stop, tt->start); + tw->delta = zpios_timespec_sub(tw->stop, tw->start); + tr->delta = zpios_timespec_sub(tr->stop, tr->start); + +cleanup: + kmem_free(tsks, sizeof(struct task_struct *) * tc); +cleanup2: + /* Returns first encountered thread error (if any) */ + return rc; + +taskerr: + /* Destroy all threads that were created successfully */ + for (i = 0; i < tc; i++) + if (tsks[i] != NULL) + (void) kthread_stop(tsks[i]); + + goto cleanup; +} + +static int +zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd, + int data_size, void *data) +{ + run_args_t *run_args = { 0 }; + zpios_stats_t *stats = (zpios_stats_t *)data; + int i, n, m, size, rc; + + if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) || + (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) { + zpios_print(file, "Invalid chunk_size, region_size, " + "thread_count, or region_count, %d\n", -EINVAL); + return -EINVAL; + } + + if (!(kcmd->cmd_flags & DMU_WRITE) || + !(kcmd->cmd_flags & DMU_READ)) { + zpios_print(file, "Invalid flags, minimally DMU_WRITE " + "and DMU_READ must be set, %d\n", -EINVAL); + return -EINVAL; + } + + if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) && + (kcmd->cmd_flags & DMU_VERIFY)) { + zpios_print(file, "Invalid flags, DMU_*_ZC incompatible " + "with DMU_VERIFY, used for performance analysis " + "only, %d\n", -EINVAL); + return -EINVAL; + } + + /* Opaque data on return contains structs of the following form: + * + * zpios_stat_t stats[]; + * stats[0] = run_args->stats; + * stats[1-N] = threads[N]->stats; + * stats[N+1-M] = regions[M]->stats; + * + * Where N is the number of threads, and M is the number of regions. + */ + size = (sizeof(zpios_stats_t) + + (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) + + (kcmd->cmd_region_count * sizeof(zpios_stats_t))); + if (data_size < size) { + zpios_print(file, "Invalid size, command data buffer " + "size too small, (%d < %d)\n", data_size, size); + return -ENOSPC; + } + + rc = zpios_setup_run(&run_args, kcmd, file); + if (rc) + return rc; + + rc = zpios_threads_run(run_args); + zpios_remove_objset(run_args); + if (rc) + goto cleanup; + + if (stats) { + n = 1; + m = 1 + kcmd->cmd_thread_count; + stats[0] = run_args->stats; + + for (i = 0; i < kcmd->cmd_thread_count; i++) + stats[n+i] = run_args->threads[i]->stats; + + for (i = 0; i < kcmd->cmd_region_count; i++) + stats[m+i] = run_args->regions[i].stats; + } + +cleanup: + zpios_cleanup_run(run_args); + + (void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0); + + return rc; +} + +static int +zpios_open(struct inode *inode, struct file *file) +{ + unsigned int minor = iminor(inode); + zpios_info_t *info; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + info = (zpios_info_t *)kmem_alloc(sizeof(*info), KM_SLEEP); + if (info == NULL) + return -ENOMEM; + + spin_lock_init(&info->info_lock); + info->info_size = ZPIOS_INFO_BUFFER_SIZE; + info->info_buffer = (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE,KM_SLEEP); + if (info->info_buffer == NULL) { + kmem_free(info, sizeof(*info)); + return -ENOMEM; + } + + info->info_head = info->info_buffer; + file->private_data = (void *)info; + + return 0; +} + +static int +zpios_release(struct inode *inode, struct file *file) +{ + unsigned int minor = iminor(inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE); + kmem_free(info, sizeof(*info)); + + return 0; +} + +static int +zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) +{ + zpios_info_t *info = (zpios_info_t *)file->private_data; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + memset(info->info_buffer, 0, info->info_size); + info->info_head = info->info_buffer; + spin_unlock(&info->info_lock); + + return 0; +} + +static int +zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) +{ + zpios_info_t *info = (zpios_info_t *)file->private_data; + char *buf; + int min, size, rc = 0; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + if (kcfg->cfg_arg1 > 0) { + + size = kcfg->cfg_arg1; + buf = (char *)vmem_alloc(size, KM_SLEEP); + if (buf == NULL) { + rc = -ENOMEM; + goto out; + } + + /* Zero fill and truncate contents when coping buffer */ + min = ((size < info->info_size) ? size : info->info_size); + memset(buf, 0, size); + memcpy(buf, info->info_buffer, min); + vmem_free(info->info_buffer, info->info_size); + info->info_size = size; + info->info_buffer = buf; + info->info_head = info->info_buffer; + } + + kcfg->cfg_rc1 = info->info_size; + + if (copy_to_user((struct zpios_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) + rc = -EFAULT; +out: + spin_unlock(&info->info_lock); + + return rc; +} + +static int +zpios_ioctl_cfg(struct file *file, unsigned long arg) +{ + zpios_cfg_t kcfg; + int rc = 0; + + if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg))) + return -EFAULT; + + if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) { + zpios_print(file, "Bad config magic 0x%x != 0x%x\n", + kcfg.cfg_magic, ZPIOS_CFG_MAGIC); + return -EINVAL; + } + + switch (kcfg.cfg_cmd) { + case ZPIOS_CFG_BUFFER_CLEAR: + /* cfg_arg1 - Unused + * cfg_rc1 - Unused + */ + rc = zpios_buffer_clear(file, &kcfg, arg); + break; + case ZPIOS_CFG_BUFFER_SIZE: + /* cfg_arg1 - 0 - query size; >0 resize + * cfg_rc1 - Set to current buffer size + */ + rc = zpios_buffer_size(file, &kcfg, arg); + break; + default: + zpios_print(file, "Bad config command %d\n", + kcfg.cfg_cmd); + rc = -EINVAL; + break; + } + + return rc; +} + +static int +zpios_ioctl_cmd(struct file *file, unsigned long arg) +{ + zpios_cmd_t *kcmd; + void *data = NULL; + int rc = -EINVAL; + + kcmd = kmem_alloc(sizeof(zpios_cmd_t), KM_SLEEP); + if (kcmd == NULL) { + zpios_print(file, "Unable to kmem_alloc() %ld byte for " + "zpios_cmd_t\n", (long int)sizeof(zpios_cmd_t)); + return -ENOMEM; + } + + rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof(zpios_cmd_t)); + if (rc) { + zpios_print(file, "Unable to copy command structure " + "from user to kernel memory, %d\n", rc); + goto out_cmd; + } + + if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) { + zpios_print(file, "Bad command magic 0x%x != 0x%x\n", + kcmd->cmd_magic, ZPIOS_CFG_MAGIC); + rc = -EINVAL; + goto out_cmd; + } + + /* Allocate memory for any opaque data the caller needed to pass on */ + if (kcmd->cmd_data_size > 0) { + data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP); + if (data == NULL) { + zpios_print(file, "Unable to vmem_alloc() %ld " + "bytes for data buffer\n", + (long)kcmd->cmd_data_size); + rc = -ENOMEM; + goto out_cmd; + } + + rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t, + cmd_data_str)), kcmd->cmd_data_size); + if (rc) { + zpios_print(file, "Unable to copy data buffer " + "from user to kernel memory, %d\n", rc); + goto out_data; + } + } + + rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data); + + if (data != NULL) { + /* If the test failed do not print out the stats */ + if (rc) + goto out_data; + + rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t, + cmd_data_str)), data, kcmd->cmd_data_size); + if (rc) { + zpios_print(file, "Unable to copy data buffer " + "from kernel to user memory, %d\n", rc); + rc = -EFAULT; + } + +out_data: + vmem_free(data, kcmd->cmd_data_size); + } +out_cmd: + kmem_free(kcmd, sizeof(zpios_cmd_t)); + + return rc; +} + +static int +zpios_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + unsigned int minor = iminor(inode); + int rc = 0; + + /* Ignore tty ioctls */ + if ((cmd & 0xffffff00) == ((int)'T') << 8) + return -ENOTTY; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + switch (cmd) { + case ZPIOS_CFG: + rc = zpios_ioctl_cfg(file, arg); + break; + case ZPIOS_CMD: + rc = zpios_ioctl_cmd(file, arg); + break; + default: + zpios_print(file, "Bad ioctl command %d\n", cmd); + rc = -EINVAL; + break; + } + + return rc; +} + +#ifdef CONFIG_COMPAT +/* Compatibility handler for ioctls from 32-bit ELF binaries */ +static long +zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return zpios_ioctl(file->f_dentry->d_inode, file, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + +/* I'm not sure why you would want to write in to this buffer from + * user space since its principle use is to pass test status info + * back to the user space, but I don't see any reason to prevent it. + */ +static ssize_t +zpios_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + int rc = 0; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + /* Write beyond EOF */ + if (*ppos >= info->info_size) { + rc = -EFBIG; + goto out; + } + + /* Resize count if beyond EOF */ + if (*ppos + count > info->info_size) + count = info->info_size - *ppos; + + if (copy_from_user(info->info_buffer, buf, count)) { + rc = -EFAULT; + goto out; + } + + *ppos += count; + rc = count; +out: + spin_unlock(&info->info_lock); + return rc; +} + +static ssize_t +zpios_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + int rc = 0; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + /* Read beyond EOF */ + if (*ppos >= info->info_size) + goto out; + + /* Resize count if beyond EOF */ + if (*ppos + count > info->info_size) + count = info->info_size - *ppos; + + if (copy_to_user(buf, info->info_buffer + *ppos, count)) { + rc = -EFAULT; + goto out; + } + + *ppos += count; + rc = count; +out: + spin_unlock(&info->info_lock); + return rc; +} + +static loff_t zpios_seek(struct file *file, loff_t offset, int origin) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + int rc = -EINVAL; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + switch (origin) { + case 0: /* SEEK_SET - No-op just do it */ + break; + case 1: /* SEEK_CUR - Seek from current */ + offset = file->f_pos + offset; + break; + case 2: /* SEEK_END - Seek from end */ + offset = info->info_size + offset; + break; + } + + if (offset >= 0) { + file->f_pos = offset; + file->f_version = 0; + rc = offset; + } + + spin_unlock(&info->info_lock); + + return rc; +} + +static struct cdev zpios_cdev; +static struct file_operations zpios_fops = { + .owner = THIS_MODULE, + .open = zpios_open, + .release = zpios_release, + .ioctl = zpios_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = zpios_compat_ioctl, +#endif + .read = zpios_read, + .write = zpios_write, + .llseek = zpios_seek, +}; + +static int +zpios_init(void) +{ + dev_t dev; + int rc; + + dev = MKDEV(ZPIOS_MAJOR, 0); + if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME))) + goto error; + + /* Support for registering a character driver */ + cdev_init(&zpios_cdev, &zpios_fops); + zpios_cdev.owner = THIS_MODULE; + kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME); + if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) { + printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc); + kobject_put(&zpios_cdev.kobj); + unregister_chrdev_region(dev, ZPIOS_MINORS); + goto error; + } + + /* Support for udev make driver info available in sysfs */ + zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME); + if (IS_ERR(zpios_class)) { + rc = PTR_ERR(zpios_class); + printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc); + cdev_del(&zpios_cdev); + unregister_chrdev_region(dev, ZPIOS_MINORS); + goto error; + } + + zpios_device = spl_device_create(zpios_class, NULL, + dev, NULL, ZPIOS_NAME); + return 0; +error: + printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc); + return rc; +} + +static int +zpios_fini(void) +{ + dev_t dev = MKDEV(ZPIOS_MAJOR, 0); + + spl_device_destroy(zpios_class, zpios_device, dev); + spl_class_destroy(zpios_class); + cdev_del(&zpios_cdev); + unregister_chrdev_region(dev, ZPIOS_MINORS); + + return 0; +} + +spl_module_init(zpios_init); +spl_module_exit(zpios_fini); + +MODULE_AUTHOR("LLNL / Sun"); +MODULE_DESCRIPTION("Kernel PIOS implementation"); +MODULE_LICENSE("GPL"); diff --git a/scripts/Makefile.am b/scripts/Makefile.am index a1dfc3871..ed6e5028f 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -4,11 +4,17 @@ nobase_pkglibexec_SCRIPTS += zconfig.sh nobase_pkglibexec_SCRIPTS += zfs.sh nobase_pkglibexec_SCRIPTS += zpool-create.sh nobase_pkglibexec_SCRIPTS += zpool-config/* +nobase_pkglibexec_SCRIPTS += zpios.sh +nobase_pkglibexec_SCRIPTS += zpios-sanity.sh +nobase_pkglibexec_SCRIPTS += zpios-survey.sh +nobase_pkglibexec_SCRIPTS += zpios-test/* +nobase_pkglibexec_SCRIPTS += zpios-profile/* EXTRA_DIST = zfs-update.sh $(nobase_pkglibexec_SCRIPTS) ZFS=${top_srcdir}/scripts/zfs.sh ZCONFIG=${top_srcdir}/scripts/zconfig.sh ZTEST=${top_builddir}/cmd/ztest/ztest +ZPIOS_SANITY=${top_srcdir}/scripts/zpios-sanity.sh check: @echo @@ -27,3 +33,11 @@ check: @echo @$(ZCONFIG) @echo + @echo -n "====================================" + @echo -n " ZPIOS " + @echo "====================================" + @echo + @$(ZFS) + @$(ZPIOS_SANITY) + @$(ZFS) -u + @echo diff --git a/scripts/common.sh.in b/scripts/common.sh.in index 0a8399f18..102952fdb 100644 --- a/scripts/common.sh.in +++ b/scripts/common.sh.in @@ -39,6 +39,8 @@ sbindir=@sbindir@ ETCDIR=${ETCDIR:-/etc} DEVDIR=${DEVDIR:-/dev/disk/zpool} ZPOOLDIR=${ZPOOLDIR:-${pkglibexecdir}/zpool-config} +ZPIOSDIR=${ZPIOSDIR:-${pkglibexecdir}/zpios-test} +ZPIOSPROFILEDIR=${ZPIOSPROFILEDIR:-${pkglibexecdir}/zpios-profile} ZDB=${ZDB:-${sbindir}/zdb} ZFS=${ZFS:-${sbindir}/zfs} @@ -46,10 +48,13 @@ ZINJECT=${ZINJECT:-${sbindir}/zinject} ZPOOL=${ZPOOL:-${sbindir}/zpool} ZPOOL_ID=${ZPOOL_ID:-${bindir}/zpool_id} ZTEST=${ZTEST:-${sbindir}/ztest} +ZPIOS=${ZPIOS:-${sbindir}/zpios} COMMON_SH=${COMMON_SH:-${pkglibexecdir}/common.sh} ZFS_SH=${ZFS_SH:-${pkglibexecdir}/zfs.sh} ZPOOL_CREATE_SH=${ZPOOL_CREATE_SH:-${pkglibexecdir}/zpool-create.sh} +ZPIOS_SH=${ZPIOS_SH:-${pkglibexecdir}/zpios.sh} +ZPIOS_SURVEY_SH=${ZPIOS_SURVEY_SH:-${pkglibexecdir}/zpios-survey.sh} LDMOD=${LDMOD:-/sbin/modprobe} LSMOD=${LSMOD:-/sbin/lsmod} diff --git a/scripts/zpios-profile/zpios-profile-disk.sh b/scripts/zpios-profile/zpios-profile-disk.sh new file mode 100755 index 000000000..b56ee1ee4 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-disk.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# +# /proc/diskinfo +# Field 1 -- device name +# Field 2 -- # of reads issued +# Field 3 -- # of reads merged +# Field 4 -- # of sectors read +# Field 5 -- # of milliseconds spent reading +# Field 6 -- # of writes completed +# Field 7 -- # of writes merged +# Field 8 -- # of sectors written +# Field 9 -- # of milliseconds spent writing +# Field 10 -- # of I/Os currently in progress +# Field 11 -- # of milliseconds spent doing I/Os +# Field 12 -- weighted # of milliseconds spent doing I/Os + +PROG=zpios-profile-disk.sh + +RUN_PIDS=${0} +RUN_LOG_DIR=${1} +RUN_ID=${2} + +create_table() { + local FIELD=$1 + local ROW_M=() + local ROW_N=() + local HEADER=1 + local STEP=1 + + for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N=( `cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` ) + + if [ $HEADER -eq 1 ]; then + echo -n "step, " + cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", " + echo "total" + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${DISK_FILE}" + break + fi + + TOTAL=0 + echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc` + let TOTAL=${TOTAL}+${DELTA} + echo -n "${DELTA}, " + done + echo "${TOTAL}, " + + let STEP=${STEP}+1 + done +} + +create_table_mbs() { + local FIELD=$1 + local TIME=$2 + local ROW_M=() + local ROW_N=() + local HEADER=1 + local STEP=1 + + for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N=( `cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` ) + + if [ $HEADER -eq 1 ]; then + echo -n "step, " + cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", " + echo "total" + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${DISK_FILE}" + break + fi + + TOTAL=0 + echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc` + MBS=`echo "scale=2; ((${DELTA}*512)/${TIME})/(1024*1024)" | bc` + TOTAL=`echo "scale=2; ${TOTAL}+${MBS}" | bc` + echo -n "${MBS}, " + done + echo "${TOTAL}, " + + let STEP=${STEP}+1 + done +} + +echo +echo "Reads issued per device" +create_table 2 +echo +echo "Reads merged per device" +create_table 3 +echo +echo "Sectors read per device" +create_table 4 +echo "MB/s per device" +create_table_mbs 4 3 + +echo +echo "Writes issued per device" +create_table 6 +echo +echo "Writes merged per device" +create_table 7 +echo +echo "Sectors written per device" +create_table 8 +echo "MB/s per device" +create_table_mbs 8 3 + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile-pids.sh b/scripts/zpios-profile/zpios-profile-pids.sh new file mode 100755 index 000000000..3514b38e2 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-pids.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +PROG=zpios-profile-pids.sh + +RUN_PIDS=${0} +RUN_LOG_DIR=${1} +RUN_ID=${2} + +ROW_M=() +ROW_N=() +ROW_N_SCHED=() +ROW_N_WAIT=() + +HEADER=1 +STEP=1 + +for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N=( 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ) + ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` ) + ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` ) + ROW_N_NAMES=( `cat ${PID_FILE} | cut -f2 -d' ' | cut -f2 -d'(' | + cut -f1 -d')' | cut -f1 -d'/' | tr "\n" "\t"` ) + + for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do + SUM=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc` + + case ${ROW_N_NAMES[${i}]} in + zio_taskq) IDX=0;; + zio_req_nul) IDX=1;; + zio_irq_nul) IDX=2;; + zio_req_rd) IDX=3;; + zio_irq_rd) IDX=4;; + zio_req_wr) IDX=5;; + zio_irq_wr) IDX=6;; + zio_req_fr) IDX=7;; + zio_irq_fr) IDX=8;; + zio_req_cm) IDX=9;; + zio_irq_cm) IDX=10;; + zio_req_ctl) IDX=11;; + zio_irq_ctl) IDX=12;; + txg_quiesce) IDX=13;; + txg_sync) IDX=14;; + txg_timelimit) IDX=15;; + arc_reclaim) IDX=16;; + l2arc_feed) IDX=17;; + zpios_io) IDX=18;; + *) continue;; + esac + + let ROW_N[${IDX}]=${ROW_N[${IDX}]}+${SUM} + done + + if [ $HEADER -eq 1 ]; then + echo "step, zio_taskq, zio_req_nul, zio_irq_nul, " \ + "zio_req_rd, zio_irq_rd, zio_req_wr, zio_irq_wr, " \ + "zio_req_fr, zio_irq_fr, zio_req_cm, zio_irq_cm, " \ + "zio_req_ctl, zio_irq_ctl, txg_quiesce, txg_sync, " \ + "txg_timelimit, arc_reclaim, l2arc_feed, zpios_io, " \ + "idle" + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${PID_FILE}" + break + fi + + # Original values are in jiffies and we expect HZ to be 1000 + # on most 2.6 systems thus we divide by 10 to get a percentage. + IDLE=1000 + echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc` + DELTA_PERCENT=`echo "scale=1; ${DELTA}/10" | bc` + let IDLE=${IDLE}-${DELTA} + echo -n "${DELTA_PERCENT}, " + done + ILDE_PERCENT=`echo "scale=1; ${IDLE}/10" | bc` + echo "${ILDE_PERCENT}" + + let STEP=${STEP}+1 +done + +exit + +echo +echo "Percent of total system time per pid" +for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` ) + ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` ) + + for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do + ROW_N[${i}]=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc` + done + + if [ $HEADER -eq 1 ]; then + echo -n "step, " + cat ${PID_FILE} | cut -f2 -d' ' | tr "\n" ", " + echo + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${PID_FILE}" + break + fi + + # Original values are in jiffies and we expect HZ to be 1000 + # on most 2.6 systems thus we divide by 10 to get a percentage. + echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "scale=1; (${ROW_N[${i}]}-${ROW_M[${i}]})/10" | bc` + echo -n "${DELTA}, " + done + + echo + let STEP=${STEP}+1 +done + + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile-post.sh b/scripts/zpios-profile/zpios-profile-post.sh new file mode 100755 index 000000000..3a454ba04 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-post.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +PROG=zpios-profile-post.sh + +RUN_POST=${0} +RUN_PHASE=${1} +RUN_DIR=${2} +RUN_ID=${3} +RUN_POOL=${4} +RUN_CHUNK_SIZE=${5} +RUN_REGION_SIZE=${6} +RUN_THRD_COUNT=${7} +RUN_REGION_COUNT=${8} +RUN_OFFSET=${9} +RUN_REGION_NOISE=${10} +RUN_CHUNK_NOISE=${11} +RUN_THRD_DELAY=${12} +RUN_FLAGS=${13} +RUN_RESULT=${14} + +# Summarize system time per process +zpios_profile_post_pids() { + ${PROFILE_PIDS} ${PROFILE_RUN_CR_PIDS_LOG} >${PROFILE_RUN_CR_PIDS_CSV} + ${PROFILE_PIDS} ${PROFILE_RUN_WR_PIDS_LOG} >${PROFILE_RUN_WR_PIDS_CSV} + ${PROFILE_PIDS} ${PROFILE_RUN_RD_PIDS_LOG} >${PROFILE_RUN_RD_PIDS_CSV} + ${PROFILE_PIDS} ${PROFILE_RUN_RM_PIDS_LOG} >${PROFILE_RUN_RM_PIDS_CSV} +} + +zpios_profile_post_disk() { + ${PROFILE_DISK} ${PROFILE_RUN_CR_DISK_LOG} >${PROFILE_RUN_CR_DISK_CSV} + ${PROFILE_DISK} ${PROFILE_RUN_WR_DISK_LOG} >${PROFILE_RUN_WR_DISK_CSV} + ${PROFILE_DISK} ${PROFILE_RUN_RD_DISK_LOG} >${PROFILE_RUN_RD_DISK_CSV} + ${PROFILE_DISK} ${PROFILE_RUN_RM_DISK_LOG} >${PROFILE_RUN_RM_DISK_CSV} +} + +# Summarize per device performance + +# Stop a user defined profiling script which is gathering additional data +zpios_profile_post_stop() { + local PROFILE_PID=$1 + + kill -s SIGHUP `cat ${PROFILE_PID}` + + + # Sleep waiting for profile script to exit + while [ -f ${PROFILE_PID} ]; do + sleep 0.01 + done +} + +zpios_profile_post_proc_stop() { + local PROC_DIR=$1 + + if [ -f ${PROFILE_ARC_PROC} ]; then + cat ${PROFILE_ARC_PROC} >${PROC_DIR}/arcstats.txt + fi + + if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then + cat ${PROFILE_VDEV_CACHE_PROC} >${PROC_DIR}/vdev_cache_stats.txt + fi +} + +zpios_profile_post_oprofile_stop() { + local OPROFILE_LOG=$1 + local OPROFILE_ARGS="-a -g -l -p ${OPROFILE_KERNEL_DIR},${OPROFILE_SPL_DIR},${OPROFILE_ZFS_DIR}" + + /usr/bin/opcontrol --stop >>${OPROFILE_LOG} 2>&1 + /usr/bin/opcontrol --dump >>${OPROFILE_LOG} 2>&1 + /usr/bin/opreport ${OPROFILE_ARGS} >${OPROFILE_LOG} 2>&1 + /usr/bin/oparchive +} + +zpios_profile_post_create() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_OPROFILE_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_CR_DIR} + zpios_profile_post_stop ${PROFILE_RUN_CR_PID} +} + +zpios_profile_post_write() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_WR_OPROFILE_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_WR_DIR} + zpios_profile_post_stop ${PROFILE_RUN_WR_PID} +} + +zpios_profile_post_read() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_RD_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_RD_DIR} + zpios_profile_post_stop ${PROFILE_RUN_RD_PID} +} + +zpios_profile_post_remove() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_RM_OPROFILE_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_RM_DIR} + zpios_profile_post_stop ${PROFILE_RUN_RM_PID} +} + +# Source global zpios test configuration +if [ -f ${RUN_DIR}/zpios-config.sh ]; then + . ${RUN_DIR}/zpios-config.sh +fi + +# Source global per-run test configuration +if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then + . ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh +fi + +case "${RUN_PHASE}" in + post-run) + zpios_profile_post_pids + zpios_profile_post_disk + ;; + post-create) + zpios_profile_post_create + ;; + post-write) + zpios_profile_post_write + ;; + post-read) + zpios_profile_post_read + ;; + post-remove) + zpios_profile_post_remove + ;; + *) + echo "Usage: ${PROG} {post-run|post-create|post-write|post-read|post-remove}" + exit 1 +esac + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile-pre.sh b/scripts/zpios-profile/zpios-profile-pre.sh new file mode 100755 index 000000000..a2a885798 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-pre.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +PROG=zpios-profile-pre.sh + +PROFILE_RDY=0 +trap "PROFILE_RDY=1" SIGHUP + +RUN_PRE=${0} +RUN_PHASE=${1} +RUN_DIR=${2} +RUN_ID=${3} +RUN_POOL=${4} +RUN_CHUNK_SIZE=${5} +RUN_REGION_SIZE=${6} +RUN_THRD_COUNT=${7} +RUN_REGION_COUNT=${8} +RUN_OFFSET=${9} +RUN_REGION_NOISE=${10} +RUN_CHUNK_NOISE=${11} +RUN_THRD_DELAY=${12} +RUN_FLAGS=${13} +RUN_RESULT=${14} + +zpios_profile_pre_run_cfg() { +cat > ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh << EOF +# +# Zpios Profiling Configuration for Run ${RUN_ID} +# + +PROFILE_RUN_DIR=${RUN_DIR}/${RUN_ID} + +PROFILE_RUN_CR_DIR=${RUN_DIR}/${RUN_ID}/create +PROFILE_RUN_CR_PID=${RUN_DIR}/${RUN_ID}/create/profile.pid +PROFILE_RUN_CR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/create/oprofile.txt +PROFILE_RUN_CR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/create/pids.txt +PROFILE_RUN_CR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/create/pids.csv +PROFILE_RUN_CR_DISK_LOG=${RUN_DIR}/${RUN_ID}/create/disk.txt +PROFILE_RUN_CR_DISK_CSV=${RUN_DIR}/${RUN_ID}/create/disk.csv + +PROFILE_RUN_WR_DIR=${RUN_DIR}/${RUN_ID}/write +PROFILE_RUN_WR_PID=${RUN_DIR}/${RUN_ID}/write/profile.pid +PROFILE_RUN_WR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/write/oprofile.txt +PROFILE_RUN_WR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/write/pids.txt +PROFILE_RUN_WR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/write/pids.csv +PROFILE_RUN_WR_DISK_LOG=${RUN_DIR}/${RUN_ID}/write/disk.txt +PROFILE_RUN_WR_DISK_CSV=${RUN_DIR}/${RUN_ID}/write/disk.csv + +PROFILE_RUN_RD_DIR=${RUN_DIR}/${RUN_ID}/read +PROFILE_RUN_RD_PID=${RUN_DIR}/${RUN_ID}/read/profile.pid +PROFILE_RUN_RD_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/read/oprofile.txt +PROFILE_RUN_RD_PIDS_LOG=${RUN_DIR}/${RUN_ID}/read/pids.txt +PROFILE_RUN_RD_PIDS_CSV=${RUN_DIR}/${RUN_ID}/read/pids.csv +PROFILE_RUN_RD_DISK_LOG=${RUN_DIR}/${RUN_ID}/read/disk.txt +PROFILE_RUN_RD_DISK_CSV=${RUN_DIR}/${RUN_ID}/read/disk.csv + +PROFILE_RUN_RM_DIR=${RUN_DIR}/${RUN_ID}/remove +PROFILE_RUN_RM_PID=${RUN_DIR}/${RUN_ID}/remove/profile.pid +PROFILE_RUN_RM_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/remove/oprofile.txt +PROFILE_RUN_RM_PIDS_LOG=${RUN_DIR}/${RUN_ID}/remove/pids.txt +PROFILE_RUN_RM_PIDS_CSV=${RUN_DIR}/${RUN_ID}/remove/pids.csv +PROFILE_RUN_RM_DISK_LOG=${RUN_DIR}/${RUN_ID}/remove/disk.txt +PROFILE_RUN_RM_DISK_CSV=${RUN_DIR}/${RUN_ID}/remove/disk.csv + +# PROFILE_PIDS_LOG=${RUN_DIR}/${RUN_ID}/pids-summary.csv +# PROFILE_DISK_LOG=${RUN_DIR}/${RUN_ID}/disk-summary.csv +EOF +} + +zpios_profile_pre_run_args() { +cat > ${RUN_DIR}/${RUN_ID}/zpios-args.txt << EOF +# +# Zpios Arguments for Run ${RUN_ID} +# + +DIR=${RUN_DIR} +ID=${RUN_ID} +POOL=${RUN_POOL} +CHUNK_SIZE=${RUN_CHUNK_SIZE} +REGION_SIZE=${RUN_REGION_SIZE} +THRD_COUNT=${RUN_THRD_COUNT} +REGION_COUNT=${RUN_REGION_COUNT} +OFFSET=${RUN_OFFSET} +REGION_NOISE=${RUN_REGION_NOISE} +CHUNK_NOISE=${RUN_CHUNK_NOISE} +THRD_DELAY=${RUN_THRD_DELAY} +FLAGS=${RUN_FLAGS} +RESULT=${RUN_RESULT} +EOF +} + +# Spawn a user defined profiling script to gather additional data +zpios_profile_pre_start() { + local PROFILE_PID=$1 + + ${PROFILE_USER} ${RUN_PHASE} ${RUN_DIR} ${RUN_ID} & + echo "$!" >${PROFILE_PID} + + # Sleep waiting for profile script to be ready, it will + # signal us via SIGHUP when it is ready to start profiling. + while [ ${PROFILE_RDY} -eq 0 ]; do + sleep 0.01 + done +} + +zpios_profile_post_proc_start() { + + if [ -f ${PROFILE_ARC_PROC} ]; then + echo 0 >${PROFILE_ARC_PROC} + fi + + if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then + echo 0 >${PROFILE_VDEV_CACHE_PROC} + fi +} + +zpios_profile_pre_oprofile_start() { + local OPROFILE_LOG=$1 + + /usr/bin/opcontrol --reset >>${OPROFILE_LOG} 2>&1 + /usr/bin/opcontrol --start >>${OPROFILE_LOG} 2>&1 +} + +zpios_profile_pre_create() { + mkdir ${PROFILE_RUN_CR_DIR} + zpios_profile_pre_start ${PROFILE_RUN_CR_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_CR_OPROFILE_LOG} +} + +zpios_profile_pre_write() { + mkdir ${PROFILE_RUN_WR_DIR} + zpios_profile_pre_start ${PROFILE_RUN_WR_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_WR_OPROFILE_LOG} +} + +zpios_profile_pre_read() { + mkdir ${PROFILE_RUN_RD_DIR} + zpios_profile_pre_start ${PROFILE_RUN_RD_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_CR_RD_LOG} +} + +zpios_profile_pre_remove() { + mkdir ${PROFILE_RUN_RM_DIR} + zpios_profile_pre_start ${PROFILE_RUN_RM_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_RM_OPROFILE_LOG} +} + +# Source global zpios test configuration +if [ -f ${RUN_DIR}/zpios-config.sh ]; then + . ${RUN_DIR}/zpios-config.sh +fi + +# Source global per-run test configuration +if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then + . ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh +fi + +case "${RUN_PHASE}" in + pre-run) + mkdir -p ${RUN_DIR}/${RUN_ID}/ + zpios_profile_pre_run_cfg + zpios_profile_pre_run_args + ;; + pre-create) + zpios_profile_pre_create + ;; + pre-write) + zpios_profile_pre_write + ;; + pre-read) + zpios_profile_pre_read + ;; + pre-remove) + zpios_profile_pre_remove + ;; + *) + echo "Usage: ${PROG} {pre-run|pre-create|pre-write|pre-read|pre-remove}" + exit 1 +esac + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile.sh b/scripts/zpios-profile/zpios-profile.sh new file mode 100755 index 000000000..f4f0ee97f --- /dev/null +++ b/scripts/zpios-profile/zpios-profile.sh @@ -0,0 +1,226 @@ +#!/bin/bash + + +PROG=zpios-profile.sh + +trap "RUN_DONE=1" SIGHUP + +RUN_PHASE=${1} +RUN_LOG_DIR=${2} +RUN_ID=${3} +RUN_DONE=0 + +POLL_INTERVAL=2.99 + +# Log these pids, the exact pid numbers will vary from system to system +# so I harvest pid for all the following type of processes from /proc// +# +# zio_taskq/# +# spa_zio_issue/# +# spa_zio_intr/# +# txg_quiesce_thr +# txg_sync_thread +# txg_timelimit_t +# arc_reclaim_thr +# l2arc_feed_thre +# zpios_io/# + +ZIO_TASKQ_PIDS=() +ZIO_REQ_NUL_PIDS=() +ZIO_IRQ_NUL_PIDS=() +ZIO_REQ_RD_PIDS=() +ZIO_IRQ_RD_PIDS=() +ZIO_REQ_WR_PIDS=() +ZIO_IRQ_WR_PIDS=() +ZIO_REQ_FR_PIDS=() +ZIO_IRQ_FR_PIDS=() +ZIO_REQ_CM_PIDS=() +ZIO_IRQ_CM_PIDS=() +ZIO_REQ_CTL_PIDS=() +ZIO_IRQ_CTL_PIDS=() + +TXG_QUIESCE_PIDS=() +TXG_SYNC_PIDS=() +TXG_TIMELIMIT_PIDS=() + +ARC_RECLAIM_PIDS=() +L2ARC_FEED_PIDS=() + +ZPIOS_IO_PIDS=() + +show_pids() { + echo "* zio_taskq: { ${ZIO_TASKQ_PIDS[@]} } = ${#ZIO_TASKQ_PIDS[@]}" + echo "* zio_req_nul: { ${ZIO_REQ_NUL_PIDS[@]} } = ${#ZIO_REQ_NUL_PIDS[@]}" + echo "* zio_irq_nul: { ${ZIO_IRQ_NUL_PIDS[@]} } = ${#ZIO_IRQ_NUL_PIDS[@]}" + echo "* zio_req_rd: { ${ZIO_REQ_RD_PIDS[@]} } = ${#ZIO_REQ_RD_PIDS[@]}" + echo "* zio_irq_rd: { ${ZIO_IRQ_RD_PIDS[@]} } = ${#ZIO_IRQ_RD_PIDS[@]}" + echo "* zio_req_wr: { ${ZIO_REQ_WR_PIDS[@]} } = ${#ZIO_REQ_WR_PIDS[@]}" + echo "* zio_irq_wr: { ${ZIO_IRQ_WR_PIDS[@]} } = ${#ZIO_IRQ_WR_PIDS[@]}" + echo "* zio_req_fr: { ${ZIO_REQ_FR_PIDS[@]} } = ${#ZIO_REQ_FR_PIDS[@]}" + echo "* zio_irq_fr: { ${ZIO_IRQ_FR_PIDS[@]} } = ${#ZIO_IRQ_FR_PIDS[@]}" + echo "* zio_req_cm: { ${ZIO_REQ_CM_PIDS[@]} } = ${#ZIO_REQ_CM_PIDS[@]}" + echo "* zio_irq_cm: { ${ZIO_IRQ_CM_PIDS[@]} } = ${#ZIO_IRQ_CM_PIDS[@]}" + echo "* zio_req_ctl: { ${ZIO_REQ_CTL_PIDS[@]} } = ${#ZIO_REQ_CTL_PIDS[@]}" + echo "* zio_irq_ctl: { ${ZIO_IRQ_CTL_PIDS[@]} } = ${#ZIO_IRQ_CTL_PIDS[@]}" + echo "* txg_quiesce: { ${TXG_QUIESCE_PIDS[@]} } = ${#TXG_QUIESCE_PIDS[@]}" + echo "* txg_sync: { ${TXG_SYNC_PIDS[@]} } = ${#TXG_SYNC_PIDS[@]}" + echo "* txg_timelimit: { ${TXG_TIMELIMIT_PIDS[@]} } = ${#TXG_TIMELIMIT_PIDS[@]}" + echo "* arc_reclaim: { ${ARC_RECLAIM_PIDS[@]} } = ${#ARC_RECLAIM_PIDS[@]}" + echo "* l2arc_feed: { ${L2ARC_FEED_PIDS[@]} } = ${#L2ARC_FEED_PIDS[@]}" + echo "* zpios_io: { ${ZPIOS_IO_PIDS[@]} } = ${#ZPIOS_IO_PIDS[@]}" +} + +check_pid() { + local PID=$1 + local NAME=$2 + local TYPE=$3 + local PIDS=( "$4" ) + local NAME_STRING=`echo ${NAME} | cut -f1 -d'/'` + local NAME_NUMBER=`echo ${NAME} | cut -f2 -d'/'` + + if [ "${NAME_STRING}" == "${TYPE}" ]; then + if [ -n "${NAME_NUMBER}" ]; then + PIDS[${NAME_NUMBER}]=${PID} + else + PIDS[${#PIDS[@]}]=${PID} + + fi + fi + + echo "${PIDS[@]}" +} + +# NOTE: This whole process is crazy slow but it will do for now +aquire_pids() { + echo "--- Aquiring ZFS pids ---" + + for PID in `ls /proc/ | grep [0-9] | sort -n -u`; do + if [ ! -e /proc/${PID}/status ]; then + continue + fi + + NAME=`cat /proc/${PID}/status | head -n1 | cut -f2` + + ZIO_TASKQ_PIDS=( `check_pid ${PID} ${NAME} "zio_taskq" \ + "$(echo "${ZIO_TASKQ_PIDS[@]}")"` ) + + ZIO_REQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_nul" \ + "$(echo "${ZIO_REQ_NUL_PIDS[@]}")"` ) + + ZIO_IRQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_nul" \ + "$(echo "${ZIO_IRQ_NUL_PIDS[@]}")"` ) + + ZIO_REQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_req_rd" \ + "$(echo "${ZIO_REQ_RD_PIDS[@]}")"` ) + + ZIO_IRQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_rd" \ + "$(echo "${ZIO_IRQ_RD_PIDS[@]}")"` ) + + ZIO_REQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_wr" \ + "$(echo "${ZIO_REQ_WR_PIDS[@]}")"` ) + + ZIO_IRQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_wr" \ + "$(echo "${ZIO_IRQ_WR_PIDS[@]}")"` ) + + ZIO_REQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_fr" \ + "$(echo "${ZIO_REQ_FR_PIDS[@]}")"` ) + + ZIO_IRQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_fr" \ + "$(echo "${ZIO_IRQ_FR_PIDS[@]}")"` ) + + ZIO_REQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_req_cm" \ + "$(echo "${ZIO_REQ_CM_PIDS[@]}")"` ) + + ZIO_IRQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_cm" \ + "$(echo "${ZIO_IRQ_CM_PIDS[@]}")"` ) + + ZIO_REQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_ctl" \ + "$(echo "${ZIO_REQ_CTL_PIDS[@]}")"` ) + + ZIO_IRQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_ctl" \ + "$(echo "${ZIO_IRQ_CTL_PIDS[@]}")"` ) + + TXG_QUIESCE_PIDS=( `check_pid ${PID} ${NAME} "txg_quiesce" \ + "$(echo "${TXG_QUIESCE_PIDS[@]}")"` ) + + TXG_SYNC_PIDS=( `check_pid ${PID} ${NAME} "txg_sync" \ + "$(echo "${TXG_SYNC_PIDS[@]}")"` ) + + TXG_TIMELIMIT_PIDS=( `check_pid ${PID} ${NAME} "txg_timelimit" \ + "$(echo "${TXG_TIMELIMIT_PIDS[@]}")"` ) + + ARC_RECLAIM_PIDS=( `check_pid ${PID} ${NAME} "arc_reclaim" \ + "$(echo "${ARC_RECLAIM_PIDS[@]}")"` ) + + L2ARC_FEED_PIDS=( `check_pid ${PID} ${NAME} "l2arc_feed" \ + "$(echo "${L2ARC_FEED_PIDS[@]}")"` ) + done + + # Wait for zpios_io threads to start + kill -s SIGHUP ${PPID} + echo "* Waiting for zpios_io threads to start" + while [ ${RUN_DONE} -eq 0 ]; do + ZPIOS_IO_PIDS=( `ps ax | grep zpios_io | grep -v grep | \ + sed 's/^ *//g' | cut -f1 -d' '` ) + if [ ${#ZPIOS_IO_PIDS[@]} -gt 0 ]; then + break; + fi + sleep 0.1 + done + + echo "`show_pids`" >${RUN_LOG_DIR}/${RUN_ID}/pids.txt +} + +log_pids() { + echo "--- Logging ZFS profile to ${RUN_LOG_DIR}/${RUN_ID}/ ---" + ALL_PIDS=( ${ZIO_TASKQ_PIDS[@]} \ + ${ZIO_REQ_NUL_PIDS[@]} \ + ${ZIO_IRQ_NUL_PIDS[@]} \ + ${ZIO_REQ_RD_PID[@]} \ + ${ZIO_IRQ_RD_PIDS[@]} \ + ${ZIO_REQ_WR_PIDS[@]} \ + ${ZIO_IRQ_WR_PIDS[@]} \ + ${ZIO_REQ_FR_PIDS[@]} \ + ${ZIO_IRQ_FR_PIDS[@]} \ + ${ZIO_REQ_CM_PIDS[@]} \ + ${ZIO_IRQ_CM_PIDS[@]} \ + ${ZIO_REQ_CTL_PIDS[@]} \ + ${ZIO_IRQ_CTL_PIDS[@]} \ + ${TXG_QUIESCE_PIDS[@]} \ + ${TXG_SYNC_PIDS[@]} \ + ${TXG_TIMELIMIT_PIDS[@]} \ + ${ARC_RECLAIM_PIDS[@]} \ + ${L2ARC_FEED_PIDS[@]} \ + ${ZPIOS_IO_PIDS[@]} ) + + while [ ${RUN_DONE} -eq 0 ]; do + NOW=`date +%s.%N` + LOG_PIDS="${RUN_LOG_DIR}/${RUN_ID}/pids-${NOW}" + LOG_DISK="${RUN_LOG_DIR}/${RUN_ID}/disk-${NOW}" + + for PID in "${ALL_PIDS[@]}"; do + if [ -z ${PID} ]; then + continue; + fi + + if [ -e /proc/${PID}/stat ]; then + cat /proc/${PID}/stat | head -n1 >>${LOG_PIDS} + else + echo "<${PID} exited>" >>${LOG_PIDS} + fi + done + + cat /proc/diskstats >${LOG_DISK} + + NOW2=`date +%s.%N` + DELTA=`echo "${POLL_INTERVAL}-(${NOW2}-${NOW})" | bc` + sleep ${DELTA} + done +} + +aquire_pids +log_pids + +# rm ${PROFILE_PID} + +exit 0 diff --git a/scripts/zpios-sanity.sh b/scripts/zpios-sanity.sh new file mode 100755 index 000000000..194ae82e2 --- /dev/null +++ b/scripts/zpios-sanity.sh @@ -0,0 +1,158 @@ +#!/bin/bash +# +# ZFS/ZPOOL configuration test script. + +basedir="$(dirname $0)" + +SCRIPT_COMMON=common.sh +if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then +. "${basedir}/${SCRIPT_COMMON}" +else +echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 +fi + +PROG=zpios-sanity.sh +HEADER= + +usage() { +cat << EOF +USAGE: +$0 [hvxfc] + +DESCRIPTION: + ZPIOS sanity tests + +OPTIONS: + -h Show this message + -v Verbose + -x Destructive hd/sd/md/dm/ram tests + -f Don't prompt due to -x + -c Cleanup lo+file devices at start + +EOF +} + +while getopts 'hvxfc?' OPTION; do + case $OPTION in + h) + usage + exit 1 + ;; + v) + VERBOSE=1 + ;; + x) + DANGEROUS=1 + ;; + f) + FORCE=1 + ;; + c) + CLEANUP=1 + ;; + ?) + usage + exit + ;; + esac +done + +if [ $(id -u) != 0 ]; then + die "Must run as root" +fi + +# Perform pre-cleanup is requested +if [ ${CLEANUP} ]; then + cleanup_loop_devices + rm -f /tmp/zpool.cache.* +fi + +zpios_test() { + CONFIG=$1 + TEST=$2 + LOG=`mktemp` + + ${ZPIOS_SH} -f -c ${CONFIG} -t ${TEST} &>${LOG} + if [ $? -ne 0 ]; then + if [ ${VERBOSE} ]; then + printf "FAIL: %-13s\n" ${CONFIG} + cat ${LOG} + else + if [ ! ${HEADER} ]; then + head -2 ${LOG} + HEADER=1 + fi + + printf "FAIL: %-13s" ${CONFIG} + tail -1 ${LOG} + fi + else + if [ ${VERBOSE} ]; then + cat ${LOG} + else + if [ ! ${HEADER} ]; then + head -2 ${LOG} + HEADER=1 + fi + + tail -1 ${LOG} + fi + fi + + rm -f ${LOG} +} + +if [ ${DANGEROUS} ] && [ ! ${FORCE} ]; then + cat << EOF +The -x option was passed which will result in UNRECOVERABLE DATA LOSS +on on the following block devices: + + /dev/sd[abcd] + /dev/hda + /dev/ram0 + /dev/md0 + /dev/dm-0 + +To continue please confirm by entering YES: +EOF + read CONFIRM + if [ ${CONFIRM} != "YES" ] && [ ${CONFIRM} != "yes" ]; then + exit 0; + fi +fi + +# +# These configurations are all safe and pose no risk to any data on +# the system which runs them. They will confine all their IO to a +# file in /tmp or a loopback device configured to use a file in /tmp. +# +SAFE_CONFIGS=( \ + file-raid0 file-raid10 file-raidz file-raidz2 \ + lo-raid0 lo-raid10 lo-raidz lo-raidz2 \ +) + +# +# These configurations are down right dangerous. They will attempt +# to use various real block devices on your system which may contain +# data you car about. You are STRONGLY advised not to run this unless +# you are certain there is no data on the system you care about. +# +DANGEROUS_CONFIGS=( \ + hda-raid0 \ + sda-raid0 \ + ram0-raid0 \ + md0-raid10 md0-raid5 \ + dm0-raid0 \ +) + +for CONFIG in ${SAFE_CONFIGS[*]}; do + zpios_test $CONFIG tiny +done + +if [ ${DANGEROUS} ]; then + for CONFIG in ${DANGEROUS_CONFIGS[*]}; do + zpios_test $CONFIG tiny + done +fi + +exit 0 diff --git a/scripts/zpios-survey.sh b/scripts/zpios-survey.sh new file mode 100755 index 000000000..cb751b467 --- /dev/null +++ b/scripts/zpios-survey.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# +# Wrapper script for easily running a survey of zpios based tests +# + +basedir="$(dirname $0)" + +SCRIPT_COMMON=common.sh +if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then +. "${basedir}/${SCRIPT_COMMON}" +else +echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 +fi + +PROG=zpios-survey.sh + +usage() { +cat << EOF +USAGE: +$0 [hvp] <-c config> <-t test> + +DESCRIPTION: + Helper script for easy zpios survey benchmarking. + +OPTIONS: + -h Show this message + -v Verbose + -p Enable profiling + -c Zpool configuration + -t Zpios test + -l Zpios survey log + +EOF +} + +print_header() { +tee -a ${ZPIOS_SURVEY_LOG} << EOF + +================================================================ +Test: $1 +EOF +} + +# Baseline performance for an out of the box config with no manual tuning. +# Ideally, we want everything to be automatically tuned for your system and +# for this to perform reasonably well. +zpios_survey_base() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+baseline" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Disable ZFS's prefetching. For some reason still not clear to me +# current prefetching policy is quite bad for a random workload. +# Allowing the algorithm to detect a random workload and not do +# anything may be the way to address this issue. +zpios_survey_prefetch() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+prefetch" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -o "--noprefetch" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Simulating a zerocopy IO path should improve performance by freeing up +# lots of CPU which is wasted move data between buffers. +zpios_survey_zerocopy() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+zerocopy" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -o "--zerocopy" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Disabling checksumming should show some (if small) improvement +# simply due to freeing up a modest amount of CPU. +zpios_survey_checksum() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+checksum" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -s "set checksum=off" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Increasing the pending IO depth also seems to improve things likely +# at the expense of latency. This should be explored more because I'm +# seeing a much bigger impact there that I would have expected. There +# may be some low hanging fruit to be found here. +zpios_survey_pending() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+pending" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + zfs="zfs_vdev_max_pending=1024" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# To avoid memory fragmentation issues our slab implementation can be +# based on a virtual address space. Interestingly, we take a pretty +# substantial performance penalty for this somewhere in the low level +# IO drivers. If we back the slab with kmem pages we see far better +# read performance numbers at the cost of memory fragmention and general +# system instability due to large allocations. This may be because of +# an optimization in the low level drivers due to the contigeous kmem +# based memory. This needs to be explained. The good news here is that +# with zerocopy interfaces added at the DMU layer we could gaurentee +# kmem based memory for a pool of pages. +# +# 0x100 = KMC_KMEM - Force kmem_* based slab +# 0x200 = KMC_VMEM - Force vmem_* based slab +zpios_survey_kmem() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+kmem" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + zfs="zio_bulk_flags=0x100" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Apply all possible turning concurrently to get a best case number +zpios_survey_all() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+all" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + zfs="zfs_vdev_max_pending=1024" \ + zfs="zio_bulk_flags=0x100" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -o "--noprefetch --zerocopy" \ + -s "set checksum=off" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + + +PROFILE= +ZPOOL_NAME=zpios-survey +ZPOOL_CONFIG=zpool-config.sh +ZPIOS_TEST=zpios-test.sh +ZPIOS_SURVEY_LOG=/dev/null + +while getopts 'hvpc:t:l:' OPTION; do + case $OPTION in + h) + usage + exit 1 + ;; + v) + VERBOSE=1 + VERBOSE_FLAG="-v" + ;; + p) + PROFILE=1 + PROFILE_FLAG="-p" + ;; + c) + ZPOOL_CONFIG=${OPTARG} + ;; + t) + ZPIOS_TEST=${OPTARG} + ;; + l) + ZPIOS_SURVEY_LOG=${OPTARG} + ;; + ?) + usage + exit + ;; + esac +done + +if [ $(id -u) != 0 ]; then + die "Must run as root" +fi + +zpios_survey_base +zpios_survey_prefetch +zpios_survey_zerocopy +zpios_survey_checksum +zpios_survey_pending +zpios_survey_kmem +zpios_survey_all + +exit 0 diff --git a/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh b/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..cbd9c697a --- /dev/null +++ b/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? =this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=16 \ + --regioncount=8192 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh b/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..cd3c50b77 --- /dev/null +++ b/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? =this help + + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=1 \ + --regioncount=16 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..743e97b64 --- /dev/null +++ b/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? =this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=1,2,4,8,16,32,64,128,256 \ + --regioncount=65536 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..92a3b77b4 --- /dev/null +++ b/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? =this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=256 \ + --regioncount=65536 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh b/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..0db952cd6 --- /dev/null +++ b/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? =this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=4 \ + --regioncount=1024 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/large-thread-survey.sh b/scripts/zpios-test/large-thread-survey.sh new file mode 120000 index 000000000..90b6e3c47 --- /dev/null +++ b/scripts/zpios-test/large-thread-survey.sh @@ -0,0 +1 @@ +1x256th-65536rc-4rs-1cs-4off.sh \ No newline at end of file diff --git a/scripts/zpios-test/large.sh b/scripts/zpios-test/large.sh new file mode 120000 index 000000000..b8e22bf54 --- /dev/null +++ b/scripts/zpios-test/large.sh @@ -0,0 +1 @@ +256th-65536rc-4rs-1cs-4off.sh \ No newline at end of file diff --git a/scripts/zpios-test/medium.sh b/scripts/zpios-test/medium.sh new file mode 120000 index 000000000..d81027b73 --- /dev/null +++ b/scripts/zpios-test/medium.sh @@ -0,0 +1 @@ +16th-8192rc-4rs-1cs-4off.sh \ No newline at end of file diff --git a/scripts/zpios-test/small.sh b/scripts/zpios-test/small.sh new file mode 120000 index 000000000..cbf03b5ce --- /dev/null +++ b/scripts/zpios-test/small.sh @@ -0,0 +1 @@ +4th-1024rc-4rs-1cs-4off.sh \ No newline at end of file diff --git a/scripts/zpios-test/tiny.sh b/scripts/zpios-test/tiny.sh new file mode 120000 index 000000000..ba8b7cd0c --- /dev/null +++ b/scripts/zpios-test/tiny.sh @@ -0,0 +1 @@ +1th-16rc-4rs-1cs-4off.sh \ No newline at end of file diff --git a/scripts/zpios.sh b/scripts/zpios.sh new file mode 100755 index 000000000..e16a58a3b --- /dev/null +++ b/scripts/zpios.sh @@ -0,0 +1,272 @@ +#!/bin/bash +# +# Wrapper script for easily running zpios based tests +# + +basedir="$(dirname $0)" + +SCRIPT_COMMON=common.sh +if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then +. "${basedir}/${SCRIPT_COMMON}" +else +echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 +fi + +PROG=zpios.sh +DATE=`date +%Y%m%d-%H%M%S` +if [ "${ZPIOS_MODULES}" ]; then + MODULES=(${ZPIOS_MODULES[*]}) +else + MODULES=(zpios) +fi + +usage() { +cat << EOF +USAGE: +$0 [hvp] <-c config> <-t test> + +DESCRIPTION: + Helper script for easy zpios benchmarking. + +OPTIONS: + -h Show this message + -v Verbose + -f Force everything + -p Enable profiling + -c Zpool configuration + -t Zpios test + -o Additional zpios options + -l Additional zpool options + -s Additional zfs options + +EOF +} + +print_header() { + echo --------------------- ZPIOS RESULTS ---------------------------- + echo -n "Date: "; date + echo -n "Kernel: "; uname -r + dmesg | grep "Loaded Solaris Porting Layer" | tail -n1 + dmesg | grep "Loaded ZFS Filesystem" | tail -n1 + echo +} + +print_spl_info() { + echo --------------------- SPL Tunings ------------------------------ + ${SYSCTL} -A | grep spl + + if [ -d /sys/module/spl/parameters ]; then + grep [0-9] /sys/module/spl/parameters/* + else + grep [0-9] /sys/module/spl/* + fi + + echo +} + +print_zfs_info() { + echo --------------------- ZFS Tunings ------------------------------ + ${SYSCTL} -A | grep zfs + + if [ -d /sys/module/zfs/parameters ]; then + grep [0-9] /sys/module/zfs/parameters/* + else + grep [0-9] /sys/module/zfs/* + fi + + echo +} + +print_stats() { + echo ---------------------- Statistics ------------------------------- + ${SYSCTL} -A | grep spl | grep stack_max + + if [ -d /proc/spl/kstat/ ]; then + if [ -f /proc/spl/kstat/zfs/arcstats ]; then + echo "* ARC" + cat /proc/spl/kstat/zfs/arcstats + echo + fi + + if [ -f /proc/spl/kstat/zfs/vdev_cache_stats ]; then + echo "* VDEV Cache" + cat /proc/spl/kstat/zfs/vdev_cache_stats + echo + fi + fi + + if [ -f /proc/spl/kmem/slab ]; then + echo "* SPL SLAB" + cat /proc/spl/kmem/slab + echo + fi + + echo +} + +check_test() { + + if [ ! -f ${ZPIOS_TEST} ]; then + local NAME=`basename ${ZPIOS_TEST} .sh` + ERROR="Unknown test '${NAME}', available tests are:\n" + + for TST in `ls ${ZPIOSDIR}/ | grep ".sh"`; do + local NAME=`basename ${TST} .sh` + ERROR="${ERROR}${NAME}\n" + done + + return 1 + fi + + return 0 +} + +zpios_profile_config() { +cat > ${PROFILE_DIR}/zpios-config.sh << EOF +# +# Zpios Profiling Configuration +# + +PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE} +PROFILE_PRE=${ZPIOSPROFILEDIR}/zpios-profile-pre.sh +PROFILE_POST=${ZPIOSPROFILEDIR}/zpios-profile-post.sh +PROFILE_USER=${ZPIOSPROFILEDIR}/zpios-profile.sh +PROFILE_PIDS=${ZPIOSPROFILEDIR}/zpios-profile-pids.sh +PROFILE_DISK=${ZPIOSPROFILEDIR}/zpios-profile-disk.sh +PROFILE_ARC_PROC=/proc/spl/kstat/zfs/arcstats +PROFILE_VDEV_CACHE_PROC=/proc/spl/kstat/zfs/vdev_cache_stats + +OPROFILE_KERNEL="/boot/vmlinux-`uname -r`" +OPROFILE_KERNEL_DIR="/lib/modules/`uname -r`/kernel/" +OPROFILE_SPL_DIR=${SPLBUILD}/module/ +OPROFILE_ZFS_DIR=${MODDIR} + +EOF +} + +zpios_profile_start() { + PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE} + + mkdir -p ${PROFILE_DIR} + zpios_profile_config + . ${PROFILE_DIR}/zpios-config.sh + + ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --log=${PROFILE_DIR}" + ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --prerun=${PROFILE_PRE}" + ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --postrun=${PROFILE_POST}" + + /usr/bin/opcontrol --init + /usr/bin/opcontrol --setup --vmlinux=${OPROFILE_KERNEL} +} + +zpios_profile_stop() { + /usr/bin/opcontrol --shutdown + /usr/bin/opcontrol --deinit +} + +PROFILE= +ZPOOL_CONFIG=zpool-config.sh +ZPIOS_TEST=zpios-test.sh +ZPOOL_NAME=zpios +ZPIOS_OPTIONS= +ZPOOL_OPTIONS="" +ZFS_OPTIONS="" + +while getopts 'hvfpc:t:o:l:s:' OPTION; do + case $OPTION in + h) + usage + exit 1 + ;; + v) + VERBOSE=1 + VERBOSE_FLAG="-v" + ;; + f) + FORCE=1 + FORCE_FLAG="-f" + ;; + p) + PROFILE=1 + ;; + c) + ZPOOL_CONFIG=${OPTARG} + ;; + t) + ZPIOS_TEST_ARG=${OPTARG} + ZPIOS_TEST=${ZPIOSDIR}/${OPTARG}.sh + ;; + o) + ZPIOS_OPTIONS=${OPTARG} + ;; + l) # Passed through to zpool-create.sh + ZPOOL_OPTIONS=${OPTARG} + ;; + s) # Passed through to zpool-create.sh + ZFS_OPTIONS=${OPTARG} + ;; + ?) + usage + exit + ;; + esac +done + +if [ $(id -u) != 0 ]; then + die "Must run as root" +fi + +# Validate and source your test config +check_test || die "${ERROR}" +. ${ZPIOS_TEST} + +# Pull in the zpios test module is not loaded. If this fails it is +# likely because the full module stack was not yet loaded with zfs.sh +if check_modules; then + if ! load_modules; then + die "Run 'zfs.sh' to ensure the full module stack is loaded" + fi +fi + +# Wait for device creation +while [ ! -c /dev/zpios ]; do + sleep 1 +done + +if [ ${VERBOSE} ]; then + print_header + print_spl_info + print_zfs_info +fi + +# Create the zpool configuration +${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} ${FORCE_FLAG} \ + -p ${ZPOOL_NAME} -c ${ZPOOL_CONFIG} \ + -l "${ZPOOL_OPTIONS}" -s "${ZFS_OPTIONS}" || exit 1 + +if [ ${PROFILE} ]; then + zpios_profile_start +fi + +zpios_start +zpios_stop + +if [ ${PROFILE} ]; then + zpios_profile_stop +fi + +if [ ${VERBOSE} ]; then + print_stats +fi + +# Destroy the zpool configuration +${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} ${FORCE_FLAG} \ + -p ${ZPOOL_NAME} -c ${ZPOOL_CONFIG} -d || exit 1 + +# Unload the test module stack and wait for device removal +unload_modules +while [ -c /dev/zpios ]; do + sleep 1 +done + +exit 0