diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 70b4ba268..0114c4168 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -86,6 +86,7 @@ extern void dump_intent_log(zilog_t *); uint64_t *zopt_object = NULL; int zopt_objects = 0; libzfs_handle_t *g_zfs; +uint64_t max_inflight = 200; /* * These libumem hooks provide a reasonable set of defaults for the allocator's @@ -108,13 +109,14 @@ usage(void) { (void) fprintf(stderr, "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " - "poolname [object...]\n" - " %s [-divPA] [-e -p path...] dataset [object...]\n" - " %s -m [-LXFPA] [-t txg] [-e [-p path...]] " + "[-U config] [-M inflight I/Os] poolname [object...]\n" + " %s [-divPA] [-e -p path...] [-U config] dataset " + "[object...]\n" + " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " "poolname [vdev [metaslab...]]\n" " %s -R [-A] [-e [-p path...]] poolname " "vdev:offset:size[:flags]\n" - " %s -S [-PA] [-e [-p path...]] poolname\n" + " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n" " %s -l [-uA] device\n" " %s -C [-A] [-U config]\n\n", cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname); @@ -161,6 +163,8 @@ usage(void) (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t -- highest txg to use when " "searching for uberblocks\n"); + (void) fprintf(stderr, " -M -- " + "specify the maximum number of checksumming I/Os [default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. 
-bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); @@ -2005,6 +2009,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); } +static void /* passed to zio_read() by zdb_blkptr_cb() */ +zdb_blkptr_done(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + int ioerr = zio->io_error; + zdb_cb_t *zcb = zio->io_private; + zbookmark_t *zb = &zio->io_bookmark; + + zio_data_buf_free(zio->io_data, zio->io_size); + + mutex_enter(&spa->spa_scrub_lock); /* held across zcb updates below */ + spa->spa_scrub_inflight--; /* taken in zdb_blkptr_cb() */ + cv_broadcast(&spa->spa_scrub_io_cv); /* wake zdb_blkptr_cb() waiters */ + + if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + char blkbuf[BP_SPRINTF_LEN]; + + zcb->zcb_haderrors = 1; + zcb->zcb_errors[ioerr]++; + + if (dump_opt['b'] >= 2) + sprintf_blkptr(blkbuf, bp); + else + blkbuf[0] = '\0'; + + (void) printf("zdb_blkptr_cb: " + "Got error %d reading " + "<%llu, %llu, %lld, %llx> %s -- skipping\n", + ioerr, + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (u_longlong_t)zb->zb_level, + (u_longlong_t)zb->zb_blkid, + blkbuf); + } + mutex_exit(&spa->spa_scrub_lock); +} + /* ARGSUSED */ static int zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, @@ -2026,39 +2069,23 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) { - int ioerr; size_t size = BP_GET_PSIZE(bp); - void *data = malloc(size); + void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; /* If it's an intent log block, failure is expected. 
*/ if (zb->zb_level == ZB_ZIL_LEVEL) flags |= ZIO_FLAG_SPECULATIVE; - ioerr = zio_wait(zio_read(NULL, spa, bp, data, size, - NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb)); + mutex_enter(&spa->spa_scrub_lock); /* -M throttle */ + while (spa->spa_scrub_inflight >= max_inflight) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); + spa->spa_scrub_inflight++; /* dropped in zdb_blkptr_done() */ + mutex_exit(&spa->spa_scrub_lock); - free(data); + zio_nowait(zio_read(NULL, spa, bp, data, size, + zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); - if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) { - zcb->zcb_haderrors = 1; - zcb->zcb_errors[ioerr]++; - - if (dump_opt['b'] >= 2) - sprintf_blkptr(blkbuf, bp); - else - blkbuf[0] = '\0'; - - (void) printf("zdb_blkptr_cb: " - "Got error %d reading " - "<%llu, %llu, %lld, %llx> %s -- skipping\n", - ioerr, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid, - blkbuf); - } } zcb->zcb_readfails = 0; @@ -2266,6 +2293,18 @@ dump_block_stats(spa_t *spa) zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); + /* + * If we've traversed the data blocks then we need to wait for those + * I/Os to complete. We leverage "The Godfather" zio to wait on + * all async I/Os to complete. 
+ */ + if (dump_opt['c']) { + (void) zio_wait(spa->spa_async_zio_root); + spa->spa_async_zio_root = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } + if (zcb.zcb_haderrors) { (void) printf("\nError counts:\n\n"); (void) printf("\t%5s %s\n", "errno", "count"); @@ -3026,7 +3065,7 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) { + while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) { switch (c) { case 'b': case 'c': @@ -3055,6 +3094,15 @@ main(int argc, char **argv) case 'v': verbose++; break; + case 'M': + max_inflight = strtoull(optarg, NULL, 0); + if (max_inflight == 0) { + (void) fprintf(stderr, "maximum number " + "of inflight I/Os must be greater " + "than 0\n"); + usage(); + } + break; case 'p': if (searchdirs == NULL) { searchdirs = umem_alloc(sizeof (char *), diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 320069873..37aa2894d 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -246,7 +246,7 @@ get_usage(zpool_help_t idx) { case HELP_REMOVE: return (gettext("\tremove ...\n")); case HELP_REOPEN: - return (""); /* Undocumented command */ + return (gettext("\treopen \n")); case HELP_SCRUB: return (gettext("\tscrub [-s] ...\n")); case HELP_STATUS: @@ -3612,22 +3612,37 @@ zpool_do_reguid(int argc, char **argv) * zpool reopen * * Reopen the pool so that the kernel can update the sizes of all vdevs. - * - * NOTE: This command is currently undocumented. If the command is ever - * exposed then the appropriate usage() messages will need to be made. 
*/ int zpool_do_reopen(int argc, char **argv) { + int c; int ret = 0; zpool_handle_t *zhp; char *pool; + /* check options */ + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + argc--; argv++; - if (argc != 1) - return (2); + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } pool = argv[0]; if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index 3ce326573..364cf3015 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -11,6 +11,7 @@ .\" .\" .\" Copyright 2012, Richard Lowe. +.\" Copyright (c) 2012 by Delphix. All rights reserved. .\" .TH "ZDB" "8" "February 15, 2012" "" "" @@ -19,21 +20,23 @@ .SH "SYNOPSIS" \fBzdb\fR [-CumdibcsDvhLXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR] - \fIpoolname\fR [\fIobject\fR ...] + [-U \fIcache\fR] [-M \fIinflight I/Os\fR] [\fIpoolname\fR + [\fIobject\fR ...]] .P -\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] \fIdataset\fR [\fIobject\fR ...] +\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] + \fIdataset\fR [\fIobject\fR ...] 
.P -\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] \fIpoolname\fR - [\fIvdev\fR [\fImetaslab\fR ...]] +\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] + \fIpoolname\fR [\fIvdev\fR [\fImetaslab\fR ...]] .P -\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] \fIpoolname\fR +\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR \fIvdev\fR:\fIoffset\fR:\fIsize\fR[:\fIflags\fR] .P -\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] \fIpoolname\fR +\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR .P \fBzdb\fR -l [-uA] \fIdevice\fR @@ -354,6 +357,18 @@ Attempt to make an unreadable pool readable by trying progressively older transactions. .RE +.sp +.ne 2 +.na +\fB-M \fIinflight I/Os\fR \fR +.ad +.sp .6 +.RS 4n +Limit the number of outstanding checksum I/Os to the specified value. The +default value is 200. This option affects the performance of the \fB-c\fR +option. +.RE + .sp .ne 2 .na @@ -384,8 +399,7 @@ and their associated transaction numbers. .ad .sp .6 .RS 4n -Use a cache file other than \fB/etc/zfs/zpool.cache\fR. This option is only -valid with \fB-C\fR +Use a cache file other than \fB/etc/zfs/zpool.cache\fR. .RE .sp diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index c16cd6897..d79114ebc 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -112,6 +112,11 @@ zpool \- configures ZFS storage pools \fBzpool reguid\fR \fIpool\fR .fi +.LP +.nf +\fBzpool reopen\fR \fIpool\fR +.fi + .LP .nf \fBzpool remove\fR \fIpool\fR \fIdevice\fR ... @@ -1508,8 +1513,18 @@ Expand the device to use all available space. If the device is part of a mirror .ad .sp .6 .RS 4n -Generates a new unique identifier for the pool. You must ensure that all devices in this pool are online and -healthy before performing this action. +Generates a new unique identifier for the pool. You must ensure that all +devices in this pool are online and healthy before performing this action. 
+.RE + +.sp +.ne 2 +.na +\fB\fBzpool reopen\fR \fIpool\fR +.ad +.sp .6 +.RS 4n +Reopen all the vdevs associated with the pool. .RE .sp diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 0d3537eaa..12c24879b 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -110,7 +110,7 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(16), ZTI_FIX(5) }, - { ZTI_PCT(100), ZTI_NULL, ZTI_ONE, ZTI_NULL }, + { ZTI_FIX(8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, }; diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index 3c0ce53cd..45c917b09 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -139,21 +140,39 @@ vdev_file_close(vdev_t *vd) vd->vdev_tsd = NULL; } +static void /* dispatched to a taskq by vdev_file_io_start() */ +vdev_file_io_strategy(void *arg) +{ + zio_t *zio = (zio_t *)arg; + vdev_t *vd = zio->io_vd; + vdev_file_t *vf = vd->vdev_tsd; + ssize_t resid = 0; /* as in the inline code this replaces */ + + zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 
+ UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, + zio->io_size, zio->io_offset, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, &resid); + + if (resid != 0 && zio->io_error == 0) + zio->io_error = ENOSPC; + + zio_interrupt(zio); +} + static int vdev_file_io_start(zio_t *zio) { + spa_t *spa = zio->io_spa; vdev_t *vd = zio->io_vd; - vdev_file_t *vf; - ssize_t resid = 0; - - if (!vdev_readable(vd)) { - zio->io_error = ENXIO; - return (ZIO_PIPELINE_CONTINUE); - } - - vf = vd->vdev_tsd; + vdev_file_t *vf = vd->vdev_tsd; if (zio->io_type == ZIO_TYPE_IOCTL) { + /* XXPOLICY */ + if (!vdev_readable(vd)) { + zio->io_error = ENXIO; + return (ZIO_PIPELINE_CONTINUE); + } + switch (zio->io_cmd) { case DKIOCFLUSHWRITECACHE: zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, @@ -166,15 +185,8 @@ vdev_file_io_start(zio_t *zio) return (ZIO_PIPELINE_CONTINUE); } - zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? - UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, - zio->io_size, zio->io_offset, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, &resid); - - if (resid != 0 && zio->io_error == 0) - zio->io_error = ENOSPC; - - zio_interrupt(zio); + taskq_dispatch_ent(spa->spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE], + vdev_file_io_strategy, zio, 0, &zio->io_tqent); return (ZIO_PIPELINE_STOP); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 943f2d677..a721903a3 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3062,7 +3062,7 @@ zio_done(zio_t *zio) * Hand it off to the otherwise-unused claim taskq. */ ASSERT(taskq_empty_ent(&zio->io_tqent)); - (void) taskq_dispatch_ent( + taskq_dispatch_ent( zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], (task_func_t *)zio_reexecute, zio, 0, &zio->io_tqent);