Add zdb -r <dataset> <object-id | file> <output>

While you can use zdb -R poolname vdev:offset:[<lsize>/]<psize>[:flags] to extract individual DVAs from a vdev, it would be handy to be able to copy an entire file out of the pool. Given a file or object number, add support to copy the contents to a file. Useful for debugging and recovery. Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Allan Jude <allan@klarasystems.com> Closes #11027
2025-06-01 19:34:58 +03:00 · 2021-01-28 00:36:01 -05:00 · 2021-01-28 00:36:01 -05:00 · 393e69241e
commit 393e69241e
parent b2c5904a78
7 changed files with 240 additions and 15 deletions
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@ -31,6 +31,7 @@
 *
 * [1] Portions of this software were developed by Allan Jude
 *     under sponsorship from the FreeBSD Foundation.
 * Copyright (c) 2021 Allan Jude
 */
 #include <stdio.h>
@ -755,13 +756,14 @@ usage(void)
 	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
 	    "\t%s -O <dataset> <path>\n"
 	    "\t%s -r <dataset> <path> <destination>\n"
 	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
 	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 	    "\t%s -E [-A] word0:word1:...:word15\n"
 	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 	    "<poolname>\n\n",
 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
-	    cmdname, cmdname, cmdname);
+	    cmdname, cmdname, cmdname, cmdname);
 	(void) fprintf(stderr, "    Dataset name must include at least one "
 	    "separator character '/' or '@'\n");
@ -800,6 +802,7 @@ usage(void)
 	(void) fprintf(stderr, "        -m metaslabs\n");
 	(void) fprintf(stderr, "        -M metaslab groups\n");
 	(void) fprintf(stderr, "        -O perform object lookups by path\n");
 	(void) fprintf(stderr, "        -r copy an object by path to file\n");
 	(void) fprintf(stderr, "        -R read and display block from a "
 	    "device\n");
 	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
@ -4490,7 +4493,7 @@ static char curpath[PATH_MAX];
 * for the last one.
 */
 static int
-dump_path_impl(objset_t *os, uint64_t obj, char *name)
+dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj)
 {
 	int err;
 	boolean_t header = B_TRUE;
@ -4540,10 +4543,15 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name)
 	switch (doi.doi_type) {
 	case DMU_OT_DIRECTORY_CONTENTS:
 		if (s != NULL && *(s + 1) != '\0')
-			return (dump_path_impl(os, child_obj, s + 1));
+			return (dump_path_impl(os, child_obj, s + 1, retobj));
 		/*FALLTHROUGH*/
 	case DMU_OT_PLAIN_FILE_CONTENTS:
-		dump_object(os, child_obj, dump_opt['v'], &header, NULL, 0);
+		if (retobj != NULL) {
 			*retobj = child_obj;
 		} else {
 			dump_object(os, child_obj, dump_opt['v'], &header,
 			    NULL, 0);
 		}
 		return (0);
 	default:
 		(void) fprintf(stderr, "object %llu has non-file/directory "
@ -4558,7 +4566,7 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name)
 * Dump the blocks for the object specified by path inside the dataset.
 */
 static int
-dump_path(char *ds, char *path)
+dump_path(char *ds, char *path, uint64_t *retobj)
 {
 	int err;
 	objset_t *os;
@ -4578,12 +4586,89 @@ dump_path(char *ds, char *path)
 	(void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
-	err = dump_path_impl(os, root_obj, path);
+	err = dump_path_impl(os, root_obj, path, retobj);
 	close_objset(os, FTAG);
 	return (err);
 }
 /*
  * Copy the data of object srcobj in objset os into the file destfile.
  *
  * The object's length is taken from its ZPL_SIZE system attribute and the
  * data is then moved in chunks of at most 1 MiB: each chunk is fetched with
  * dmu_read() and written to destfile, which is created (mode 0644) or
  * truncated as needed.
  *
  * Returns 0 on success, otherwise an errno-style value:
  *   - the error from sa_handle_get(), sa_lookup() or dmu_read(),
  *   - EINVAL if the object's size is 0,
  *   - ENOMEM if the transfer buffer cannot be allocated,
  *   - errno from open(), write() or close(),
  *   - EIO if write() accepted fewer bytes than requested.
  * The destination file descriptor and the buffer are released on every path.
  */
 static int
 zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
 {
 	int err = 0;
 	int fd;
 	uint64_t size, readsize, oursize, offset;
 	ssize_t writesize;
 	sa_handle_t *hdl;
 	char *buf;
 	(void) printf("Copying object %" PRIu64 " to file %s\n", srcobj,
 	    destfile);
 	/*
 	 * The size lookup below indexes sa_attr_table, so insist that os is
 	 * sa_os (presumably the objset that table was set up for).
 	 */
 	VERIFY3P(os, ==, sa_os);
 	if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) {
 		(void) printf("Failed to get handle for SA znode\n");
 		return (err);
 	}
 	if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size,
 	    sizeof (size)))) {
 		(void) sa_handle_destroy(hdl);
 		return (err);
 	}
 	(void) sa_handle_destroy(hdl);
 	(void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj,
 	    size);
 	if (size == 0) {
 		return (EINVAL);
 	}
 	fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 	if (fd == -1) {
 		/* Save errno before fprintf() has a chance to change it. */
 		err = errno;
 		(void) fprintf(stderr, "Failed to open %s for writing, "
 		    "error %d\n", destfile, err);
 		return (err);
 	}
 	/*
 	 * Cap the transfer buffer at 1 mebibyte so that an extremely
 	 * large object does not require an equally large allocation.
 	 */
 	oursize = MIN(size, 1 << 20);
 	offset = 0;
 	buf = kmem_alloc(oursize, KM_NOSLEEP);
 	if (buf == NULL) {
 		(void) close(fd);
 		return (ENOMEM);
 	}
 	while (offset < size) {
 		/* The final chunk may be shorter than the buffer. */
 		readsize = MIN(size - offset, 1 << 20);
 		err = dmu_read(os, srcobj, offset, readsize, buf, 0);
 		if (err != 0) {
 			(void) printf("got error %d from dmu_read\n", err);
 			break;
 		}
 		if (dump_opt['v'] > 3) {
 			(void) printf("Read offset=%" PRIu64 " size=%" PRIu64
 			    " error=%d\n", offset, readsize, err);
 		}
 		writesize = write(fd, buf, readsize);
 		if (writesize < 0) {
 			err = errno;
 			break;
 		} else if ((uint64_t)writesize != readsize) {
 			/*
 			 * Incomplete write: the copy is truncated, so make
 			 * sure the caller sees a failure.
 			 */
 			(void) fprintf(stderr, "Short write, only wrote %llu of"
 			    " %" PRIu64 " bytes, exiting...\n",
 			    (u_longlong_t)writesize, readsize);
 			err = EIO;
 			break;
 		}
 		offset += readsize;
 	}
 	/* A failing close() can be the only report of a lost write. */
 	if (close(fd) != 0 && err == 0)
 		err = errno;
 	kmem_free(buf, oursize);
 	return (err);
 }
 static int
 dump_label(const char *dev)
 {
@ -8167,6 +8252,7 @@ main(int argc, char **argv)
 	nvlist_t *policy = NULL;
 	uint64_t max_txg = UINT64_MAX;
 	int64_t objset_id = -1;
 	uint64_t object;
 	int flags = ZFS_IMPORT_MISSING_LOG;
 	int rewind = ZPOOL_NEVER_REWIND;
 	char *spa_config_path_env, *objset_str;
@ -8195,7 +8281,7 @@ main(int argc, char **argv)
 	zfs_btree_verify_intensity = 3;
 	while ((c = getopt(argc, argv,
-	    "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XYyZ")) != -1) {
+	    "AbcCdDeEFGhiI:klLmMo:Op:PqrRsSt:uU:vVx:XYyZ")) != -1) {
 		switch (c) {
 		case 'b':
 		case 'c':
@ -8210,6 +8296,7 @@ main(int argc, char **argv)
 		case 'm':
 		case 'M':
 		case 'O':
 		case 'r':
 		case 'R':
 		case 's':
 		case 'S':
@ -8299,7 +8386,7 @@ main(int argc, char **argv)
 		(void) fprintf(stderr, "-p option requires use of -e\n");
 		usage();
 	}
-	if (dump_opt['d']) {
+	if (dump_opt['d'] || dump_opt['r']) {
 		/* <pool>[/<dataset | objset id> is accepted */
 		if (argv[2] && (objset_str = strchr(argv[2], '/')) != NULL &&
 		    objset_str++ != NULL) {
@ -8358,7 +8445,7 @@ main(int argc, char **argv)
 		verbose = MAX(verbose, 1);
 	for (c = 0; c < 256; c++) {
-		if (dump_all && strchr("AeEFklLOPRSXy", c) == NULL)
+		if (dump_all && strchr("AeEFklLOPrRSXy", c) == NULL)
 			dump_opt[c] = 1;
 		if (dump_opt[c])
 			dump_opt[c] += verbose;
@ -8394,7 +8481,13 @@ main(int argc, char **argv)
 		if (argc != 2)
 			usage();
 		dump_opt['v'] = verbose + 3;
-		return (dump_path(argv[0], argv[1]));
+		return (dump_path(argv[0], argv[1], NULL));
 	}
 	if (dump_opt['r']) {
 		if (argc != 3)
 			usage();
 		dump_opt['v'] = verbose;
 		error = dump_path(argv[0], argv[1], &object);
 	}
 	if (dump_opt['X'] || dump_opt['F'])
@ -8572,7 +8665,9 @@ main(int argc, char **argv)
 	argv++;
 	argc--;
-	if (!dump_opt['R']) {
+	if (dump_opt['r']) {
 		error = zdb_copy_object(os, object, argv[1]);
 	} else if (!dump_opt['R']) {
 		flagbits['d'] = ZOR_FLAG_DIRECTORY;
 		flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
 		flagbits['m'] = ZOR_FLAG_SPACE_MAP;
--- a/man/man8/zdb.8
+++ b/man/man8/zdb.8
@ -15,7 +15,7 @@
 .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
 .\" Copyright (c) 2017 Intel Corporation.
 .\"
-.Dd April 14, 2019
+.Dd October 7, 2020
 .Dt ZDB 8 SMM
 .Os
 .Sh NAME
@ -60,6 +60,9 @@
 .Fl O
 .Ar dataset path
 .Nm
 .Fl r
 .Ar dataset path destination
 .Nm
 .Fl R
 .Op Fl A
 .Op Fl e Oo Fl V Oc Op Fl p Ar path ...
@ -274,6 +277,19 @@ must be relative to the root of
 This option can be combined with
 .Fl v
 for increasing verbosity.
 .It Fl r Ar dataset path destination
 Copy the specified
 .Ar path
 inside of the
 .Ar dataset
 to the specified destination.
 Specified
 .Ar path
 must be relative to the root of
 .Ar dataset .
 This option can be combined with
 .Fl v
 for increasing verbosity.
 .It Xo
 .Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [<lsize>/]<psize> Ns Op : Ns Ar flags
 .Xc
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@ -119,7 +119,7 @@ tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
    'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
    'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
    'zdb_display_block', 'zdb_object_range_neg', 'zdb_object_range_pos',
-    'zdb_objset_id', 'zdb_decompress_zstd']
+    'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2']
 pre =
 post =
 tags = ['functional', 'cli_root', 'zdb']
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
@ -14,4 +14,6 @@ dist_pkgdata_SCRIPTS = \
 	zdb_object_range_neg.ksh \
 	zdb_object_range_pos.ksh \
 	zdb_display_block.ksh \
-	zdb_objset_id.ksh
+	zdb_objset_id.ksh \
 	zdb_recover.ksh \
 	zdb_recover_2.ksh
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
@ -56,7 +56,7 @@ set -A args "create" "add" "destroy" "import fakepool" \
    "add mirror fakepool" "add raidz fakepool" \
    "add raidz1 fakepool" "add raidz2 fakepool" \
    "setvprop" "blah blah" "-%" "--?" "-*" "-=" \
-    "-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" "-r" \
+    "-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" \
    "-t" "-w" "-z" "-E" "-H" "-I" "-J" "-K" \
    "-N" "-Q" "-R" "-T" "-W"
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover.ksh
@ -0,0 +1,55 @@
 #!/bin/ksh
 #
 # This file and its contents are supplied under the terms of the
 # Common Development and Distribution License ("CDDL"), version 1.0.
 # You may only use this file in accordance with the terms of version
 # 1.0 of the CDDL.
 #
 # A full copy of the text of the CDDL should have accompanied this
 # source.  A copy of the CDDL is also available via the Internet at
 # http://www.illumos.org/license/CDDL.
 #
 #
 # Copyright (c) 2021 by Allan Jude.
 #
 . $STF_SUITE/include/libtest.shlib
 #
 # Description:
 # zdb -r <dataset> <path> <destination>
 # Will extract <path> (relative to <dataset>) to the file <destination>
 # Similar to -R, except it does the work for you to find each record
 #
 # Strategy:
 # 1. Create a pool
 # 2. Write some data to a file
 # 3. Extract the file
 # 4. Compare the file to the original
 #
 # Exit handler: destroy the test pool if it still exists and remove the
 # extracted copy.
 function cleanup
 {
 	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
 	# -f: the copy may be missing if zdb failed before creating it.
 	rm -f $tmpfile
 }
 log_assert "Verify zdb -r <dataset> <path> <dest> extract the correct data."
 # Register cleanup so it runs when the test exits.
 log_onexit cleanup
 # Source file inside the test filesystem and the destination zdb extracts to.
 init_data=$TESTDIR/file1
 tmpfile="$TEST_BASE_DIR/zdb-recover"
 # Passed to file_write as -c and -b below (8 * 131072 = 1 MiB in total,
 # presumably count and block size -- confirm against file_write's usage).
 write_count=8
 blksize=131072
 verify_runnable "global"
 verify_disk_count "$DISKS" 2
 # Presumably creates $TESTPOOL as a mirror of $DISKS -- see libtest.shlib.
 default_mirror_setup_noexit $DISKS
 file_write -o create -w -f $init_data -b $blksize -c $write_count
 # Sync the pool before zdb reads it.
 log_must zpool sync $TESTPOOL
 # NOTE(review): zdb's exit status and captured output are not checked here;
 # the test's verdict comes solely from the cmp below.
 output=$(zdb -r $TESTPOOL/$TESTFS file1 $tmpfile)
 log_must cmp $init_data $tmpfile
 log_pass "zdb -r <dataset> <path> <dest> extracts the correct data."
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover_2.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover_2.ksh
@ -0,0 +1,57 @@
 #!/bin/ksh
 #
 # This file and its contents are supplied under the terms of the
 # Common Development and Distribution License ("CDDL"), version 1.0.
 # You may only use this file in accordance with the terms of version
 # 1.0 of the CDDL.
 #
 # A full copy of the text of the CDDL should have accompanied this
 # source.  A copy of the CDDL is also available via the Internet at
 # http://www.illumos.org/license/CDDL.
 #
 #
 # Copyright (c) 2021 by Allan Jude.
 #
 . $STF_SUITE/include/libtest.shlib
 #
 # Description:
 # zdb -r <dataset> <path> <destination>
 # Will extract <path> (relative to <dataset>) to the file <destination>
 # Similar to -R, except it does the work for you to find each record
 #
 # Strategy:
 # 1. Create a pool
 # 2. Write some data to a file
 # 3. Append to the file so its size isn't a multiple of the block size
 # 4. Extract the file
 # 5. Compare the file to the original
 #
 # Exit handler: destroy the test pool if it still exists and remove the
 # extracted copy.
 function cleanup
 {
 	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
 	# -f: the copy may be missing if zdb failed before creating it.
 	rm -f $tmpfile
 }
 log_assert "Verify zdb -r <dataset> <path> <dest> extract the correct data."
 # Register cleanup so it runs when the test exits.
 log_onexit cleanup
 # Source file inside the test filesystem and the destination zdb extracts to.
 init_data=$TESTDIR/file1
 tmpfile="$TEST_BASE_DIR/zdb-recover"
 # Passed to file_write as -c and -b below (8 * 131072 = 1 MiB in total,
 # presumably count and block size -- confirm against file_write's usage).
 write_count=8
 blksize=131072
 verify_runnable "global"
 verify_disk_count "$DISKS" 2
 # Presumably creates $TESTPOOL as a mirror of $DISKS -- see libtest.shlib.
 default_mirror_setup_noexit $DISKS
 file_write -o create -w -f $init_data -b $blksize -c $write_count
 # Append 4 bytes ("zfs" plus newline) so the file no longer ends on a
 # $blksize boundary.
 log_must echo "zfs" >> $init_data
 # Sync the pool before zdb reads it.
 log_must zpool sync $TESTPOOL
 # NOTE(review): zdb's exit status and captured output are not checked here;
 # the test's verdict comes solely from the cmp below.
 output=$(zdb -r $TESTPOOL/$TESTFS file1 $tmpfile)
 log_must cmp $init_data $tmpfile
 log_pass "zdb -r <dataset> <path> <dest> extracts the correct data."