diff --git a/cmd/zhack.c b/cmd/zhack.c
index 0b6da31ec..44611887d 100644
--- a/cmd/zhack.c
+++ b/cmd/zhack.c
@@ -58,6 +58,12 @@ static importargs_t g_importargs;
 static char *g_pool;
 static boolean_t g_readonly;
 
+typedef enum {
+	ZHACK_REPAIR_OP_UNKNOWN = 0,
+	ZHACK_REPAIR_OP_CKSUM = (1 << 0),
+	ZHACK_REPAIR_OP_UNDETACH = (1 << 1)
+} zhack_repair_op_t;
+
 static __attribute__((noreturn)) void
 usage(void)
 {
@@ -81,7 +87,10 @@ usage(void)
 	    " : should be a feature guid\n"
 	    "\n"
 	    " label repair \n"
-	    " repair corrupted label checksums\n"
+	    " repair labels of a specified device according to options\n"
+	    " which may be combined to do their functions in one call\n"
+	    " -c repair corrupted label checksums\n"
+	    " -u restore the label on a detached device\n"
 	    "\n"
 	    " : path to vdev\n");
 	exit(1);
@@ -485,23 +494,439 @@ zhack_do_feature(int argc, char **argv)
 	return (0);
 }
 
+/*
+ * Uberblock shift and size for a vdev of the given ashift: the ashift
+ * clamped to the range [UBERBLOCK_SHIFT, MAX_UBERBLOCK_SHIFT].
+ */
+#define	ASHIFT_UBERBLOCK_SHIFT(ashift) \
+	MIN(MAX(ashift, UBERBLOCK_SHIFT), \
+	MAX_UBERBLOCK_SHIFT)
+#define	ASHIFT_UBERBLOCK_SIZE(ashift) \
+	(1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))
+
+/* Result bits recorded per label in labels_repaired[]. */
+#define	REPAIR_LABEL_STATUS_CKSUM (1 << 0)
+#define	REPAIR_LABEL_STATUS_UB (1 << 1)
+
+/*
+ * Read one whole vdev label from fd at label_offset into vl.
+ * Returns 0 on success, or non-zero after printing an error when the read
+ * fails or comes up short.
+ */
 static int
-zhack_repair_label_cksum(int argc, char **argv)
+zhack_repair_read_label(const int fd, vdev_label_t *vl,
+    const uint64_t label_offset, const int l)
 {
-	zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
+	const ssize_t nread =
+	    pread64(fd, vl, sizeof (vdev_label_t), label_offset);
+
+	if (nread == -1) {
+		(void) fprintf(stderr,
+		    "error: cannot read label %d: %s\n",
+		    l, strerror(errno));
+		return (-1);
+	} else if ((size_t)nread != sizeof (vdev_label_t)) {
+		(void) fprintf(stderr,
+		    "error: bad label %d read size\n", l);
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Compute the label checksum of data[0..abdsize) into *cksum.  The checksum
+ * embedded in eck is temporarily replaced by the offset-based verifier and
+ * restored before returning.
+ */
+static void
+zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
+    const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
+{
+	zio_cksum_t verifier;
+	zio_cksum_t current_cksum;
+	zio_checksum_info_t *ci;
+	abd_t *abd;
+
+	ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
+
+	if (byteswap)
+		byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
+
+	current_cksum = eck->zec_cksum;
+	eck->zec_cksum = verifier;
+
+	ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
+	abd = abd_get_from_buf(data, abdsize);
+	ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
+	abd_free(abd);
+
+	eck->zec_cksum = current_cksum;
+}
+
+/*
+ * Sanity-check an unpacked label config.  When ub is non-NULL its TXG must
+ * be 0, as is expected of a detached device.  Every key in cfg_keys must be
+ * present, and the vdev tree must carry a non-zero ashift, which is stored
+ * in *ashift.  Returns 0 on success and non-zero after printing an error.
+ */
+static int
+zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys,
+    const size_t cfg_keys_len, nvlist_t *cfg, nvlist_t *vdev_tree_cfg,
+    uint64_t *ashift)
+{
+	int err;
+
+	if (ub != NULL && ub->ub_txg != 0) {
+		(void) fprintf(stderr,
+		    "error: label %d: UB TXG of 0 expected, but got %"
+		    PRIu64 "\n",
+		    l, ub->ub_txg);
+		(void) fprintf(stderr, "It would appear the device was not "
+		    "properly removed.\n");
+		return (1);
+	}
+
+	for (size_t i = 0; i < cfg_keys_len; i++) {
+		uint64_t val;
+		err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
+		if (err) {
+			(void) fprintf(stderr,
+			    "error: label %d, %zu: "
+			    "cannot find nvlist key %s\n",
+			    l, i, cfg_keys[i]);
+			return (err);
+		}
+	}
+
+	err = nvlist_lookup_nvlist(cfg,
+	    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
+	if (err) {
+		(void) fprintf(stderr,
+		    "error: label %d: cannot find nvlist key %s\n",
+		    l, ZPOOL_CONFIG_VDEV_TREE);
+		return (err);
+	}
+
+	err = nvlist_lookup_uint64(vdev_tree_cfg,
+	    ZPOOL_CONFIG_ASHIFT, ashift);
+	if (err) {
+		(void) fprintf(stderr,
+		    "error: label %d: cannot find nvlist key %s\n",
+		    l, ZPOOL_CONFIG_ASHIFT);
+		return (err);
+	}
+
+	if (*ashift == 0) {
+		(void) fprintf(stderr,
+		    "error: label %d: nvlist key %s is zero\n",
+		    l, ZPOOL_CONFIG_ASHIFT);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Restore the label of a detached device: if the uberblock root block
+ * pointer has a non-zero birth TXG, use it as the uberblock TXG and as the
+ * pool TXG in cfg, and remove the pool creation TXG from cfg.
+ * Returns 0 on success, 1 after printing an error.
+ */
+static int
+zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
+{
+	/*
+	 * Uberblock root block pointer has valid birth TXG.
+	 * Copying it to the label NVlist
+	 */
+	if (ub->ub_rootbp.blk_birth != 0) {
+		const uint64_t txg = ub->ub_rootbp.blk_birth;
+		ub->ub_txg = txg;
+
+		if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) {
+			(void) fprintf(stderr,
+			    "error: label %d: "
+			    "Failed to remove pool creation TXG\n",
+			    l);
+			return (1);
+		}
+
+		if (nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG) != 0) {
+			(void) fprintf(stderr,
+			    "error: label %d: Failed to remove pool TXG to "
+			    "be replaced.\n",
+			    l);
+			return (1);
+		}
+
+		if (nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg) != 0) {
+			(void) fprintf(stderr,
+			    "error: label %d: "
+			    "Failed to add pool TXG of %" PRIu64 "\n",
+			    l, txg);
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Recompute the embedded checksum of data[0..abdsize) and, if it differs
+ * from the stored one, store it and write the region to fd at offset.
+ * Returns B_TRUE only when the region was rewritten successfully.
+ */
+static boolean_t
+zhack_repair_write_label(const int l, const int fd, const int byteswap,
+    void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
+{
+	zio_cksum_t actual_cksum;
+	zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
+	    &actual_cksum);
+	zio_cksum_t expected_cksum = eck->zec_cksum;
+	ssize_t err;
+
+	if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
+		return (B_FALSE);
+
+	eck->zec_cksum = actual_cksum;
+
+	err = pwrite64(fd, data, abdsize, offset);
+	if (err == -1) {
+		(void) fprintf(stderr, "error: cannot write label %d: %s\n",
+		    l, strerror(errno));
+		return (B_FALSE);
+	} else if ((uint64_t)err != abdsize) {
+		(void) fprintf(stderr, "error: bad write size label %d\n", l);
+		return (B_FALSE);
+	} else {
+		(void) fprintf(stderr,
+		    "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
+		    l, abdsize, offset);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Give the first uberblock of label l an embedded-checksum magic and a
+ * matching checksum and write it out.  An uberblock whose checksum magic is
+ * already non-zero is left alone.  Sets REPAIR_LABEL_STATUS_UB in
+ * labels_repaired[l] when the uberblock was written.
+ */
+static void
+zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
+    const uint64_t ashift, const int fd, const int byteswap,
+    const uint64_t label_offset, uint32_t *labels_repaired)
+{
+	void *ub_data =
+	    (char *)vl + offsetof(vdev_label_t, vl_uberblock);
+	zio_eck_t *ub_eck =
+	    (zio_eck_t *)
+	    ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
+
+	if (ub_eck->zec_magic != 0) {
+		(void) fprintf(stderr,
+		    "error: label %d: "
+		    "Expected Uberblock checksum magic number to "
+		    "be 0, but got %" PRIu64 "\n",
+		    l, ub_eck->zec_magic);
+		(void) fprintf(stderr, "It would appear there's already "
+		    "a checksum for the uberblock.\n");
+		return;
+	}
+
+
+	ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
+
+	if (zhack_repair_write_label(l, fd, byteswap,
+	    ub_data, ub_eck,
+	    label_offset + offsetof(vdev_label_t, vl_uberblock),
+	    ASHIFT_UBERBLOCK_SIZE(ashift)))
+		labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
+}
+
+/* Print a checksum as four colon-separated 64-bit hex words. */
+static void
+zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
+{
+	(void) fprintf(stream,
+	    "%016llx:%016llx:%016llx:%016llx",
+	    (u_longlong_t)cksum->zc_word[0],
+	    (u_longlong_t)cksum->zc_word[1],
+	    (u_longlong_t)cksum->zc_word[2],
+	    (u_longlong_t)cksum->zc_word[3]);
+}
+
+/*
+ * Check the magic number and checksum embedded in the vdev_phys region.
+ * Prints a message for each mismatch; returns 0 if both match, else ECKSUM.
+ */
+static int
+zhack_repair_test_cksum(const int byteswap, void *vdev_data,
+    zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
+{
+	const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
+	zio_cksum_t actual_cksum;
+	zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
+	    VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
+	const uint64_t expected_magic = byteswap ?
+	    BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
+	const uint64_t actual_magic = vdev_eck->zec_magic;
+	int err = 0;
+	if (actual_magic != expected_magic) {
+		(void) fprintf(stderr, "error: label %d: "
+		    "Expected "
+		    "the nvlist checksum magic number to be %"
+		    PRIu64 " not %" PRIu64 "\n",
+		    l, expected_magic, actual_magic);
+		err = ECKSUM;
+	}
+	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
+		(void) fprintf(stderr, "error: label %d: "
+		    "Expected the nvlist checksum to be ", l);
+		zhack_repair_print_cksum(stderr,
+		    &expected_cksum);
+		(void) fprintf(stderr, " not ");
+		zhack_repair_print_cksum(stderr, &actual_cksum);
+		(void) fprintf(stderr, "\n");
+		err = ECKSUM;
+	}
+	return (err);
+}
+
+/*
+ * Repair label l of the device: read it, validate it, undetach it when
+ * ZHACK_REPAIR_OP_UNDETACH is set, and rewrite the vdev_phys checksum if it
+ * is stale.  Unless ZHACK_REPAIR_OP_CKSUM is set, a label whose current
+ * checksum does not verify is reported and left untouched.
+ */
+static void
+zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
+    vdev_label_t *vl, const uint64_t label_offset, const int l,
+    uint32_t *labels_repaired)
+{
+	ssize_t err;
+	uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
+	void *vdev_data =
+	    (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
+	zio_eck_t *vdev_eck =
+	    (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
+	const uint64_t vdev_phys_offset =
+	    label_offset + offsetof(vdev_label_t, vl_vdev_phys);
 	const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
 	    ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
-	boolean_t labels_repaired[VDEV_LABELS] = {0};
-	boolean_t repaired = B_FALSE;
+	nvlist_t *cfg;
+	nvlist_t *vdev_tree_cfg = NULL;
+	uint64_t ashift;
+	int byteswap;
+
+	err = zhack_repair_read_label(fd, vl, label_offset, l);
+	if (err)
+		return;
+
+	if (vdev_eck->zec_magic == 0) {
+		(void) fprintf(stderr, "error: label %d: "
+		    "Expected the nvlist checksum magic number to not be zero"
+		    "\n",
+		    l);
+		(void) fprintf(stderr, "There should already be a checksum "
+		    "for the label.\n");
+		return;
+	}
+
+	byteswap =
+	    (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC));
+
+	/*
+	 * NOTE(review): this converts the magic to native order in memory,
+	 * while zhack_repair_test_cksum() expects the swapped magic when
+	 * byteswap is set; confirm the handling of foreign-endian labels.
+	 */
+	if (byteswap) {
+		byteswap_uint64_array(&vdev_eck->zec_cksum,
+		    sizeof (zio_cksum_t));
+		vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
+	}
+
+	if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
+	    zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
+	    vdev_phys_offset, l) != 0) {
+		(void) fprintf(stderr, "It would appear checksums are "
+		    "corrupted. Try zhack label repair -c <device>\n");
+		return;
+	}
+
+	err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
+	    VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
+	if (err) {
+		(void) fprintf(stderr,
+		    "error: cannot unpack nvlist label %d\n", l);
+		return;
+	}
+
+	/* The uberblock TXG is only required to be 0 when undetaching. */
+	err = zhack_repair_check_label(
+	    (op & ZHACK_REPAIR_OP_UNDETACH) != 0 ? ub : NULL,
+	    l, cfg_keys, ARRAY_SIZE(cfg_keys), cfg, vdev_tree_cfg, &ashift);
+	if (err)
+		goto out;
+
+	if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
+		char *buf;
+		size_t buflen;
+
+		err = zhack_repair_undetach(ub, cfg, l);
+		if (err)
+			goto out;
+
+		buf = vl->vl_vdev_phys.vp_nvlist;
+		buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
+		if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
+			(void) fprintf(stderr,
+			    "error: label %d: Failed to pack nvlist\n", l);
+			goto out;
+		}
+
+		zhack_repair_write_uberblock(vl,
+		    l, ashift, fd, byteswap, label_offset, labels_repaired);
+	}
+
+	if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
+	    vdev_phys_offset, VDEV_PHYS_SIZE))
+		labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
+
+	fsync(fd);
+
+out:
+	nvlist_free(cfg);
+}
+
+/* Map one status bit of a label to the word printed in the summary. */
+static const char *
+zhack_repair_label_status(const uint32_t label_status,
+    const uint32_t to_check)
+{
+	return ((label_status & to_check) != 0 ? "repaired" : "skipped");
+}
+
+/*
+ * Apply the requested repair operations to every label of the device named
+ * by argv[0].  Returns 0 if at least one label was repaired, 1 otherwise.
+ */
+static int
+zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
+{
+	uint32_t labels_repaired[VDEV_LABELS] = {0};
 	vdev_label_t labels[VDEV_LABELS] = {{{0}}};
-	struct stat st;
+	struct stat64 st;
 	int fd;
+	off_t filesize;
+	uint32_t repaired = 0;
 
 	abd_init();
 
-	argc -= 1;
-	argv += 1;
-
 	if (argc < 1) {
 		(void) fprintf(stderr, "error: missing device\n");
 		usage();
@@ -511,93 +936,22 @@ zhack_repair_label_cksum(int argc, char **argv)
 		fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
 		    strerror(errno));
 
-	if (stat(argv[0], &st) != 0)
+	if (fstat64_blk(fd, &st) != 0)
 		fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
 		    strerror(errno));
 
+	filesize = st.st_size;
+	(void) fprintf(stderr, "Calculated filesize to be %jd\n",
+	    (intmax_t)filesize);
+
+	/* Label offsets are computed from a label-size-aligned device size. */
+	if (filesize % sizeof (vdev_label_t) != 0)
+		filesize =
+		    (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
+
 	for (int l = 0; l < VDEV_LABELS; l++) {
-		uint64_t label_offset, offset;
-		zio_cksum_t expected_cksum;
-		zio_cksum_t actual_cksum;
-		zio_cksum_t verifier;
-		zio_eck_t *eck;
-		nvlist_t *cfg;
-		int byteswap;
-		uint64_t val;
-		ssize_t err;
-
-		vdev_label_t *vl = &labels[l];
-
-		label_offset = vdev_label_offset(st.st_size, l, 0);
-		err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
-		if (err == -1) {
-			(void) fprintf(stderr, "error: cannot read "
-			    "label %d: %s\n", l, strerror(errno));
-			continue;
-		} else if (err != sizeof (vdev_label_t)) {
-			(void) fprintf(stderr, "error: bad label %d read size "
-			    "\n", l);
-			continue;
-		}
-
-		err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
-		    VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
-		if (err) {
-			(void) fprintf(stderr, "error: cannot unpack nvlist "
-			    "label %d\n", l);
-			continue;
-		}
-
-		for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
-			err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
-			if (err) {
-				(void) fprintf(stderr, "error: label %d: "
-				    "cannot find nvlist key %s\n",
-				    l, cfg_keys[i]);
-				continue;
-			}
-		}
-
-		void *data = (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
-		eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1;
-
-		offset = label_offset + offsetof(vdev_label_t, vl_vdev_phys);
-		ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
-
-		byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
-		if (byteswap)
-			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
-
-		expected_cksum = eck->zec_cksum;
-		eck->zec_cksum = verifier;
-
-		abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE);
-		ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum);
-		abd_free(abd);
-
-		if (byteswap)
-			byteswap_uint64_array(&expected_cksum,
-			    sizeof (zio_cksum_t));
-
-		if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
-			continue;
-
-		eck->zec_cksum = actual_cksum;
-
-		err = pwrite64(fd, data, VDEV_PHYS_SIZE, offset);
-		if (err == -1) {
-			(void) fprintf(stderr, "error: cannot write "
-			    "label %d: %s\n", l, strerror(errno));
-			continue;
-		} else if (err != VDEV_PHYS_SIZE) {
-			(void) fprintf(stderr, "error: bad write size "
-			    "label %d\n", l);
-			continue;
-		}
-
-		fsync(fd);
-
-		labels_repaired[l] = B_TRUE;
+		zhack_repair_one_label(op, fd, &labels[l],
+		    vdev_label_offset(filesize, l, 0), l, labels_repaired);
 	}
 
 	close(fd);
@@ -605,17 +959,55 @@ zhack_repair_label_cksum(int argc, char **argv)
 	abd_fini();
 
 	for (int l = 0; l < VDEV_LABELS; l++) {
-		(void) printf("label %d: %s\n", l,
-		    labels_repaired[l] ? "repaired" : "skipped");
-		repaired |= labels_repaired[l];
+		const uint32_t lr = labels_repaired[l];
+		(void) printf("label %d: ", l);
+		(void) printf("uberblock: %s ",
+		    zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
+		(void) printf("checksum: %s\n",
+		    zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
+		repaired |= lr;
 	}
 
-	if (repaired)
+	if (repaired > 0)
 		return (0);
 
 	return (1);
 }
 
+/*
+ * Parse the "label repair" options (-c and -u; -c when none is given) and
+ * run the repair on the remaining device argument.
+ */
+static int
+zhack_do_label_repair(int argc, char **argv)
+{
+	zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
+	int c;
+
+	optind = 1;
+	while ((c = getopt(argc, argv, "+cu")) != -1) {
+		switch (c) {
+		case 'c':
+			op |= ZHACK_REPAIR_OP_CKSUM;
+			break;
+		case 'u':
+			op |= ZHACK_REPAIR_OP_UNDETACH;
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (op == ZHACK_REPAIR_OP_UNKNOWN)
+		op = ZHACK_REPAIR_OP_CKSUM;
+
+	return (zhack_label_repair(op, argc, argv));
+}
+
 static int
 zhack_do_label(int argc, char **argv)
 {
@@ -632,7 +1024,7 @@ zhack_do_label(int argc, char **argv)
 
 	subcommand = argv[0];
 	if (strcmp(subcommand, "repair") == 0) {
-		err = zhack_repair_label_cksum(argc, argv);
+		err = zhack_do_label_repair(argc, argv);
 	} else {
 		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
 		    subcommand);
diff --git a/man/man1/zhack.1 b/man/man1/zhack.1
index 26b8156b4..937f1e916 100644
--- a/man/man1/zhack.1
+++ b/man/man1/zhack.1
@@ -98,10 +98,29 @@ feature is now required to read the pool MOS.
 .It Xo
 .Nm zhack
 .Cm label repair
+.Op Fl cu
 .Ar device
 .Xc
-Repair corrupted labels by rewriting the checksum using the presumed valid
-contents of the label.
+Repair labels of a specified
+.Ar device
+according to options.
+.Pp
+Flags may be combined to do their functions simultaneously.
+.
+.Pp
+The
+.Fl c
+flag repairs corrupted label checksums.
+.
+.Pp
+The
+.Fl u
+flag restores the label on a detached device.
+.Pp
+Example:
+.Nm zhack Cm label repair Fl cu Ar device
+ Fix checksums and undetach a device
+.
 .El
 .
.Sh GLOBAL OPTIONS diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 55991cfea..3730f2b27 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -325,7 +325,8 @@ tests = ['zfs_wait_deleteq', 'zfs_wait_getsubopt'] tags = ['functional', 'cli_root', 'zfs_wait'] [tests/functional/cli_root/zhack] -tests = ['zhack_label_checksum'] +tests = ['zhack_label_repair_001', 'zhack_label_repair_002', + 'zhack_label_repair_003', 'zhack_label_repair_004'] pre = post = tags = ['functional', 'cli_root', 'zhack'] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 74295b86d..0112d28d0 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -250,6 +250,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/cli_root/zpool_upgrade/zpool_upgrade.cfg \ functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib \ functional/cli_root/zpool_wait/zpool_wait.kshlib \ + functional/cli_root/zhack/library.kshlib \ functional/cli_user/misc/misc.cfg \ functional/cli_user/zfs_list/zfs_list.cfg \ functional/cli_user/zfs_list/zfs_list.kshlib \ @@ -932,7 +933,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zfs/zfs_001_neg.ksh \ functional/cli_root/zfs/zfs_002_pos.ksh \ functional/cli_root/zfs/zfs_003_neg.ksh \ - functional/cli_root/zhack/zhack_label_checksum.ksh \ + functional/cli_root/zhack/zhack_label_repair_001.ksh \ + functional/cli_root/zhack/zhack_label_repair_002.ksh \ + functional/cli_root/zhack/zhack_label_repair_003.ksh \ + functional/cli_root/zhack/zhack_label_repair_004.ksh \ functional/cli_root/zpool_add/add_nested_replacing_spare.ksh \ functional/cli_root/zpool_add/add-o_ashift.ksh \ functional/cli_root/zpool_add/add_prop_ashift.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib b/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib new file mode 100644 index 000000000..880a78861 --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib
@@ -0,0 +1,380 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by vStack. All rights reserved.
+#
+
+. "$STF_SUITE"/include/libtest.shlib
+. "$STF_SUITE"/include/blkdev.shlib
+
+#
+# Description:
+#
+# Test whether zhack label repair can recover detached devices and corrupted
+# checksums on devices of various sizes, and ensure that the -c and -u
+# options each do only their own job.
+#
+# Strategy:
+#
+# Tests are done on loopback devices whose sizes are divisible by the label
+# size and on devices whose sizes are not.
+#
+# Test one:
+#
+# 1. Create pool on a loopback device with some test data
+# 2. Export the pool.
+# 3. Corrupt all label checksums in the pool
+# 4. Check that pool cannot be imported
+# 5. Verify that it cannot be imported after using zhack label repair -u
+#    to ensure that the -u option will quit on corrupted checksums.
+# 6. Use zhack label repair -c on device
+# 7. Check that pool can be imported and that data is intact
+#
+# Test two:
+#
+# 1. Create a mirrored pool on two loopback devices with some test data
+# 2. Detach either device from the mirror
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Verify that the remaining detached device cannot be imported
+# 6. Verify that it cannot be imported after using zhack label repair -c
+#    to ensure that the -c option will not undetach a device.
+# 7. Use zhack label repair -u on device
+# 8. Verify that the detached device can be imported and that data is intact
+#
+# Test three:
+#
+# 1. Create a mirrored pool on two loopback devices with some test data
+# 2. Detach either device from the mirror
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Corrupt all label checksums on the remaining device
+# 6. Verify that the remaining detached device cannot be imported
+# 7. Verify that it cannot be imported after using zhack label repair -u
+#    to ensure that the -u option will quit on corrupted checksums.
+# 8. Verify that it cannot be imported after using zhack label repair -c
+#    -c should repair the checksums, but not undetach a device.
+# 9. Use zhack label repair -u on device
+# 10. Verify that the detached device can be imported and that data is intact
+#
+# Test four:
+#
+# 1. Create a mirrored pool on two loopback devices with some test data
+# 2. Detach either device from the mirror
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Corrupt all label checksums on the remaining device
+# 6. Verify that the remaining detached device cannot be imported
+# 7. Use zhack label repair -cu on device to attempt to fix checksums and
+#    undetach the device in a single operation.
+# 8. Verify that the detached device can be imported and that data is intact
+#
+
+log_assert "Verify zhack label repair will repair label checksums and uberblocks"
+log_onexit cleanup
+
+LABEL_SIZE="$((2**18))"
+LABEL_NVLIST_END="$((LABEL_SIZE / 2))"
+LABEL_CKSUM_SIZE="32"
+LABEL_CKSUM_START="$(( LABEL_NVLIST_END - LABEL_CKSUM_SIZE ))"
+
+VIRTUAL_DISK=$TEST_BASE_DIR/disk
+VIRTUAL_MIRROR_DISK=$TEST_BASE_DIR/mirrordisk
+
+VIRTUAL_DEVICE=
+VIRTUAL_MIRROR_DEVICE=
+
+# Detach the loopback device $1, if it exists.
+function cleanup_lo
+{
+	L_DEVICE="$1"
+
+	if [[ -e $L_DEVICE ]]; then
+		if is_linux; then
+			log_must losetup -d "$L_DEVICE"
+		elif is_freebsd; then
+			log_must mdconfig -d -u "$L_DEVICE"
+		else
+			log_must lofiadm -d "$L_DEVICE"
+		fi
+	fi
+}
+
+# Destroy the test pool, detach both loopback devices, remove backing files.
+function cleanup
+{
+	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+	cleanup_lo "$VIRTUAL_DEVICE"
+	cleanup_lo "$VIRTUAL_MIRROR_DEVICE"
+	VIRTUAL_DEVICE=
+	VIRTUAL_MIRROR_DEVICE=
+	[[ -f "$VIRTUAL_DISK" ]] && log_must rm "$VIRTUAL_DISK"
+	[[ -f "$VIRTUAL_MIRROR_DISK" ]] && log_must rm "$VIRTUAL_MIRROR_DISK"
+}
+
+# Print a device size: MINVDEVSIZE, optionally increased by a random amount.
+RAND_MAX="$((2**15 - 1))"
+function get_devsize
+{
+	if [ "$RANDOM" -gt "$(( RAND_MAX / 2 ))" ]; then
+		echo "$(( MINVDEVSIZE + RANDOM ))"
+	else
+		echo "$MINVDEVSIZE"
+	fi
+}
+
+# Print "log_must" if $1 is "true", otherwise "log_mustnot".
+function pick_logop
+{
+	L_SHOULD_SUCCEED="$1"
+
+	l_logop="log_mustnot"
+	if [ "$L_SHOULD_SUCCEED" == true ]; then
+		l_logop="log_must"
+	fi
+
+	echo "$l_logop"
+}
+
+# Expect the test dataset to be mounted with its test file iff $1 is "true".
+function check_dataset
+{
+	L_SHOULD_SUCCEED="$1"
+	L_LOGOP="$(pick_logop "$L_SHOULD_SUCCEED")"
+
+	"$L_LOGOP" mounted "$TESTPOOL"/"$TESTFS"
+
+	"$L_LOGOP" test -f "$TESTDIR"/"test"
+}
+
+# Create and mount the test dataset, add a test file and sync the pool.
+function setup_dataset
+{
+	log_must zfs create "$TESTPOOL"/"$TESTFS"
+
+	log_must mkdir -p "$TESTDIR"
+	log_must zfs set mountpoint="$TESTDIR" "$TESTPOOL"/"$TESTFS"
+
+	log_must mounted "$TESTPOOL"/"$TESTFS"
+
+	log_must touch "$TESTDIR"/"test"
+	log_must test -f "$TESTDIR"/"test"
+
+	log_must zpool sync "$TESTPOOL"
+
+	check_dataset true
+}
+
+# Print size $1 rounded down to a multiple of the label size.
+function get_practical_size
+{
+	L_SIZE="$1"
+
+	if [ "$((L_SIZE % LABEL_SIZE))" -ne 0 ]; then
+		echo "$(((L_SIZE / LABEL_SIZE) * LABEL_SIZE))"
+	else
+		echo "$L_SIZE"
+	fi
+}
+
+# Overwrite the nvlist checksum of label $2 (0-3) on $3, a device of size $1.
+function corrupt_sized_label_checksum
+{
+	L_SIZE="$1"
+	L_LABEL="$2"
+	L_DEVICE="$3"
+
+	L_PRACTICAL_SIZE="$(get_practical_size "$L_SIZE")"
+
+	typeset -a L_OFFSETS=("$LABEL_CKSUM_START" \
+	    "$((LABEL_SIZE + LABEL_CKSUM_START))" \
+	    "$(((L_PRACTICAL_SIZE - LABEL_SIZE*2) + LABEL_CKSUM_START))" \
+	    "$(((L_PRACTICAL_SIZE - LABEL_SIZE) + LABEL_CKSUM_START))")
+
+	log_must dd if=/dev/urandom of="$L_DEVICE" \
+	    seek="${L_OFFSETS["$L_LABEL"]}" bs=1 count="$LABEL_CKSUM_SIZE" \
+	    conv=notrunc
+}
+
+# Corrupt the checksums of all four labels on $2, a device of size $1.
+function corrupt_labels
+{
+	L_SIZE="$1"
+	L_DISK="$2"
+
+	corrupt_sized_label_checksum "$L_SIZE" 0 "$L_DISK"
+	corrupt_sized_label_checksum "$L_SIZE" 1 "$L_DISK"
+	corrupt_sized_label_checksum "$L_SIZE" 2 "$L_DISK"
+	corrupt_sized_label_checksum "$L_SIZE" 3 "$L_DISK"
+}
+
+# Check that the pool on $4 cannot be imported, run zhack label repair $3 on
+# it expecting outcome $1, then try the import again expecting outcome $2.
+function try_import_and_repair
+{
+	L_REPAIR_SHOULD_SUCCEED="$1"
+	L_IMPORT_SHOULD_SUCCEED="$2"
+	L_OP="$3"
+	L_POOLDISK="$4"
+	L_REPAIR_LOGOP="$(pick_logop "$L_REPAIR_SHOULD_SUCCEED")"
+	L_IMPORT_LOGOP="$(pick_logop "$L_IMPORT_SHOULD_SUCCEED")"
+
+	log_mustnot zpool import "$TESTPOOL" -d "$L_POOLDISK"
+
+	"$L_REPAIR_LOGOP" zhack label repair "$L_OP" "$L_POOLDISK"
+
+	"$L_IMPORT_LOGOP" zpool import "$TESTPOOL" -d "$L_POOLDISK"
+
+	check_dataset "$L_IMPORT_SHOULD_SUCCEED"
+}
+
+# Create backing file $2 of size $1; print the loopback device attached to it.
+function prepare_vdev
+{
+	L_SIZE="$1"
+	L_BACKFILE="$2"
+
+	l_devname=
+	if truncate -s "$L_SIZE" "$L_BACKFILE"; then
+		if is_linux; then
+			l_devname="$(losetup -f "$L_BACKFILE" --show)"
+		elif is_freebsd; then
+			l_devname=/dev/"$(mdconfig -a -t vnode -f "$L_BACKFILE")"
+		else
+			l_devname="$(lofiadm -a "$L_BACKFILE")"
+		fi
+	fi
+	echo "$l_devname"
+}
+
+# Test one: -c repairs corrupted checksums, -u alone does not.
+function run_test_one
+{
+	L_SIZE="$1"
+
+	VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")"
+	log_must test -e "$VIRTUAL_DEVICE"
+
+	log_must zpool create "$TESTPOOL" "$VIRTUAL_DEVICE"
+
+	setup_dataset
+
+	log_must zpool export "$TESTPOOL"
+
+	corrupt_labels "$L_SIZE" "$VIRTUAL_DISK"
+
+	try_import_and_repair false false "-u" "$VIRTUAL_DEVICE"
+
+	try_import_and_repair true true "-c" "$VIRTUAL_DEVICE"
+
+	cleanup
+
+	log_pass "zhack label repair corruption test passed with a randomized size of $L_SIZE"
+}
+
+# Create a two-way mirrored test pool on loopback devices of size $1.
+function make_mirrored_pool
+{
+	L_SIZE="$1"
+
+	VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")"
+	log_must test -e "$VIRTUAL_DEVICE"
+	VIRTUAL_MIRROR_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_MIRROR_DISK")"
+	log_must test -e "$VIRTUAL_MIRROR_DEVICE"
+
+	log_must zpool create "$TESTPOOL" "$VIRTUAL_DEVICE"
+	log_must zpool attach "$TESTPOOL" "$VIRTUAL_DEVICE" "$VIRTUAL_MIRROR_DEVICE"
+}
+
+# Export the pool, then remove the first loopback device and its backing file.
+function export_and_cleanup_vdisk
+{
+	log_must zpool export "$TESTPOOL"
+
+	cleanup_lo "$VIRTUAL_DEVICE"
+
+	VIRTUAL_DEVICE=
+
+	log_must rm "$VIRTUAL_DISK"
+}
+
+# Test two: -u restores a detached device, -c alone does not.
+function run_test_two
+{
+	L_SIZE="$1"
+
+	make_mirrored_pool "$L_SIZE"
+
+	setup_dataset
+
+	log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE"
+
+	export_and_cleanup_vdisk
+
+	try_import_and_repair false false "-c" "$VIRTUAL_MIRROR_DEVICE"
+
+	try_import_and_repair true true "-u" "$VIRTUAL_MIRROR_DEVICE"
+
+	cleanup
+
+	log_pass "zhack label repair detached test passed with a randomized size of $L_SIZE"
+}
+
+# Test three: detached device with corrupted checksums; -c first, then -u.
+function run_test_three
+{
+	L_SIZE="$1"
+
+	make_mirrored_pool "$L_SIZE"
+
+	setup_dataset
+
+	log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE"
+
+	export_and_cleanup_vdisk
+
+	corrupt_labels "$L_SIZE" "$VIRTUAL_MIRROR_DISK"
+
+	try_import_and_repair false false "-u" "$VIRTUAL_MIRROR_DEVICE"
+
+	try_import_and_repair true false "-c" "$VIRTUAL_MIRROR_DEVICE"
+
+	try_import_and_repair true true "-u" "$VIRTUAL_MIRROR_DEVICE"
+
+	cleanup
+
+	log_pass "zhack label repair corruption and detached test passed with a randomized size of $L_SIZE"
+}
+
+# Test four: detached device with corrupted checksums; -cu in a single call.
+function run_test_four
+{
+	L_SIZE="$1"
+
+	make_mirrored_pool "$L_SIZE"
+
+	setup_dataset
+
+	log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE"
+
+	export_and_cleanup_vdisk
+
+	corrupt_labels "$L_SIZE" "$VIRTUAL_MIRROR_DISK"
+
+	try_import_and_repair true true "-cu" "$VIRTUAL_MIRROR_DEVICE"
+
+	cleanup
+
+	log_pass "zhack label repair corruption and detached single-command test passed with a randomized size of $L_SIZE."
+}
diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh
deleted file mode 100755
index 67c7e7c44..000000000
--- a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/ksh
-
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source. A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2021 by vStack. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/include/blkdev.shlib
-
-#
-# Description:
-# zhack label repair will calculate and rewrite label checksum if invalid
-#
-# Strategy:
-# 1. Create pool with some number of vdevs and export it
-# 2. Corrupt all labels checksums
-# 3. Check that pool cannot be imported
-# 4. Use zhack to repair labels checksums
-# 5. 
Check that pool can be imported -# - -log_assert "Verify zhack label repair will repair labels checksums" -log_onexit cleanup - -VIRTUAL_DISK=$TEST_BASE_DIR/disk - -function cleanup -{ - poolexists $TESTPOOL && destroy_pool $TESTPOOL - [[ -f $VIRTUAL_DISK ]] && log_must rm $VIRTUAL_DISK -} - -log_must truncate -s $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK - -log_must zpool create $TESTPOOL $VIRTUAL_DISK -log_must zpool export $TESTPOOL - -log_mustnot zhack label repair $VIRTUAL_DISK - -corrupt_label_checksum 0 $VIRTUAL_DISK -corrupt_label_checksum 1 $VIRTUAL_DISK -corrupt_label_checksum 2 $VIRTUAL_DISK -corrupt_label_checksum 3 $VIRTUAL_DISK - -log_mustnot zpool import $TESTPOOL -d $TEST_BASE_DIR - -log_must zhack label repair $VIRTUAL_DISK - -log_must zpool import $TESTPOOL -d $TEST_BASE_DIR - -cleanup - -log_pass "zhack label repair works correctly." diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh new file mode 100755 index 000000000..2a511e9ef --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover +# corrupted checksums on devices of varied size, +# but not undetached devices. +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Export the pool. +# 3. Corrupt all label checksums in the pool +# 4. Check that pool cannot be imported +# 5. Verify that it cannot be imported after using zhack label repair -u +# to ensure that the -u option will quit on corrupted checksums. +# 6. Use zhack label repair -c on device +# 7. 
Check that pool can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_one "$(get_devsize)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh new file mode 100755 index 000000000..4f1e61a39 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover +# detached drives on devices of varied size, but not +# repair corrupted checksums. +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. Remove the non-detached device and its backing file +# 5. Verify that the remaining detached device cannot be imported +# 6. Verify that it cannot be imported after using zhack label repair -c +# to ensure that the -c option will not undetach a device. +# 7. Use zhack label repair -u on device +# 8. Verify that the detached device can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_two "$(get_devsize)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh new file mode 100755 index 000000000..7e82363d2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh @@ -0,0 +1,33 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover a device of varied size with +# corrupted checksums and which has been detached. +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. Remove the non-detached device and its backing file +# 5. Corrupt all label checksums on the remaining device +# 6. Verify that the remaining detached device cannot be imported +# 7. Verify that it cannot be imported after using zhack label repair -u +# to ensure that the -u option will quit on corrupted checksums. +# 8. Verify that it cannot be imported after using zhack label repair -c +# -c should repair the checksums, but not undetach a device. +# 9. Use zhack label repair -u on device +# 10. Verify that the detached device can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_three "$(get_devsize)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh new file mode 100755 index 000000000..0b739402b --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover a device of varied size with +# corrupted checksums and which has been detached (in one command). +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. 
Remove the non-detached device and its backing file +# 5. Corrupt all label checksums on the remaining device +# 6. Verify that the remaining detached device cannot be imported +# 7. Use zhack label repair -cu on device to attempt to fix checksums and +# undetach the device in a single operation. +# 8. Verify that the detached device can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_four "$(get_devsize)"