diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 05891f84a..92bfd3ecf 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2409,7 +2409,7 @@ static const char *objset_types[DMU_OST_NUMTYPES] = { static void dump_dir(objset_t *os) { - dmu_objset_stats_t dds; + dmu_objset_stats_t dds = { 0 }; uint64_t object, object_count; uint64_t refdbytes, usedobjs, scratch; char numbuf[32]; @@ -5447,9 +5447,9 @@ dump_zpool(spa_t *spa) #define ZDB_FLAG_BSWAP 0x0004 #define ZDB_FLAG_GBH 0x0008 #define ZDB_FLAG_INDIRECT 0x0010 -#define ZDB_FLAG_PHYS 0x0020 -#define ZDB_FLAG_RAW 0x0040 -#define ZDB_FLAG_PRINT_BLKPTR 0x0080 +#define ZDB_FLAG_RAW 0x0020 +#define ZDB_FLAG_PRINT_BLKPTR 0x0040 +#define ZDB_FLAG_VERBOSE 0x0080 static int flagbits[256]; @@ -5580,11 +5580,30 @@ name: return (NULL); } +static boolean_t +zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize) +{ + char *s0, *s1; + + if (sizes == NULL) + return (B_FALSE); + + s0 = strtok(sizes, "/"); + if (s0 == NULL) + return (B_FALSE); + s1 = strtok(NULL, "/"); + *lsize = strtoull(s0, NULL, 16); + *psize = s1 ? strtoull(s1, NULL, 16) : *lsize; + return (*lsize >= *psize && *psize > 0); +} + +#define ZIO_COMPRESS_MASK(alg) (1ULL << (ZIO_COMPRESS_##alg)) + /* * Read a block from a pool and print it out. The syntax of the * block descriptor is: * - * pool:vdev_specifier:offset:size[:flags] + * pool:vdev_specifier:offset:[lsize/]psize[:flags] * * pool - The name of the pool you wish to read from * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) @@ -5597,8 +5616,8 @@ name: * e: Byteswap data before dumping * g: Display data as a gang block header * i: Display as an indirect block - * p: Do I/O to physical offset * r: Dump raw data to stdout + * v: Verbose * */ static void @@ -5607,13 +5626,12 @@ zdb_read_block(char *thing, spa_t *spa) blkptr_t blk, *bp = &blk; dva_t *dva = bp->blk_dva; int flags = 0; - uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; + uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0; zio_t *zio; vdev_t *vd; abd_t *pabd; void *lbuf, *buf; - const char *s, *vdev; - char *p, *dup, *flagstr; + char *s, *p, *dup, *vdev, *flagstr, *sizes; int i, error; boolean_t borrowed = B_FALSE; @@ -5622,18 +5640,14 @@ zdb_read_block(char *thing, spa_t *spa) vdev = s ? s : ""; s = strtok(NULL, ":"); offset = strtoull(s ? s : "", NULL, 16); + sizes = strtok(NULL, ":"); s = strtok(NULL, ":"); - size = strtoull(s ? s : "", NULL, 16); - s = strtok(NULL, ":"); - if (s) - flagstr = strdup(s); - else - flagstr = strdup(""); + flagstr = strdup(s ? s : ""); s = NULL; - if (size == 0) - s = "size must not be zero"; - if (!IS_P2ALIGNED(size, DEV_BSIZE)) + if (!zdb_parse_block_sizes(sizes, &lsize, &psize)) + s = "invalid size(s)"; + if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE)) s = "size must be a multiple of sector size"; if (!IS_P2ALIGNED(offset, DEV_BSIZE)) s = "offset must be a multiple of sector size"; @@ -5689,9 +5703,6 @@ zdb_read_block(char *thing, spa_t *spa) vd->vdev_ops->vdev_op_type); } - psize = size; - lsize = size; - pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); @@ -5748,30 +5759,41 @@ zdb_read_block(char *thing, spa_t *spa) * We don't know how the data was compressed, so just try * every decompress function at every inflated blocksize. */ - enum zio_compress c; void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); + int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 }; + int *cfuncp = cfuncs; + uint64_t maxlsize = SPA_MAXBLOCKSIZE; + uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) | + ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) | + (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0); + *cfuncp++ = ZIO_COMPRESS_LZ4; + *cfuncp++ = ZIO_COMPRESS_LZJB; + mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB); + for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) + if (((1ULL << c) & mask) == 0) + *cfuncp++ = c; /* - * XXX - On the one hand, with SPA_MAXBLOCKSIZE at 16MB, - * this could take a while and we should let the user know + * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this + * could take a while and we should let the user know * we are not stuck. On the other hand, printing progress - * info gets old after a while. What to do? + * info gets old after a while. User can specify 'v' flag + * to see the progression. */ - for (lsize = psize + SPA_MINBLOCKSIZE; - lsize <= SPA_MAXBLOCKSIZE; lsize += SPA_MINBLOCKSIZE) { - for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { - /* - * ZLE can easily decompress non zle stream. - * So have an option to disable it. - */ - if (c == ZIO_COMPRESS_ZLE && - getenv("ZDB_NO_ZLE")) - continue; - - (void) fprintf(stderr, - "Trying %05llx -> %05llx (%s)\n", - (u_longlong_t)psize, (u_longlong_t)lsize, - zio_compress_table[c].ci_name); + if (lsize == psize) + lsize += SPA_MINBLOCKSIZE; + else + maxlsize = lsize; + for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) { + for (cfuncp = cfuncs; *cfuncp; cfuncp++) { + if (flags & ZDB_FLAG_VERBOSE) { + (void) fprintf(stderr, + "Trying %05llx -> %05llx (%s)\n", + (u_longlong_t)psize, + (u_longlong_t)lsize, + zio_compress_table[*cfuncp].\ + ci_name); + } /* * We randomize lbuf2, and decompress to both @@ -5780,27 +5802,30 @@ zdb_read_block(char *thing, spa_t *spa) */ VERIFY0(random_get_pseudo_bytes(lbuf2, lsize)); - if (zio_decompress_data(c, pabd, + if (zio_decompress_data(*cfuncp, pabd, lbuf, psize, lsize) == 0 && - zio_decompress_data(c, pabd, + zio_decompress_data(*cfuncp, pabd, lbuf2, psize, lsize) == 0 && bcmp(lbuf, lbuf2, lsize) == 0) break; } - if (c != ZIO_COMPRESS_FUNCTIONS) + if (*cfuncp != 0) break; } umem_free(lbuf2, SPA_MAXBLOCKSIZE); - if (lsize > SPA_MAXBLOCKSIZE) { + if (lsize > maxlsize) { (void) printf("Decompress of %s failed\n", thing); goto out; } buf = lbuf; - size = lsize; + if (*cfuncp == ZIO_COMPRESS_ZLE) { + printf("\nZLE decompression was selected. If you " + "suspect the results are wrong,\ntry avoiding ZLE " + "by setting and exporting ZDB_NO_ZLE=\"true\"\n"); + } } else { - size = psize; - buf = abd_borrow_buf_copy(pabd, size); + buf = abd_borrow_buf_copy(pabd, lsize); borrowed = B_TRUE; } @@ -5808,14 +5833,14 @@ zdb_read_block(char *thing, spa_t *spa) zdb_print_blkptr((blkptr_t *)(void *) ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); else if (flags & ZDB_FLAG_RAW) - zdb_dump_block_raw(buf, size, flags); + zdb_dump_block_raw(buf, lsize, flags); else if (flags & ZDB_FLAG_INDIRECT) - zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t), + zdb_dump_indirect((blkptr_t *)buf, lsize / sizeof (blkptr_t), flags); else if (flags & ZDB_FLAG_GBH) zdb_dump_gbh(buf, flags); else - zdb_dump_block(thing, buf, size, flags); + zdb_dump_block(thing, buf, lsize, flags); /* * If :c was specified, iterate through the checksum table to @@ -5879,7 +5904,7 @@ zdb_read_block(char *thing, spa_t *spa) } if (borrowed) - abd_return_buf_copy(pabd, buf, size); + abd_return_buf_copy(pabd, buf, lsize); out: abd_free(pabd); @@ -6294,8 +6319,8 @@ main(int argc, char **argv) flagbits['e'] = ZDB_FLAG_BSWAP; flagbits['g'] = ZDB_FLAG_GBH; flagbits['i'] = ZDB_FLAG_INDIRECT; - flagbits['p'] = ZDB_FLAG_PHYS; flagbits['r'] = ZDB_FLAG_RAW; + flagbits['v'] = ZDB_FLAG_VERBOSE; for (int i = 0; i < argc; i++) zdb_read_block(argv[i], spa); diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index 4f74c4b26..e907d03fe 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -63,7 +63,7 @@ .Op Fl A .Op Fl e Oo Fl V Oc Op Fl p Ar path ... .Op Fl U Ar cache -.Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags +.Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [/] Ns Op : Ns Ar flags .Nm .Fl S .Op Fl AP @@ -227,7 +227,7 @@ This option can be combined with .Fl v for increasing verbosity. .It Xo -.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags +.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [/] Ns Op : Ns Ar flags .Xc Read and display a block from the specified device. By default the block is displayed as a hex dump, but see the description of the @@ -240,8 +240,8 @@ The block is specified in terms of a colon-separated tuple .Ar offset .Pq the offset within the vdev .Ar size -.Pq the size of the block to read -and, optionally, +.Pq the physical size, or logical size / physical size +of the block to read and, optionally, .Ar flags .Pq a set of flags, described below . .Pp @@ -262,6 +262,8 @@ Dump gang block header Dump indirect block .It Sy r Dump raw uninterpreted block data +.It Sy v +Verbose output for guessing compression algorithm .El .It Fl s Report statistics on diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index cdaade27c..01c51347f 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -159,7 +159,6 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, * the checksum. However, for extra protection (e.g. against bitflips * in non-ECC RAM), we handle this error (and test it). */ - ASSERT0(ret); if (zio_decompress_fail_fraction != 0 && spa_get_random(zio_decompress_fail_fraction) == 0) ret = SET_ERROR(EINVAL); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 916631e41..6579cc748 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -106,7 +106,7 @@ tags = ['functional', 'clean_mirror'] [tests/functional/cli_root/zdb] tests = ['zdb_001_neg', 'zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', - 'zdb_005_pos', 'zdb_006_pos', 'zdb_checksum'] + 'zdb_005_pos', 'zdb_006_pos', 'zdb_checksum', 'zdb_decompress'] pre = post = tags = ['functional', 'cli_root', 'zdb'] diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am index 0c4de2b25..9f143078f 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am @@ -6,4 +6,5 @@ dist_pkgdata_SCRIPTS = \ zdb_004_pos.ksh \ zdb_005_pos.ksh \ zdb_006_pos.ksh \ - zdb_checksum.ksh + zdb_checksum.ksh \ + zdb_decompress.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh new file mode 100755 index 000000000..0e468d7c9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh @@ -0,0 +1,119 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# Description: +# zdb -R pool :d will display the correct data and length +# +# Strategy: +# 1. Create a pool, set compression to lzjb +# 2. Write some identifiable data to a file +# 3. Run zdb -ddddddbbbbbb against the file +# 4. Record the DVA, lsize, and psize of L0 block 0 +# 5. Run zdb -R with :d flag and match the data +# 6. Run zdb -R with :dr flags and match the lsize/psize +# 7. Run zdb -R with :dr flags and match the lsize +# 8. Run zdb -R with :dr flags and match the psize +# + + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Verify zdb -R :d flag (decompress) works as expected" +log_onexit cleanup +init_data=$TESTDIR/file1 +write_count=256 +blksize=4096 +pattern="_match__pattern_" +verify_runnable "global" +verify_disk_count "$DISKS" 2 + +default_mirror_setup_noexit $DISKS +log_must zfs set recordsize=$blksize $TESTPOOL/$TESTFS +log_must zfs set compression=lzjb $TESTPOOL/$TESTFS + +# 16 chars 256 times = 4k = block size +typeset four_k="" +for i in {1..$write_count} +do + four_k=$four_k$pattern +done + +# write the 4k block 256 times +for i in {1..$write_count} +do + echo $four_k >> $init_data +done + +sync_pool $TESTPOOL true + +# get object number of file +listing=$(ls -i $init_data) +set -A array $listing +obj=${array[0]} +log_note "file $init_data has object number $obj" + +output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \ + |grep -m 1 "L0 DVA" |head -n1) +dva=$(grep -oP 'DVA\[0\]=<\K.*?(?=>)' <<< "$output") +log_note "block 0 of $init_data has a DVA of $dva" + +# use the length reported by zdb -ddddddbbbbbb +size_str=$(grep -oP 'size=\K.*?(?= )' <<< "$output") +log_note "block size $size_str" + +vdev=$(echo "$dva" |awk '{split($0,array,":")} END{print array[1]}') +offset=$(echo "$dva" |awk '{split($0,array,":")} END{print array[2]}') +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:d 2> /dev/null) +echo $output |grep $pattern > /dev/null +(( $? != 0 )) && log_fail "zdb -R :d failed to decompress the data properly" + +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:dr 2> /dev/null) +echo $output |grep $four_k > /dev/null +(( $? != 0 )) && log_fail "zdb -R :dr failed to decompress the data properly" + +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:dr 2> /dev/null) +result=${#output} +(( $result != $blksize)) && log_fail \ +"zdb -R failed to decompress the data to the length (${#output} != $size_str)" + +# decompress using lsize +lsize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[1]}') +psize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[2]}') +output=$(zdb -R $TESTPOOL $vdev:$offset:$lsize:dr 2> /dev/null) +result=${#output} +(( $result != $blksize)) && log_fail \ +"zdb -R failed to decompress the data (length ${#output} != $blksize)" + +# Specifying psize will decompress successfully , but not always to full +# lsize since zdb has to guess lsize incrementally. +output=$(zdb -R $TESTPOOL $vdev:$offset:$psize:dr 2> /dev/null) +result=${#output} +# convert psize to decimal +psize_orig=$psize +psize=${psize%?} +psize=$((16#$psize)) +(( $result < $psize)) && log_fail \ +"zdb -R failed to decompress the data with psize $psize_orig\ + (length ${#output} < $psize)" + +log_pass "zdb -R :d flag (decompress) works as expected"