From ccf89b39fe7f30dd53aec69e04de3f2728c7387c Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Fri, 24 Jun 2022 14:28:42 -0600 Subject: [PATCH] Add a "zstream decompress" subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It can be used to repair a ZFS file system corrupted by ZFS bug #12762. Use it like this: zfs send -c | \ zstream decompress ,[,] ... | \ zfs recv Reviewed-by: Ahelenia Ziemiańska Reviewed-by: Brian Behlendorf Reviewed-by: Allan Jude Signed-off-by: Alan Somers Sponsored-by: Axcient Workaround for #12762 Closes #13256 --- cmd/zstream/Makefile.am | 2 + cmd/zstream/zstream.c | 4 + cmd/zstream/zstream.h | 4 + cmd/zstream/zstream_decompress.c | 359 +++++++++++++++++++++++++++++++ cmd/zstream/zstream_dump.c | 2 +- cmd/zstream/zstream_redup.c | 4 +- man/man8/zstream.8 | 55 ++++- 7 files changed, 425 insertions(+), 5 deletions(-) create mode 100644 cmd/zstream/zstream_decompress.c diff --git a/cmd/zstream/Makefile.am b/cmd/zstream/Makefile.am index 9b2716ae0..9ae33179e 100644 --- a/cmd/zstream/Makefile.am +++ b/cmd/zstream/Makefile.am @@ -4,6 +4,7 @@ CPPCHECKTARGETS += zstream zstream_SOURCES = \ %D%/zstream.c \ %D%/zstream.h \ + %D%/zstream_decompress.c \ %D%/zstream_dump.c \ %D%/zstream_redup.c \ %D%/zstream_token.c @@ -11,6 +12,7 @@ zstream_SOURCES = \ zstream_LDADD = \ libzfs.la \ libzfs_core.la \ + libzpool.la \ libnvpair.la PHONY += install-exec-hook diff --git a/cmd/zstream/zstream.c b/cmd/zstream/zstream.c index a228f45fa..eeceba247 100644 --- a/cmd/zstream/zstream.c +++ b/cmd/zstream/zstream.c @@ -40,6 +40,8 @@ zstream_usage(void) "\tzstream dump [-vCd] FILE\n" "\t... | zstream dump [-vCd]\n" "\n" + "\tzstream decompress [-v] [OBJECT,OFFSET[,TYPE]] ...\n" + "\n" "\tzstream token resume_token\n" "\n" "\tzstream redup [-v] FILE | ...\n"); @@ -61,6 +63,8 @@ main(int argc, char *argv[]) if (strcmp(subcommand, "dump") == 0) { return (zstream_do_dump(argc - 1, argv + 1)); + } else if (strcmp(subcommand, "decompress") == 0) { + return (zstream_do_decompress(argc - 1, argv + 1)); } else if (strcmp(subcommand, "token") == 0) { return (zstream_do_token(argc - 1, argv + 1)); } else if (strcmp(subcommand, "redup") == 0) { diff --git a/cmd/zstream/zstream.h b/cmd/zstream/zstream.h index 319fecb28..931d4e13f 100644 --- a/cmd/zstream/zstream.h +++ b/cmd/zstream/zstream.h @@ -24,8 +24,12 @@ extern "C" { #endif +extern void *safe_calloc(size_t n); +extern int sfread(void *buf, size_t size, FILE *fp); +extern void *safe_malloc(size_t size); extern int zstream_do_redup(int, char *[]); extern int zstream_do_dump(int, char *[]); +extern int zstream_do_decompress(int argc, char *argv[]); extern int zstream_do_token(int, char *[]); extern void zstream_usage(void); diff --git a/cmd/zstream/zstream_decompress.c b/cmd/zstream/zstream_decompress.c new file mode 100644 index 000000000..4c924e0e1 --- /dev/null +++ b/cmd/zstream/zstream_decompress.c @@ -0,0 +1,359 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2022 Axcient. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "zfs_fletcher.h" +#include "zstream.h" + +static int +dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, + zio_cksum_t *zc, int outfd) +{ + assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum) + == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + fletcher_4_incremental_native(drr, + offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); + if (drr->drr_type != DRR_BEGIN) { + assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. + drr_checksum.drr_checksum)); + drr->drr_u.drr_checksum.drr_checksum = *zc; + } + fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), zc); + if (write(outfd, drr, sizeof (*drr)) == -1) + return (errno); + if (payload_len != 0) { + fletcher_4_incremental_native(payload, payload_len, zc); + if (write(outfd, payload, payload_len) == -1) + return (errno); + } + return (0); +} + +int +zstream_do_decompress(int argc, char *argv[]) +{ + const int KEYSIZE = 64; + int bufsz = SPA_MAXBLOCKSIZE; + char *buf = safe_malloc(bufsz); + dmu_replay_record_t thedrr; + dmu_replay_record_t *drr = &thedrr; + zio_cksum_t stream_cksum; + int c; + boolean_t verbose = B_FALSE; + + while ((c = getopt(argc, argv, "v")) != -1) { + switch (c) { + case 'v': + verbose = B_TRUE; + break; + case '?': + (void) fprintf(stderr, "invalid option '%c'\n", + optopt); + zstream_usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (argc < 0) + zstream_usage(); + + if (hcreate(argc) == 0) + errx(1, "hcreate"); + for (int i = 0; i < argc; i++) { + uint64_t object, offset; + char *obj_str; + char *offset_str; + char *key; + char *end; + enum zio_compress type = ZIO_COMPRESS_LZ4; + + obj_str = strsep(&argv[i], ","); + if (argv[i] == NULL) { + zstream_usage(); + exit(2); + } + errno = 0; + object = strtoull(obj_str, &end, 0); + if (errno || *end != '\0') + errx(1, "invalid value for object"); + offset_str = strsep(&argv[i], ","); + offset = strtoull(offset_str, &end, 0); + if (errno || *end != '\0') + errx(1, "invalid value for offset"); + if (argv[i]) { + if (0 == strcmp("lz4", argv[i])) + type = ZIO_COMPRESS_LZ4; + else if (0 == strcmp("lzjb", argv[i])) + type = ZIO_COMPRESS_LZJB; + else if (0 == strcmp("gzip", argv[i])) + type = ZIO_COMPRESS_GZIP_1; + else if (0 == strcmp("zle", argv[i])) + type = ZIO_COMPRESS_ZLE; + else if (0 == strcmp("zstd", argv[i])) + type = ZIO_COMPRESS_ZSTD; + else { + fprintf(stderr, "Invalid compression type %s.\n" + "Supported types are lz4, lzjb, gzip, zle, " + "and zstd\n", + argv[i]); + exit(2); + } + } + + if (asprintf(&key, "%llu,%llu", (u_longlong_t)object, + (u_longlong_t)offset) < 0) { + err(1, "asprintf"); + } + ENTRY e = {.key = key}; + ENTRY *p; + + p = hsearch(e, ENTER); + if (p == NULL) + errx(1, "hsearch"); + p->data = (void*)type; + } + + if (isatty(STDIN_FILENO)) { + (void) fprintf(stderr, + "Error: The send stream is a binary format " + "and can not be read from a\n" + "terminal. Standard input must be redirected.\n"); + exit(1); + } + + fletcher_4_init(); + while (sfread(drr, sizeof (*drr), stdin) != 0) { + struct drr_write *drrw; + uint64_t payload_size = 0; + + /* + * We need to regenerate the checksum. + */ + if (drr->drr_type != DRR_BEGIN) { + memset(&drr->drr_u.drr_checksum.drr_checksum, 0, + sizeof (drr->drr_u.drr_checksum.drr_checksum)); + } + + switch (drr->drr_type) { + case DRR_BEGIN: + { + ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + + int sz = drr->drr_payloadlen; + if (sz != 0) { + if (sz > bufsz) { + buf = realloc(buf, sz); + if (buf == NULL) + err(1, "realloc"); + bufsz = sz; + } + (void) sfread(buf, sz, stdin); + } + payload_size = sz; + break; + } + case DRR_END: + { + struct drr_end *drre = &drr->drr_u.drr_end; + /* + * Use the recalculated checksum, unless this is + * the END record of a stream package, which has + * no checksum. + */ + if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum)) + drre->drr_checksum = stream_cksum; + break; + } + + case DRR_OBJECT: + { + struct drr_object *drro = &drr->drr_u.drr_object; + + if (drro->drr_bonuslen > 0) { + payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); + (void) sfread(buf, payload_size, stdin); + } + break; + } + + case DRR_SPILL: + { + struct drr_spill *drrs = &drr->drr_u.drr_spill; + payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); + (void) sfread(buf, payload_size, stdin); + break; + } + + case DRR_WRITE_BYREF: + fprintf(stderr, + "Deduplicated streams are not supported\n"); + exit(1); + break; + + case DRR_WRITE: + { + drrw = &thedrr.drr_u.drr_write; + payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); + ENTRY *p; + char key[KEYSIZE]; + + snprintf(key, KEYSIZE, "%llu,%llu", + (u_longlong_t)drrw->drr_object, + (u_longlong_t)drrw->drr_offset); + ENTRY e = {.key = key}; + + p = hsearch(e, FIND); + if (p != NULL) { + zio_decompress_func_t *xfunc = NULL; + switch ((enum zio_compress)(intptr_t)p->data) { + case ZIO_COMPRESS_LZJB: + xfunc = lzjb_decompress; + break; + case ZIO_COMPRESS_GZIP_1: + xfunc = gzip_decompress; + break; + case ZIO_COMPRESS_ZLE: + xfunc = zle_decompress; + break; + case ZIO_COMPRESS_LZ4: + xfunc = lz4_decompress_zfs; + break; + case ZIO_COMPRESS_ZSTD: + xfunc = zfs_zstd_decompress; + break; + default: + assert(B_FALSE); + } + assert(xfunc != NULL); + + + /* + * Read and decompress the block + */ + char *lzbuf = safe_calloc(payload_size); + (void) sfread(lzbuf, payload_size, stdin); + if (0 != xfunc(lzbuf, buf, + payload_size, payload_size, 0)) { + /* + * The block must not be compressed, + * possibly because it gets written + * multiple times in this stream. + */ + warnx("decompression failed for " + "ino %llu offset %llu", + (u_longlong_t)drrw->drr_object, + (u_longlong_t)drrw->drr_offset); + memcpy(buf, lzbuf, payload_size); + } else if (verbose) { + fprintf(stderr, "successfully " + "decompressed ino %llu " + "offset %llu\n", + (u_longlong_t)drrw->drr_object, + (u_longlong_t)drrw->drr_offset); + } + free(lzbuf); + } else { + /* + * Read the contents of the block unaltered + */ + (void) sfread(buf, payload_size, stdin); + } + break; + } + + case DRR_WRITE_EMBEDDED: + { + struct drr_write_embedded *drrwe = + &drr->drr_u.drr_write_embedded; + payload_size = + P2ROUNDUP((uint64_t)drrwe->drr_psize, 8); + (void) sfread(buf, payload_size, stdin); + break; + } + + case DRR_FREEOBJECTS: + case DRR_FREE: + case DRR_OBJECT_RANGE: + break; + + default: + (void) fprintf(stderr, "INVALID record type 0x%x\n", + drr->drr_type); + /* should never happen, so assert */ + assert(B_FALSE); + } + + if (feof(stdout)) { + fprintf(stderr, "Error: unexpected end-of-file\n"); + exit(1); + } + if (ferror(stdout)) { + fprintf(stderr, "Error while reading file: %s\n", + strerror(errno)); + exit(1); + } + + /* + * We need to recalculate the checksum, and it needs to be + * initially zero to do that. BEGIN records don't have + * a checksum. + */ + if (drr->drr_type != DRR_BEGIN) { + memset(&drr->drr_u.drr_checksum.drr_checksum, 0, + sizeof (drr->drr_u.drr_checksum.drr_checksum)); + } + if (dump_record(drr, buf, payload_size, + &stream_cksum, STDOUT_FILENO) != 0) + break; + if (drr->drr_type == DRR_END) { + /* + * Typically the END record is either the last + * thing in the stream, or it is followed + * by a BEGIN record (which also zeros the checksum). + * However, a stream package ends with two END + * records. The last END record's checksum starts + * from zero. + */ + ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + } + } + free(buf); + fletcher_4_fini(); + hdestroy(); + + return (0); +} diff --git a/cmd/zstream/zstream_dump.c b/cmd/zstream/zstream_dump.c index 977256cae..170d84fed 100644 --- a/cmd/zstream/zstream_dump.c +++ b/cmd/zstream/zstream_dump.c @@ -59,7 +59,7 @@ FILE *send_stream = 0; boolean_t do_byteswap = B_FALSE; boolean_t do_cksum = B_TRUE; -static void * +void * safe_malloc(size_t size) { void *rv = malloc(size); diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c index 20aff17ae..5807fabce 100644 --- a/cmd/zstream/zstream_redup.c +++ b/cmd/zstream/zstream_redup.c @@ -65,7 +65,7 @@ highbit64(uint64_t i) return (NBBY * sizeof (uint64_t) - __builtin_clzll(i)); } -static void * +void * safe_calloc(size_t n) { void *rv = calloc(1, n); @@ -81,7 +81,7 @@ safe_calloc(size_t n) /* * Safe version of fread(), exits on error. */ -static int +int sfread(void *buf, size_t size, FILE *fp) { int rv = fread(buf, size, 1, fp); diff --git a/man/man8/zstream.8 b/man/man8/zstream.8 index c0322ee3a..aac7e3487 100644 --- a/man/man8/zstream.8 +++ b/man/man8/zstream.8 @@ -20,7 +20,7 @@ .\" .\" Copyright (c) 2020 by Delphix. All rights reserved. .\" -.Dd May 8, 2021 +.Dd March 25, 2022 .Dt ZSTREAM 8 .Os . @@ -33,6 +33,10 @@ .Op Fl Cvd .Op Ar file .Nm +.Cm decompress +.Op Fl v +.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ... +.Nm .Cm redup .Op Fl v .Ar file @@ -82,6 +86,36 @@ alias is provided for compatibility and is equivalent to running Dumps zfs resume token information .It Xo .Nm +.Cm decompress +.Op Fl v +.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ... +.Xc +Decompress selected records in a ZFS send stream provided on standard input, +when the compression type recorded in ZFS metadata may be incorrect. +Specify the object number and byte offset of each record that you wish to +decompress. +Optionally specify the compression type. +Valid compression types include +.Sy gzip , +.Sy lz4 , +.Sy lzjb , +.Sy zstd , +and +.Sy zle . +The default is +.Sy lz4 . +Every record for that object beginning at that offset will be decompressed, if +possible. +It may not be possible, because the record may be corrupted in some but not +all of the stream's snapshots. +The repaired stream will be written to standard output. +.Bl -tag -width "-v" +.It Fl v +Verbose. +Print summary of decompressed records. +.El +.It Xo +.Nm .Cm redup .Op Fl v .Ar file @@ -111,7 +145,24 @@ Print summary of converted records. .El .El . +.Sh EXAMPLES +Heal a dataset that was corrupted due to OpenZFS bug #12762. +First, determine which records are corrupt. +That cannot be done automatically; it requires information beyond ZFS's +metadata. +If object +.Sy 128 +is corrupted at offset +.Sy 0 +and is compressed using +.Sy lz4 , +then run this command: +.Bd -literal +.No # Nm zfs Ar send Fl c Ar … | Nm zstream decompress Ar 128,0,lz4 | \ +Nm zfs recv Ar … +.Ed .Sh SEE ALSO .Xr zfs 8 , .Xr zfs-receive 8 , -.Xr zfs-send 8 +.Xr zfs-send 8 , +.Lk https://github.com/openzfs/zfs/issues/12762