mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Add zstream redup command to convert deduplicated send streams
Deduplicated send and receive is deprecated. To ease migration to the new dedup-send-less world, the commit adds a `zstream redup` utility to convert deduplicated send streams to normal streams, so that they can continue to be received indefinitely. The new `zstream` command also replaces the functionality of `zstreamdump`, by way of the `zstream dump` subcommand. The `zstreamdump` command is replaced by a shell script which invokes `zstream dump`. The way that `zstream redup` works under the hood is that as we read the send stream, we build up a hash table which maps from `<GUID, object, offset> -> <file_offset>`. Whenever we see a WRITE record, we add a new entry to the hash table, which indicates where in the stream file to find the WRITE record for this block. (The key is `drr_toguid, drr_object, drr_offset`.) For entries other than WRITE_BYREF, we pass them through unchanged (except for the running checksum, which is recalculated). For WRITE_BYREF records, we change them to WRITE records. We find the referenced WRITE record by looking in the hash table (for the record with key `drr_refguid, drr_refobject, drr_refoffset`), and then reading the record header and payload from the specified offset in the stream file. This is why the stream can not be a pipe. The found WRITE record replaces the WRITE_BYREF record, with its `drr_toguid`, `drr_object`, and `drr_offset` fields changed to be the same as the WRITE_BYREF's (i.e. we are writing the same logical block, but with the data supplied by the previous WRITE record). This algorithm requires memory proportional to the number of WRITE records (same as `zfs send -D`), but the size per WRITE record is relatively low (40 bytes, vs. 72 for `zfs send -D`). A 1TB send stream with 8KB blocks (`recordsize=8k`) would use around 5GB of RAM to "redup". 
Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Paul Dagnelie <pcd@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Matthew Ahrens <mahrens@delphix.com> Closes #10124 Closes #10156
This commit is contained in:
@@ -0,0 +1 @@
|
||||
zstream
|
||||
@@ -0,0 +1,13 @@
|
||||
include $(top_srcdir)/config/Rules.am
|
||||
|
||||
sbin_PROGRAMS = zstream
|
||||
|
||||
zstream_SOURCES = \
|
||||
zstream.c \
|
||||
zstream.h \
|
||||
zstream_dump.c \
|
||||
zstream_redup.c
|
||||
|
||||
zstream_LDADD = \
|
||||
$(top_builddir)/lib/libnvpair/libnvpair.la \
|
||||
$(top_builddir)/lib/libzfs/libzfs.la
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2020 by Delphix. All rights reserved.
|
||||
*/
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <libintl.h>
|
||||
#include <stddef.h>
|
||||
#include <libzfs.h>
|
||||
#include "zstream.h"
|
||||
|
||||
/*
 * Write the zstream command synopsis to stderr and terminate the process
 * with exit status 1.  This function does not return to its caller.
 */
void
zstream_usage(void)
{
	static const char synopsis[] =
	    "usage: zstream command args ...\n"
	    "Available commands are:\n"
	    "\n"
	    "\tzstream dump [-vCd] FILE\n"
	    "\t... | zstream dump [-vCd]\n"
	    "\n"
	    "\tzstream redup [-v] FILE | ...\n";

	(void) fputs(synopsis, stderr);
	exit(1);
}
|
||||
|
||||
/*
 * Program entry point.  argv[1] names the subcommand ("dump" or "redup");
 * the matching handler is invoked with the argument vector shifted by one,
 * so the handler sees the subcommand name as its own argv[0].  A missing or
 * unrecognized subcommand prints the usage text, which exits.
 */
int
main(int argc, char *argv[])
{
	if (argc < 2)
		zstream_usage();

	const char *cmd = argv[1];
	int sub_argc = argc - 1;
	char **sub_argv = argv + 1;

	if (strcmp(cmd, "dump") == 0)
		return (zstream_do_dump(sub_argc, sub_argv));

	if (strcmp(cmd, "redup") == 0)
		return (zstream_do_redup(sub_argc, sub_argv));

	zstream_usage();
}
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2020 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZSTREAM_H
|
||||
#define _ZSTREAM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern int zstream_do_redup(int, char *[]);
|
||||
extern int zstream_do_dump(int, char *[]);
|
||||
extern void zstream_usage(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZSTREAM_H */
|
||||
@@ -0,0 +1,797 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* Portions Copyright 2012 Martin Matuska <martin@matuska.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <libnvpair.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <zfs_fletcher.h>
|
||||
#include "zstream.h"
|
||||
|
||||
/*
|
||||
* If dump mode is enabled, the number of bytes to print per line
|
||||
*/
|
||||
#define BYTES_PER_LINE 16
|
||||
/*
|
||||
* If dump mode is enabled, the number of bytes to group together, separated
|
||||
* by newlines or spaces
|
||||
*/
|
||||
#define DUMP_GROUPING 4
|
||||
|
||||
/*
 * State shared by the dump code in this file:
 *   total_stream_len - bytes consumed so far; ssread() adds the length of
 *                      every successful read, and the final summary prints it.
 *   send_stream      - input stream; zstream_do_dump() points it at the named
 *                      file or at stdin.
 *   do_byteswap      - set once the first record's magic shows that the
 *                      stream is in the opposite byte order.
 *   do_cksum         - checksum verification switch; cleared by the -C option.
 */
uint64_t total_stream_len = 0;
|
||||
FILE *send_stream = 0;
|
||||
boolean_t do_byteswap = B_FALSE;
|
||||
boolean_t do_cksum = B_TRUE;
|
||||
|
||||
/*
 * malloc() wrapper that never returns NULL: when the allocation fails, the
 * requested size is reported on stderr and the process is aborted.
 */
static void *
safe_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem != NULL)
		return (mem);

	(void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n",
	    size);
	abort();
}
|
||||
|
||||
/*
|
||||
* ssread - send stream read.
|
||||
*
|
||||
* Read while computing incremental checksum
|
||||
*/
|
||||
static size_t
|
||||
ssread(void *buf, size_t len, zio_cksum_t *cksum)
|
||||
{
|
||||
size_t outlen;
|
||||
|
||||
if ((outlen = fread(buf, len, 1, send_stream)) == 0)
|
||||
return (0);
|
||||
|
||||
if (do_cksum) {
|
||||
if (do_byteswap)
|
||||
fletcher_4_incremental_byteswap(buf, len, cksum);
|
||||
else
|
||||
fletcher_4_incremental_native(buf, len, cksum);
|
||||
}
|
||||
total_stream_len += len;
|
||||
return (outlen);
|
||||
}
|
||||
|
||||
static size_t
|
||||
read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
|
||||
{
|
||||
ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
|
||||
==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
|
||||
size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
|
||||
if (r == 0)
|
||||
return (0);
|
||||
zio_cksum_t saved_cksum = *cksum;
|
||||
r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
|
||||
sizeof (zio_cksum_t), cksum);
|
||||
if (r == 0)
|
||||
return (0);
|
||||
if (do_cksum &&
|
||||
!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
|
||||
!ZIO_CHECKSUM_EQUAL(saved_cksum,
|
||||
drr->drr_u.drr_checksum.drr_checksum)) {
|
||||
fprintf(stderr, "invalid checksum\n");
|
||||
(void) printf("Incorrect checksum in record header.\n");
|
||||
(void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
|
||||
(longlong_t)saved_cksum.zc_word[0],
|
||||
(longlong_t)saved_cksum.zc_word[1],
|
||||
(longlong_t)saved_cksum.zc_word[2],
|
||||
(longlong_t)saved_cksum.zc_word[3]);
|
||||
return (0);
|
||||
}
|
||||
return (sizeof (*drr));
|
||||
}
|
||||
|
||||
/*
|
||||
* Print part of a block in ASCII characters
|
||||
*/
|
||||
static void
|
||||
print_ascii_block(char *subbuf, int length)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
char char_print = isprint(subbuf[i]) ? subbuf[i] : '.';
|
||||
if (i != 0 && i % DUMP_GROUPING == 0) {
|
||||
(void) printf(" ");
|
||||
}
|
||||
(void) printf("%c", char_print);
|
||||
}
|
||||
(void) printf("\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* print_block - Dump the contents of a modified block to STDOUT
|
||||
*
|
||||
* Assume that buf has capacity evenly divisible by BYTES_PER_LINE
|
||||
*/
|
||||
static void
|
||||
print_block(char *buf, int length)
|
||||
{
|
||||
int i;
|
||||
/*
|
||||
* Start printing ASCII characters at a constant offset, after
|
||||
* the hex prints. Leave 3 characters per byte on a line (2 digit
|
||||
* hex number plus 1 space) plus spaces between characters and
|
||||
* groupings.
|
||||
*/
|
||||
int ascii_start = BYTES_PER_LINE * 3 +
|
||||
BYTES_PER_LINE / DUMP_GROUPING + 2;
|
||||
|
||||
for (i = 0; i < length; i += BYTES_PER_LINE) {
|
||||
int j;
|
||||
int this_line_length = MIN(BYTES_PER_LINE, length - i);
|
||||
int print_offset = 0;
|
||||
|
||||
for (j = 0; j < this_line_length; j++) {
|
||||
int buf_offset = i + j;
|
||||
|
||||
/*
|
||||
* Separate every DUMP_GROUPING bytes by a space.
|
||||
*/
|
||||
if (buf_offset % DUMP_GROUPING == 0) {
|
||||
print_offset += printf(" ");
|
||||
}
|
||||
|
||||
/*
|
||||
* Print the two-digit hex value for this byte.
|
||||
*/
|
||||
unsigned char hex_print = buf[buf_offset];
|
||||
print_offset += printf("%02x ", hex_print);
|
||||
}
|
||||
|
||||
(void) printf("%*s", ascii_start - print_offset, " ");
|
||||
|
||||
print_ascii_block(buf + i, this_line_length);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print an array of bytes to stdout as hexadecimal characters. str must
|
||||
* have buf_len * 2 + 1 bytes of space.
|
||||
*/
|
||||
static void
|
||||
sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
|
||||
{
|
||||
int i, n;
|
||||
|
||||
for (i = 0; i < buf_len; i++) {
|
||||
n = sprintf(str, "%02x", buf[i] & 0xff);
|
||||
str += n;
|
||||
}
|
||||
|
||||
str[0] = '\0';
|
||||
}
|
||||
|
||||
int
|
||||
zstream_do_dump(int argc, char *argv[])
|
||||
{
|
||||
char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
|
||||
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
|
||||
uint64_t total_payload_size = 0;
|
||||
uint64_t total_overhead_size = 0;
|
||||
uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 };
|
||||
char salt[ZIO_DATA_SALT_LEN * 2 + 1];
|
||||
char iv[ZIO_DATA_IV_LEN * 2 + 1];
|
||||
char mac[ZIO_DATA_MAC_LEN * 2 + 1];
|
||||
uint64_t total_records = 0;
|
||||
uint64_t payload_size;
|
||||
dmu_replay_record_t thedrr;
|
||||
dmu_replay_record_t *drr = &thedrr;
|
||||
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
|
||||
struct drr_end *drre = &thedrr.drr_u.drr_end;
|
||||
struct drr_object *drro = &thedrr.drr_u.drr_object;
|
||||
struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects;
|
||||
struct drr_write *drrw = &thedrr.drr_u.drr_write;
|
||||
struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
|
||||
struct drr_free *drrf = &thedrr.drr_u.drr_free;
|
||||
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
|
||||
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
|
||||
struct drr_object_range *drror = &thedrr.drr_u.drr_object_range;
|
||||
struct drr_redact *drrr = &thedrr.drr_u.drr_redact;
|
||||
struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
|
||||
int c;
|
||||
boolean_t verbose = B_FALSE;
|
||||
boolean_t very_verbose = B_FALSE;
|
||||
boolean_t first = B_TRUE;
|
||||
/*
|
||||
* dump flag controls whether the contents of any modified data blocks
|
||||
* are printed to the console during processing of the stream. Warning:
|
||||
* for large streams, this can obviously lead to massive prints.
|
||||
*/
|
||||
boolean_t dump = B_FALSE;
|
||||
int err;
|
||||
zio_cksum_t zc = { { 0 } };
|
||||
zio_cksum_t pcksum = { { 0 } };
|
||||
|
||||
while ((c = getopt(argc, argv, ":vCd")) != -1) {
|
||||
switch (c) {
|
||||
case 'C':
|
||||
do_cksum = B_FALSE;
|
||||
break;
|
||||
case 'v':
|
||||
if (verbose)
|
||||
very_verbose = B_TRUE;
|
||||
verbose = B_TRUE;
|
||||
break;
|
||||
case 'd':
|
||||
dump = B_TRUE;
|
||||
verbose = B_TRUE;
|
||||
very_verbose = B_TRUE;
|
||||
break;
|
||||
case ':':
|
||||
(void) fprintf(stderr,
|
||||
"missing argument for '%c' option\n", optopt);
|
||||
zstream_usage();
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, "invalid option '%c'\n",
|
||||
optopt);
|
||||
zstream_usage();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (argc > optind) {
|
||||
const char *filename = argv[optind];
|
||||
send_stream = fopen(filename, "r");
|
||||
if (send_stream == NULL) {
|
||||
(void) fprintf(stderr,
|
||||
"Error while opening file '%s': %s\n",
|
||||
filename, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
if (isatty(STDIN_FILENO)) {
|
||||
(void) fprintf(stderr,
|
||||
"Error: The send stream is a binary format "
|
||||
"and can not be read from a\n"
|
||||
"terminal. Standard input must be redirected, "
|
||||
"or a file must be\n"
|
||||
"specified as a command-line argument.\n");
|
||||
exit(1);
|
||||
}
|
||||
send_stream = stdin;
|
||||
}
|
||||
|
||||
fletcher_4_init();
|
||||
while (read_hdr(drr, &zc)) {
|
||||
|
||||
/*
|
||||
* If this is the first DMU record being processed, check for
|
||||
* the magic bytes and figure out the endian-ness based on them.
|
||||
*/
|
||||
if (first) {
|
||||
if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
|
||||
do_byteswap = B_TRUE;
|
||||
if (do_cksum) {
|
||||
ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
|
||||
/*
|
||||
* recalculate header checksum now
|
||||
* that we know it needs to be
|
||||
* byteswapped.
|
||||
*/
|
||||
fletcher_4_incremental_byteswap(drr,
|
||||
sizeof (dmu_replay_record_t), &zc);
|
||||
}
|
||||
} else if (drrb->drr_magic != DMU_BACKUP_MAGIC) {
|
||||
(void) fprintf(stderr, "Invalid stream "
|
||||
"(bad magic number)\n");
|
||||
exit(1);
|
||||
}
|
||||
first = B_FALSE;
|
||||
}
|
||||
if (do_byteswap) {
|
||||
drr->drr_type = BSWAP_32(drr->drr_type);
|
||||
drr->drr_payloadlen =
|
||||
BSWAP_32(drr->drr_payloadlen);
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point, the leading fields of the replay record
|
||||
* (drr_type and drr_payloadlen) have been byte-swapped if
|
||||
* necessary, but the rest of the data structure (the
|
||||
* union of type-specific structures) is still in its
|
||||
* original state.
|
||||
*/
|
||||
if (drr->drr_type >= DRR_NUMTYPES) {
|
||||
(void) printf("INVALID record found: type 0x%x\n",
|
||||
drr->drr_type);
|
||||
(void) printf("Aborting.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
drr_record_count[drr->drr_type]++;
|
||||
total_overhead_size += sizeof (*drr);
|
||||
total_records++;
|
||||
payload_size = 0;
|
||||
|
||||
switch (drr->drr_type) {
|
||||
case DRR_BEGIN:
|
||||
if (do_byteswap) {
|
||||
drrb->drr_magic = BSWAP_64(drrb->drr_magic);
|
||||
drrb->drr_versioninfo =
|
||||
BSWAP_64(drrb->drr_versioninfo);
|
||||
drrb->drr_creation_time =
|
||||
BSWAP_64(drrb->drr_creation_time);
|
||||
drrb->drr_type = BSWAP_32(drrb->drr_type);
|
||||
drrb->drr_flags = BSWAP_32(drrb->drr_flags);
|
||||
drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
|
||||
drrb->drr_fromguid =
|
||||
BSWAP_64(drrb->drr_fromguid);
|
||||
}
|
||||
|
||||
(void) printf("BEGIN record\n");
|
||||
(void) printf("\thdrtype = %lld\n",
|
||||
DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo));
|
||||
(void) printf("\tfeatures = %llx\n",
|
||||
DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo));
|
||||
(void) printf("\tmagic = %llx\n",
|
||||
(u_longlong_t)drrb->drr_magic);
|
||||
(void) printf("\tcreation_time = %llx\n",
|
||||
(u_longlong_t)drrb->drr_creation_time);
|
||||
(void) printf("\ttype = %u\n", drrb->drr_type);
|
||||
(void) printf("\tflags = 0x%x\n", drrb->drr_flags);
|
||||
(void) printf("\ttoguid = %llx\n",
|
||||
(u_longlong_t)drrb->drr_toguid);
|
||||
(void) printf("\tfromguid = %llx\n",
|
||||
(u_longlong_t)drrb->drr_fromguid);
|
||||
(void) printf("\ttoname = %s\n", drrb->drr_toname);
|
||||
if (verbose)
|
||||
(void) printf("\n");
|
||||
|
||||
if (drr->drr_payloadlen != 0) {
|
||||
nvlist_t *nv;
|
||||
int sz = drr->drr_payloadlen;
|
||||
|
||||
if (sz > SPA_MAXBLOCKSIZE) {
|
||||
free(buf);
|
||||
buf = safe_malloc(sz);
|
||||
}
|
||||
(void) ssread(buf, sz, &zc);
|
||||
if (ferror(send_stream))
|
||||
perror("fread");
|
||||
err = nvlist_unpack(buf, sz, &nv, 0);
|
||||
if (err) {
|
||||
perror(strerror(err));
|
||||
} else {
|
||||
nvlist_print(stdout, nv);
|
||||
nvlist_free(nv);
|
||||
}
|
||||
payload_size = sz;
|
||||
}
|
||||
break;
|
||||
|
||||
case DRR_END:
|
||||
if (do_byteswap) {
|
||||
drre->drr_checksum.zc_word[0] =
|
||||
BSWAP_64(drre->drr_checksum.zc_word[0]);
|
||||
drre->drr_checksum.zc_word[1] =
|
||||
BSWAP_64(drre->drr_checksum.zc_word[1]);
|
||||
drre->drr_checksum.zc_word[2] =
|
||||
BSWAP_64(drre->drr_checksum.zc_word[2]);
|
||||
drre->drr_checksum.zc_word[3] =
|
||||
BSWAP_64(drre->drr_checksum.zc_word[3]);
|
||||
}
|
||||
/*
|
||||
* We compare against the *previous* checksum
|
||||
* value, because the stored checksum is of
|
||||
* everything before the DRR_END record.
|
||||
*/
|
||||
if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum,
|
||||
pcksum)) {
|
||||
(void) printf("Expected checksum differs from "
|
||||
"checksum in stream.\n");
|
||||
(void) printf("Expected checksum = "
|
||||
"%llx/%llx/%llx/%llx\n",
|
||||
(long long unsigned int)pcksum.zc_word[0],
|
||||
(long long unsigned int)pcksum.zc_word[1],
|
||||
(long long unsigned int)pcksum.zc_word[2],
|
||||
(long long unsigned int)pcksum.zc_word[3]);
|
||||
}
|
||||
(void) printf("END checksum = %llx/%llx/%llx/%llx\n",
|
||||
(long long unsigned int)
|
||||
drre->drr_checksum.zc_word[0],
|
||||
(long long unsigned int)
|
||||
drre->drr_checksum.zc_word[1],
|
||||
(long long unsigned int)
|
||||
drre->drr_checksum.zc_word[2],
|
||||
(long long unsigned int)
|
||||
drre->drr_checksum.zc_word[3]);
|
||||
|
||||
ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case DRR_OBJECT:
|
||||
if (do_byteswap) {
|
||||
drro->drr_object = BSWAP_64(drro->drr_object);
|
||||
drro->drr_type = BSWAP_32(drro->drr_type);
|
||||
drro->drr_bonustype =
|
||||
BSWAP_32(drro->drr_bonustype);
|
||||
drro->drr_blksz = BSWAP_32(drro->drr_blksz);
|
||||
drro->drr_bonuslen =
|
||||
BSWAP_32(drro->drr_bonuslen);
|
||||
drro->drr_raw_bonuslen =
|
||||
BSWAP_32(drro->drr_raw_bonuslen);
|
||||
drro->drr_toguid = BSWAP_64(drro->drr_toguid);
|
||||
drro->drr_maxblkid =
|
||||
BSWAP_64(drro->drr_maxblkid);
|
||||
}
|
||||
|
||||
payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
|
||||
|
||||
if (verbose) {
|
||||
(void) printf("OBJECT object = %llu type = %u "
|
||||
"bonustype = %u blksz = %u bonuslen = %u "
|
||||
"dn_slots = %u raw_bonuslen = %u "
|
||||
"flags = %u maxblkid = %llu "
|
||||
"indblkshift = %u nlevels = %u "
|
||||
"nblkptr = %u\n",
|
||||
(u_longlong_t)drro->drr_object,
|
||||
drro->drr_type,
|
||||
drro->drr_bonustype,
|
||||
drro->drr_blksz,
|
||||
drro->drr_bonuslen,
|
||||
drro->drr_dn_slots,
|
||||
drro->drr_raw_bonuslen,
|
||||
drro->drr_flags,
|
||||
(u_longlong_t)drro->drr_maxblkid,
|
||||
drro->drr_indblkshift,
|
||||
drro->drr_nlevels,
|
||||
drro->drr_nblkptr);
|
||||
}
|
||||
if (drro->drr_bonuslen > 0) {
|
||||
(void) ssread(buf, payload_size, &zc);
|
||||
if (dump)
|
||||
print_block(buf, payload_size);
|
||||
}
|
||||
break;
|
||||
|
||||
case DRR_FREEOBJECTS:
|
||||
if (do_byteswap) {
|
||||
drrfo->drr_firstobj =
|
||||
BSWAP_64(drrfo->drr_firstobj);
|
||||
drrfo->drr_numobjs =
|
||||
BSWAP_64(drrfo->drr_numobjs);
|
||||
drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid);
|
||||
}
|
||||
if (verbose) {
|
||||
(void) printf("FREEOBJECTS firstobj = %llu "
|
||||
"numobjs = %llu\n",
|
||||
(u_longlong_t)drrfo->drr_firstobj,
|
||||
(u_longlong_t)drrfo->drr_numobjs);
|
||||
}
|
||||
break;
|
||||
|
||||
case DRR_WRITE:
|
||||
if (do_byteswap) {
|
||||
drrw->drr_object = BSWAP_64(drrw->drr_object);
|
||||
drrw->drr_type = BSWAP_32(drrw->drr_type);
|
||||
drrw->drr_offset = BSWAP_64(drrw->drr_offset);
|
||||
drrw->drr_logical_size =
|
||||
BSWAP_64(drrw->drr_logical_size);
|
||||
drrw->drr_toguid = BSWAP_64(drrw->drr_toguid);
|
||||
drrw->drr_key.ddk_prop =
|
||||
BSWAP_64(drrw->drr_key.ddk_prop);
|
||||
drrw->drr_compressed_size =
|
||||
BSWAP_64(drrw->drr_compressed_size);
|
||||
}
|
||||
|
||||
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
||||
|
||||
/*
|
||||
* If this is verbose and/or dump output,
|
||||
* print info on the modified block
|
||||
*/
|
||||
if (verbose) {
|
||||
sprintf_bytes(salt, drrw->drr_salt,
|
||||
ZIO_DATA_SALT_LEN);
|
||||
sprintf_bytes(iv, drrw->drr_iv,
|
||||
ZIO_DATA_IV_LEN);
|
||||
sprintf_bytes(mac, drrw->drr_mac,
|
||||
ZIO_DATA_MAC_LEN);
|
||||
|
||||
(void) printf("WRITE object = %llu type = %u "
|
||||
"checksum type = %u compression type = %u "
|
||||
"flags = %u offset = %llu "
|
||||
"logical_size = %llu "
|
||||
"compressed_size = %llu "
|
||||
"payload_size = %llu props = %llx "
|
||||
"salt = %s iv = %s mac = %s\n",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
drrw->drr_type,
|
||||
drrw->drr_checksumtype,
|
||||
drrw->drr_compressiontype,
|
||||
drrw->drr_flags,
|
||||
(u_longlong_t)drrw->drr_offset,
|
||||
(u_longlong_t)drrw->drr_logical_size,
|
||||
(u_longlong_t)drrw->drr_compressed_size,
|
||||
(u_longlong_t)payload_size,
|
||||
(u_longlong_t)drrw->drr_key.ddk_prop,
|
||||
salt,
|
||||
iv,
|
||||
mac);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the contents of the block in from STDIN to buf
|
||||
*/
|
||||
(void) ssread(buf, payload_size, &zc);
|
||||
/*
|
||||
* If in dump mode
|
||||
*/
|
||||
if (dump) {
|
||||
print_block(buf, payload_size);
|
||||
}
|
||||
break;
|
||||
|
||||
case DRR_WRITE_BYREF:
|
||||
if (do_byteswap) {
|
||||
drrwbr->drr_object =
|
||||
BSWAP_64(drrwbr->drr_object);
|
||||
drrwbr->drr_offset =
|
||||
BSWAP_64(drrwbr->drr_offset);
|
||||
drrwbr->drr_length =
|
||||
BSWAP_64(drrwbr->drr_length);
|
||||
drrwbr->drr_toguid =
|
||||
BSWAP_64(drrwbr->drr_toguid);
|
||||
drrwbr->drr_refguid =
|
||||
BSWAP_64(drrwbr->drr_refguid);
|
||||
drrwbr->drr_refobject =
|
||||
BSWAP_64(drrwbr->drr_refobject);
|
||||
drrwbr->drr_refoffset =
|
||||
BSWAP_64(drrwbr->drr_refoffset);
|
||||
drrwbr->drr_key.ddk_prop =
|
||||
BSWAP_64(drrwbr->drr_key.ddk_prop);
|
||||
}
|
||||
if (verbose) {
|
||||
(void) printf("WRITE_BYREF object = %llu "
|
||||
"checksum type = %u props = %llx "
|
||||
"offset = %llu length = %llu "
|
||||
"toguid = %llx refguid = %llx "
|
||||
"refobject = %llu refoffset = %llu\n",
|
||||
(u_longlong_t)drrwbr->drr_object,
|
||||
drrwbr->drr_checksumtype,
|
||||
(u_longlong_t)drrwbr->drr_key.ddk_prop,
|
||||
(u_longlong_t)drrwbr->drr_offset,
|
||||
(u_longlong_t)drrwbr->drr_length,
|
||||
(u_longlong_t)drrwbr->drr_toguid,
|
||||
(u_longlong_t)drrwbr->drr_refguid,
|
||||
(u_longlong_t)drrwbr->drr_refobject,
|
||||
(u_longlong_t)drrwbr->drr_refoffset);
|
||||
}
|
||||
break;
|
||||
|
||||
case DRR_FREE:
|
||||
if (do_byteswap) {
|
||||
drrf->drr_object = BSWAP_64(drrf->drr_object);
|
||||
drrf->drr_offset = BSWAP_64(drrf->drr_offset);
|
||||
drrf->drr_length = BSWAP_64(drrf->drr_length);
|
||||
}
|
||||
if (verbose) {
|
||||
(void) printf("FREE object = %llu "
|
||||
"offset = %llu length = %lld\n",
|
||||
(u_longlong_t)drrf->drr_object,
|
||||
(u_longlong_t)drrf->drr_offset,
|
||||
(longlong_t)drrf->drr_length);
|
||||
}
|
||||
break;
|
||||
case DRR_SPILL:
|
||||
if (do_byteswap) {
|
||||
drrs->drr_object = BSWAP_64(drrs->drr_object);
|
||||
drrs->drr_length = BSWAP_64(drrs->drr_length);
|
||||
drrs->drr_compressed_size =
|
||||
BSWAP_64(drrs->drr_compressed_size);
|
||||
drrs->drr_type = BSWAP_32(drrs->drr_type);
|
||||
}
|
||||
|
||||
payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
|
||||
|
||||
if (verbose) {
|
||||
sprintf_bytes(salt, drrs->drr_salt,
|
||||
ZIO_DATA_SALT_LEN);
|
||||
sprintf_bytes(iv, drrs->drr_iv,
|
||||
ZIO_DATA_IV_LEN);
|
||||
sprintf_bytes(mac, drrs->drr_mac,
|
||||
ZIO_DATA_MAC_LEN);
|
||||
|
||||
(void) printf("SPILL block for object = %llu "
|
||||
"length = %llu flags = %u "
|
||||
"compression type = %u "
|
||||
"compressed_size = %llu "
|
||||
"payload_size = %llu "
|
||||
"salt = %s iv = %s mac = %s\n",
|
||||
(u_longlong_t)drrs->drr_object,
|
||||
(u_longlong_t)drrs->drr_length,
|
||||
drrs->drr_flags,
|
||||
drrs->drr_compressiontype,
|
||||
(u_longlong_t)drrs->drr_compressed_size,
|
||||
(u_longlong_t)payload_size,
|
||||
salt,
|
||||
iv,
|
||||
mac);
|
||||
}
|
||||
(void) ssread(buf, payload_size, &zc);
|
||||
if (dump) {
|
||||
print_block(buf, payload_size);
|
||||
}
|
||||
break;
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
if (do_byteswap) {
|
||||
drrwe->drr_object =
|
||||
BSWAP_64(drrwe->drr_object);
|
||||
drrwe->drr_offset =
|
||||
BSWAP_64(drrwe->drr_offset);
|
||||
drrwe->drr_length =
|
||||
BSWAP_64(drrwe->drr_length);
|
||||
drrwe->drr_toguid =
|
||||
BSWAP_64(drrwe->drr_toguid);
|
||||
drrwe->drr_lsize =
|
||||
BSWAP_32(drrwe->drr_lsize);
|
||||
drrwe->drr_psize =
|
||||
BSWAP_32(drrwe->drr_psize);
|
||||
}
|
||||
if (verbose) {
|
||||
(void) printf("WRITE_EMBEDDED object = %llu "
|
||||
"offset = %llu length = %llu "
|
||||
"toguid = %llx comp = %u etype = %u "
|
||||
"lsize = %u psize = %u\n",
|
||||
(u_longlong_t)drrwe->drr_object,
|
||||
(u_longlong_t)drrwe->drr_offset,
|
||||
(u_longlong_t)drrwe->drr_length,
|
||||
(u_longlong_t)drrwe->drr_toguid,
|
||||
drrwe->drr_compression,
|
||||
drrwe->drr_etype,
|
||||
drrwe->drr_lsize,
|
||||
drrwe->drr_psize);
|
||||
}
|
||||
(void) ssread(buf,
|
||||
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
|
||||
if (dump) {
|
||||
print_block(buf,
|
||||
P2ROUNDUP(drrwe->drr_psize, 8));
|
||||
}
|
||||
payload_size = P2ROUNDUP(drrwe->drr_psize, 8);
|
||||
break;
|
||||
case DRR_OBJECT_RANGE:
|
||||
if (do_byteswap) {
|
||||
drror->drr_firstobj =
|
||||
BSWAP_64(drror->drr_firstobj);
|
||||
drror->drr_numslots =
|
||||
BSWAP_64(drror->drr_numslots);
|
||||
drror->drr_toguid = BSWAP_64(drror->drr_toguid);
|
||||
}
|
||||
if (verbose) {
|
||||
sprintf_bytes(salt, drror->drr_salt,
|
||||
ZIO_DATA_SALT_LEN);
|
||||
sprintf_bytes(iv, drror->drr_iv,
|
||||
ZIO_DATA_IV_LEN);
|
||||
sprintf_bytes(mac, drror->drr_mac,
|
||||
ZIO_DATA_MAC_LEN);
|
||||
|
||||
(void) printf("OBJECT_RANGE firstobj = %llu "
|
||||
"numslots = %llu flags = %u "
|
||||
"salt = %s iv = %s mac = %s\n",
|
||||
(u_longlong_t)drror->drr_firstobj,
|
||||
(u_longlong_t)drror->drr_numslots,
|
||||
drror->drr_flags,
|
||||
salt,
|
||||
iv,
|
||||
mac);
|
||||
}
|
||||
break;
|
||||
case DRR_REDACT:
|
||||
if (do_byteswap) {
|
||||
drrr->drr_object = BSWAP_64(drrr->drr_object);
|
||||
drrr->drr_offset = BSWAP_64(drrr->drr_offset);
|
||||
drrr->drr_length = BSWAP_64(drrr->drr_length);
|
||||
drrr->drr_toguid = BSWAP_64(drrr->drr_toguid);
|
||||
}
|
||||
if (verbose) {
|
||||
(void) printf("REDACT object = %llu offset = "
|
||||
"%llu length = %llu\n",
|
||||
(u_longlong_t)drrr->drr_object,
|
||||
(u_longlong_t)drrr->drr_offset,
|
||||
(u_longlong_t)drrr->drr_length);
|
||||
}
|
||||
break;
|
||||
case DRR_NUMTYPES:
|
||||
/* should never be reached */
|
||||
exit(1);
|
||||
}
|
||||
if (drr->drr_type != DRR_BEGIN && very_verbose) {
|
||||
(void) printf(" checksum = %llx/%llx/%llx/%llx\n",
|
||||
(longlong_t)drrc->drr_checksum.zc_word[0],
|
||||
(longlong_t)drrc->drr_checksum.zc_word[1],
|
||||
(longlong_t)drrc->drr_checksum.zc_word[2],
|
||||
(longlong_t)drrc->drr_checksum.zc_word[3]);
|
||||
}
|
||||
pcksum = zc;
|
||||
drr_byte_count[drr->drr_type] += payload_size;
|
||||
total_payload_size += payload_size;
|
||||
}
|
||||
free(buf);
|
||||
fletcher_4_fini();
|
||||
|
||||
/* Print final summary */
|
||||
|
||||
(void) printf("SUMMARY:\n");
|
||||
(void) printf("\tTotal DRR_BEGIN records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_BEGIN],
|
||||
(u_longlong_t)drr_byte_count[DRR_BEGIN]);
|
||||
(void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_END],
|
||||
(u_longlong_t)drr_byte_count[DRR_END]);
|
||||
(void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_OBJECT],
|
||||
(u_longlong_t)drr_byte_count[DRR_OBJECT]);
|
||||
(void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_FREEOBJECTS],
|
||||
(u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]);
|
||||
(void) printf("\tTotal DRR_WRITE records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_WRITE],
|
||||
(u_longlong_t)drr_byte_count[DRR_WRITE]);
|
||||
(void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_WRITE_BYREF],
|
||||
(u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]);
|
||||
(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld (%llu "
|
||||
"bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED],
|
||||
(u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]);
|
||||
(void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_FREE],
|
||||
(u_longlong_t)drr_byte_count[DRR_FREE]);
|
||||
(void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n",
|
||||
(u_longlong_t)drr_record_count[DRR_SPILL],
|
||||
(u_longlong_t)drr_byte_count[DRR_SPILL]);
|
||||
(void) printf("\tTotal records = %lld\n",
|
||||
(u_longlong_t)total_records);
|
||||
(void) printf("\tTotal payload size = %lld (0x%llx)\n",
|
||||
(u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size);
|
||||
(void) printf("\tTotal header overhead = %lld (0x%llx)\n",
|
||||
(u_longlong_t)total_overhead_size,
|
||||
(u_longlong_t)total_overhead_size);
|
||||
(void) printf("\tTotal stream length = %lld (0x%llx)\n",
|
||||
(u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len);
|
||||
return (0);
|
||||
}
|
||||
@@ -0,0 +1,468 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2020 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <cityhash.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <libzfs_impl.h>
|
||||
#include <libzfs.h>
|
||||
#include <libzutil.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
#include <umem.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include "zfs_fletcher.h"
|
||||
#include "zstream.h"
|
||||
|
||||
|
||||
/*
 * Sizing limits for the redup hash table: it is budgeted at
 * MAX_RDT_PHYSMEM_PERCENT percent of physical memory, but never less than
 * SMALLEST_POSSIBLE_MAX_RDT_MB megabytes.
 */
#define	MAX_RDT_PHYSMEM_PERCENT 20
#define	SMALLEST_POSSIBLE_MAX_RDT_MB 128
|
||||
|
||||
/*
 * One hash-table entry, recording where in the stream file the WRITE
 * record for the block <rde_guid, rde_object, rde_offset> can be found.
 */
typedef struct redup_entry {
	struct redup_entry *rde_next;	/* next entry in the same bucket */
	uint64_t rde_guid;		/* key: drr_toguid of the WRITE */
	uint64_t rde_object;		/* key: drr_object of the WRITE */
	uint64_t rde_offset;		/* key: drr_offset of the WRITE */
	uint64_t rde_stream_offset;	/* file offset of the WRITE header */
} redup_entry_t;
|
||||
|
||||
typedef struct redup_table {
|
||||
redup_entry_t **redup_hash_array;
|
||||
umem_cache_t *ddecache;
|
||||
uint64_t ddt_count;
|
||||
int numhashbits;
|
||||
} redup_table_t;
|
||||
|
||||
/*
 * Return the 1-based position of the most significant set bit of `i`
 * (so highbit64(1) == 1), or 0 when no bit is set.
 */
int
highbit64(uint64_t i)
{
	if (i != 0)
		return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));

	return (0);
}
|
||||
|
||||
/*
 * Allocate `n` bytes of zero-filled memory.  Never returns NULL: on
 * allocation failure an error is printed to stderr and the process exits
 * with status 1.
 */
static void *
safe_calloc(size_t n)
{
	void *rv = calloc(1, n);
	if (rv == NULL) {
		/* %zu matches size_t; the old "%u" + (int) cast truncated. */
		(void) fprintf(stderr,
		    "Error: could not allocate %zu bytes of memory\n", n);
		exit(1);
	}
	return (rv);
}
|
||||
|
||||
/*
 * fread() wrapper that reads a single item of `size` bytes into `buf`.
 * Returns fread()'s item count (1 on success, 0 when nothing was read
 * without a stream error, e.g. at end-of-file).  A stream error is fatal:
 * the message is printed and the process exits with status 1.
 */
static int
sfread(void *buf, size_t size, FILE *fp)
{
	int nitems = fread(buf, size, 1, fp);

	if (nitems != 0 || !ferror(fp))
		return (nitems);

	(void) fprintf(stderr, "Error while reading file: %s\n",
	    strerror(errno));
	exit(1);
}
|
||||
|
||||
/*
 * pread() wrapper that must fill `buf` with exactly `count` bytes taken
 * from `offset` in `fd`.  Both a pread() failure and a short read are
 * fatal: a message is printed and the process exits with status 1.
 */
static void
spread(int fd, void *buf, size_t count, off_t offset)
{
	ssize_t nread = pread(fd, buf, count, offset);

	if (nread == -1) {
		(void) fprintf(stderr,
		    "Error while reading file: %s\n",
		    strerror(errno));
		exit(1);
	}

	if ((size_t)nread == count)
		return;

	(void) fprintf(stderr,
	    "Error while reading file: short read\n");
	exit(1);
}
|
||||
|
||||
static int
|
||||
dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
|
||||
zio_cksum_t *zc, int outfd)
|
||||
{
|
||||
assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
|
||||
== sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
|
||||
fletcher_4_incremental_native(drr,
|
||||
offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
|
||||
if (drr->drr_type != DRR_BEGIN) {
|
||||
assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
|
||||
drr_checksum.drr_checksum));
|
||||
drr->drr_u.drr_checksum.drr_checksum = *zc;
|
||||
}
|
||||
fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
|
||||
sizeof (zio_cksum_t), zc);
|
||||
if (write(outfd, drr, sizeof (*drr)) == -1)
|
||||
return (errno);
|
||||
if (payload_len != 0) {
|
||||
fletcher_4_incremental_native(payload, payload_len, zc);
|
||||
if (write(outfd, payload, payload_len) == -1)
|
||||
return (errno);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
rdt_insert(redup_table_t *rdt,
|
||||
uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
|
||||
{
|
||||
uint64_t ch = cityhash4(guid, object, offset, 0);
|
||||
uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
|
||||
redup_entry_t **rdepp;
|
||||
|
||||
rdepp = &(rdt->redup_hash_array[hashcode]);
|
||||
redup_entry_t *rde = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL);
|
||||
rde->rde_next = *rdepp;
|
||||
rde->rde_guid = guid;
|
||||
rde->rde_object = object;
|
||||
rde->rde_offset = offset;
|
||||
rde->rde_stream_offset = stream_offset;
|
||||
*rdepp = rde;
|
||||
rdt->ddt_count++;
|
||||
}
|
||||
|
||||
static void
|
||||
rdt_lookup(redup_table_t *rdt,
|
||||
uint64_t guid, uint64_t object, uint64_t offset,
|
||||
uint64_t *stream_offsetp)
|
||||
{
|
||||
uint64_t ch = cityhash4(guid, object, offset, 0);
|
||||
uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
|
||||
|
||||
for (redup_entry_t *rde = rdt->redup_hash_array[hashcode];
|
||||
rde != NULL; rde = rde->rde_next) {
|
||||
if (rde->rde_guid == guid &&
|
||||
rde->rde_object == object &&
|
||||
rde->rde_offset == offset) {
|
||||
*stream_offsetp = rde->rde_stream_offset;
|
||||
return;
|
||||
}
|
||||
}
|
||||
assert(!"could not find expected redup table entry");
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a dedup stream (generated by "zfs send -D") to a
|
||||
* non-deduplicated stream. The entire infd will be converted, including
|
||||
* any substreams in a stream package (generated by "zfs send -RD"). The
|
||||
* infd must be seekable.
|
||||
*/
|
||||
static void
|
||||
zfs_redup_stream(int infd, int outfd, boolean_t verbose)
|
||||
{
|
||||
int bufsz = SPA_MAXBLOCKSIZE;
|
||||
dmu_replay_record_t thedrr = { 0 };
|
||||
dmu_replay_record_t *drr = &thedrr;
|
||||
redup_table_t rdt;
|
||||
zio_cksum_t stream_cksum;
|
||||
uint64_t numbuckets;
|
||||
uint64_t num_records = 0;
|
||||
uint64_t num_write_byref_records = 0;
|
||||
|
||||
#ifdef _ILP32
|
||||
uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20;
|
||||
#else
|
||||
uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
|
||||
uint64_t max_rde_size =
|
||||
MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100,
|
||||
SMALLEST_POSSIBLE_MAX_RDT_MB << 20);
|
||||
#endif
|
||||
|
||||
numbuckets = max_rde_size / (sizeof (redup_entry_t));
|
||||
|
||||
/*
|
||||
* numbuckets must be a power of 2. Increase number to
|
||||
* a power of 2 if necessary.
|
||||
*/
|
||||
if (!ISP2(numbuckets))
|
||||
numbuckets = 1ULL << highbit64(numbuckets);
|
||||
|
||||
rdt.redup_hash_array =
|
||||
safe_calloc(numbuckets * sizeof (redup_entry_t *));
|
||||
rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0,
|
||||
NULL, NULL, NULL, NULL, NULL, 0);
|
||||
rdt.numhashbits = highbit64(numbuckets) - 1;
|
||||
|
||||
char *buf = safe_calloc(bufsz);
|
||||
FILE *ofp = fdopen(infd, "r");
|
||||
long offset = ftell(ofp);
|
||||
while (sfread(drr, sizeof (*drr), ofp) != 0) {
|
||||
num_records++;
|
||||
|
||||
/*
|
||||
* We need to regenerate the checksum.
|
||||
*/
|
||||
if (drr->drr_type != DRR_BEGIN) {
|
||||
bzero(&drr->drr_u.drr_checksum.drr_checksum,
|
||||
sizeof (drr->drr_u.drr_checksum.drr_checksum));
|
||||
}
|
||||
|
||||
uint64_t payload_size = 0;
|
||||
switch (drr->drr_type) {
|
||||
case DRR_BEGIN:
|
||||
{
|
||||
struct drr_begin *drrb = &drr->drr_u.drr_begin;
|
||||
int fflags;
|
||||
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
|
||||
|
||||
assert(drrb->drr_magic == DMU_BACKUP_MAGIC);
|
||||
|
||||
/* clear the DEDUP feature flag for this stream */
|
||||
fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
|
||||
fflags &= ~(DMU_BACKUP_FEATURE_DEDUP |
|
||||
DMU_BACKUP_FEATURE_DEDUPPROPS);
|
||||
DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
|
||||
|
||||
int sz = drr->drr_payloadlen;
|
||||
if (sz != 0) {
|
||||
if (sz > bufsz) {
|
||||
free(buf);
|
||||
buf = safe_calloc(sz);
|
||||
bufsz = sz;
|
||||
}
|
||||
(void) sfread(buf, sz, ofp);
|
||||
}
|
||||
payload_size = sz;
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_END:
|
||||
{
|
||||
struct drr_end *drre = &drr->drr_u.drr_end;
|
||||
/*
|
||||
* Use the recalculated checksum, unless this is
|
||||
* the END record of a stream package, which has
|
||||
* no checksum.
|
||||
*/
|
||||
if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
|
||||
drre->drr_checksum = stream_cksum;
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_OBJECT:
|
||||
{
|
||||
struct drr_object *drro = &drr->drr_u.drr_object;
|
||||
|
||||
if (drro->drr_bonuslen > 0) {
|
||||
payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
|
||||
(void) sfread(buf, payload_size, ofp);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_SPILL:
|
||||
{
|
||||
struct drr_spill *drrs = &drr->drr_u.drr_spill;
|
||||
payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
|
||||
(void) sfread(buf, payload_size, ofp);
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_WRITE_BYREF:
|
||||
{
|
||||
struct drr_write_byref drrwb =
|
||||
drr->drr_u.drr_write_byref;
|
||||
|
||||
num_write_byref_records++;
|
||||
|
||||
/*
|
||||
* Look up in hash table by drrwb->drr_refguid,
|
||||
* drr_refobject, drr_refoffset. Replace this
|
||||
* record with the found WRITE record, but with
|
||||
* drr_object,drr_offset,drr_toguid replaced with ours.
|
||||
*/
|
||||
uint64_t stream_offset;
|
||||
rdt_lookup(&rdt, drrwb.drr_refguid,
|
||||
drrwb.drr_refobject, drrwb.drr_refoffset,
|
||||
&stream_offset);
|
||||
|
||||
spread(infd, drr, sizeof (*drr), stream_offset);
|
||||
|
||||
assert(drr->drr_type == DRR_WRITE);
|
||||
struct drr_write *drrw = &drr->drr_u.drr_write;
|
||||
assert(drrw->drr_toguid == drrwb.drr_refguid);
|
||||
assert(drrw->drr_object == drrwb.drr_refobject);
|
||||
assert(drrw->drr_offset == drrwb.drr_refoffset);
|
||||
|
||||
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
||||
spread(infd, buf, payload_size,
|
||||
stream_offset + sizeof (*drr));
|
||||
|
||||
drrw->drr_toguid = drrwb.drr_toguid;
|
||||
drrw->drr_object = drrwb.drr_object;
|
||||
drrw->drr_offset = drrwb.drr_offset;
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_WRITE:
|
||||
{
|
||||
struct drr_write *drrw = &drr->drr_u.drr_write;
|
||||
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
||||
(void) sfread(buf, payload_size, ofp);
|
||||
|
||||
rdt_insert(&rdt, drrw->drr_toguid,
|
||||
drrw->drr_object, drrw->drr_offset, offset);
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
{
|
||||
struct drr_write_embedded *drrwe =
|
||||
&drr->drr_u.drr_write_embedded;
|
||||
payload_size =
|
||||
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
|
||||
(void) sfread(buf, payload_size, ofp);
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_FREEOBJECTS:
|
||||
case DRR_FREE:
|
||||
case DRR_OBJECT_RANGE:
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) fprintf(stderr, "INVALID record type 0x%x\n",
|
||||
drr->drr_type);
|
||||
/* should never happen, so assert */
|
||||
assert(B_FALSE);
|
||||
}
|
||||
|
||||
if (feof(ofp)) {
|
||||
fprintf(stderr, "Error: unexpected end-of-file\n");
|
||||
exit(1);
|
||||
}
|
||||
if (ferror(ofp)) {
|
||||
fprintf(stderr, "Error while reading file: %s\n",
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to recalculate the checksum, and it needs to be
|
||||
* initially zero to do that. BEGIN records don't have
|
||||
* a checksum.
|
||||
*/
|
||||
if (drr->drr_type != DRR_BEGIN) {
|
||||
bzero(&drr->drr_u.drr_checksum.drr_checksum,
|
||||
sizeof (drr->drr_u.drr_checksum.drr_checksum));
|
||||
}
|
||||
if (dump_record(drr, buf, payload_size,
|
||||
&stream_cksum, outfd) != 0)
|
||||
break;
|
||||
if (drr->drr_type == DRR_END) {
|
||||
/*
|
||||
* Typically the END record is either the last
|
||||
* thing in the stream, or it is followed
|
||||
* by a BEGIN record (which also zeros the checksum).
|
||||
* However, a stream package ends with two END
|
||||
* records. The last END record's checksum starts
|
||||
* from zero.
|
||||
*/
|
||||
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
|
||||
}
|
||||
offset = ftell(ofp);
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
char mem_str[16];
|
||||
zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t),
|
||||
mem_str, sizeof (mem_str));
|
||||
fprintf(stderr, "converted stream with %llu total records, "
|
||||
"including %llu dedup records, using %sB memory.\n",
|
||||
(long long)num_records,
|
||||
(long long)num_write_byref_records,
|
||||
mem_str);
|
||||
}
|
||||
|
||||
umem_cache_destroy(rdt.ddecache);
|
||||
free(rdt.redup_hash_array);
|
||||
free(buf);
|
||||
(void) fclose(ofp);
|
||||
}
|
||||
|
||||
int
|
||||
zstream_do_redup(int argc, char *argv[])
|
||||
{
|
||||
boolean_t verbose = B_FALSE;
|
||||
char c;
|
||||
|
||||
while ((c = getopt(argc, argv, "v")) != -1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
verbose = B_TRUE;
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, "invalid option '%c'\n",
|
||||
optopt);
|
||||
zstream_usage();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (argc != 1)
|
||||
zstream_usage();
|
||||
|
||||
const char *filename = argv[0];
|
||||
|
||||
if (isatty(STDOUT_FILENO)) {
|
||||
(void) fprintf(stderr,
|
||||
"Error: Stream can not be written to a terminal.\n"
|
||||
"You must redirect standard output.\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
int fd = open(filename, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
(void) fprintf(stderr,
|
||||
"Error while opening file '%s': %s\n",
|
||||
filename, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fletcher_4_init();
|
||||
zfs_redup_stream(fd, STDOUT_FILENO, verbose);
|
||||
fletcher_4_fini();
|
||||
|
||||
close(fd);
|
||||
|
||||
return (0);
|
||||
}
|
||||
Reference in New Issue
Block a user