mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 03:30:34 +03:00
d31a7cb4fa
Coverity has long complained about the checksum being uninitialized if
an END record is processed before its BEGIN record. This should not
happen, but there was no code to check for it. I had left this unfixed
since it was a low priority issue, but then
9f4ede63d2
added another instance of this.
I am making an effort to "hold the line" to keep new coverity defect
reports from going unaddressed, so I find myself forced to fix this much
earlier than I had originally planned to address it.
The solution is to maintain a counter and a flag. Then use VERIFY
statements to verify the following runtime constraints:
* Every record either has a corresponding BEGIN record, is a BEGIN
record or is the end of stream END record for replication streams.
* BEGIN records cannot be nested. i.e. There must be an END record
before another BEGIN record may be seen.
Failure to meet these constraints will cause the program to exit.
This is sufficient to ensure that the checksum is never accessed when
uninitialized.
Reported-by: Coverity (CID 1524578)
Reported-by: Coverity (CID 1524633)
Reported-by: Coverity (CID 1527295)
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Damian Szuberski <szuberskidamian@gmail.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #14176
487 lines
12 KiB
C
487 lines
12 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* This file and its contents are supplied under the terms of the
|
|
* Common Development and Distribution License ("CDDL"), version 1.0.
|
|
* You may only use this file in accordance with the terms of version
|
|
* 1.0 of the CDDL.
|
|
*
|
|
* A full copy of the text of the CDDL should have accompanied this
|
|
* source. A copy of the CDDL is also available via the Internet at
|
|
* http://www.illumos.org/license/CDDL.
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 2020 by Delphix. All rights reserved.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <cityhash.h>
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <libzfs.h>
|
|
#include <libzutil.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <umem.h>
|
|
#include <unistd.h>
|
|
#include <sys/debug.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/zfs_ioctl.h>
|
|
#include <sys/zio_checksum.h>
|
|
#include "zfs_fletcher.h"
|
|
#include "zstream.h"
|
|
|
|
|
|
#define MAX_RDT_PHYSMEM_PERCENT 20
|
|
#define SMALLEST_POSSIBLE_MAX_RDT_MB 128
|
|
|
|
typedef struct redup_entry {
|
|
struct redup_entry *rde_next;
|
|
uint64_t rde_guid;
|
|
uint64_t rde_object;
|
|
uint64_t rde_offset;
|
|
uint64_t rde_stream_offset;
|
|
} redup_entry_t;
|
|
|
|
typedef struct redup_table {
|
|
redup_entry_t **redup_hash_array;
|
|
umem_cache_t *ddecache;
|
|
uint64_t ddt_count;
|
|
int numhashbits;
|
|
} redup_table_t;
|
|
|
|
int
|
|
highbit64(uint64_t i)
|
|
{
|
|
if (i == 0)
|
|
return (0);
|
|
|
|
return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));
|
|
}
|
|
|
|
void *
|
|
safe_calloc(size_t n)
|
|
{
|
|
void *rv = calloc(1, n);
|
|
if (rv == NULL) {
|
|
fprintf(stderr,
|
|
"Error: could not allocate %u bytes of memory\n",
|
|
(int)n);
|
|
exit(1);
|
|
}
|
|
return (rv);
|
|
}
|
|
|
|
/*
|
|
* Safe version of fread(), exits on error.
|
|
*/
|
|
int
|
|
sfread(void *buf, size_t size, FILE *fp)
|
|
{
|
|
int rv = fread(buf, size, 1, fp);
|
|
if (rv == 0 && ferror(fp)) {
|
|
(void) fprintf(stderr, "Error while reading file: %s\n",
|
|
strerror(errno));
|
|
exit(1);
|
|
}
|
|
return (rv);
|
|
}
|
|
|
|
/*
|
|
* Safe version of pread(), exits on error.
|
|
*/
|
|
static void
|
|
spread(int fd, void *buf, size_t count, off_t offset)
|
|
{
|
|
ssize_t err = pread(fd, buf, count, offset);
|
|
if (err == -1) {
|
|
(void) fprintf(stderr,
|
|
"Error while reading file: %s\n",
|
|
strerror(errno));
|
|
exit(1);
|
|
} else if (err != count) {
|
|
(void) fprintf(stderr,
|
|
"Error while reading file: short read\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
static int
|
|
dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
|
|
zio_cksum_t *zc, int outfd)
|
|
{
|
|
assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
|
|
== sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
|
|
fletcher_4_incremental_native(drr,
|
|
offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
|
|
if (drr->drr_type != DRR_BEGIN) {
|
|
assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
|
|
drr_checksum.drr_checksum));
|
|
drr->drr_u.drr_checksum.drr_checksum = *zc;
|
|
}
|
|
fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
|
|
sizeof (zio_cksum_t), zc);
|
|
if (write(outfd, drr, sizeof (*drr)) == -1)
|
|
return (errno);
|
|
if (payload_len != 0) {
|
|
fletcher_4_incremental_native(payload, payload_len, zc);
|
|
if (write(outfd, payload, payload_len) == -1)
|
|
return (errno);
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
rdt_insert(redup_table_t *rdt,
|
|
uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
|
|
{
|
|
uint64_t ch = cityhash4(guid, object, offset, 0);
|
|
uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
|
|
redup_entry_t **rdepp;
|
|
|
|
rdepp = &(rdt->redup_hash_array[hashcode]);
|
|
redup_entry_t *rde = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL);
|
|
rde->rde_next = *rdepp;
|
|
rde->rde_guid = guid;
|
|
rde->rde_object = object;
|
|
rde->rde_offset = offset;
|
|
rde->rde_stream_offset = stream_offset;
|
|
*rdepp = rde;
|
|
rdt->ddt_count++;
|
|
}
|
|
|
|
static void
|
|
rdt_lookup(redup_table_t *rdt,
|
|
uint64_t guid, uint64_t object, uint64_t offset,
|
|
uint64_t *stream_offsetp)
|
|
{
|
|
uint64_t ch = cityhash4(guid, object, offset, 0);
|
|
uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
|
|
|
|
for (redup_entry_t *rde = rdt->redup_hash_array[hashcode];
|
|
rde != NULL; rde = rde->rde_next) {
|
|
if (rde->rde_guid == guid &&
|
|
rde->rde_object == object &&
|
|
rde->rde_offset == offset) {
|
|
*stream_offsetp = rde->rde_stream_offset;
|
|
return;
|
|
}
|
|
}
|
|
assert(!"could not find expected redup table entry");
|
|
}
|
|
|
|
/*
|
|
* Convert a dedup stream (generated by "zfs send -D") to a
|
|
* non-deduplicated stream. The entire infd will be converted, including
|
|
* any substreams in a stream package (generated by "zfs send -RD"). The
|
|
* infd must be seekable.
|
|
*/
|
|
static void
|
|
zfs_redup_stream(int infd, int outfd, boolean_t verbose)
|
|
{
|
|
int bufsz = SPA_MAXBLOCKSIZE;
|
|
dmu_replay_record_t thedrr = { 0 };
|
|
dmu_replay_record_t *drr = &thedrr;
|
|
redup_table_t rdt;
|
|
zio_cksum_t stream_cksum;
|
|
uint64_t numbuckets;
|
|
uint64_t num_records = 0;
|
|
uint64_t num_write_byref_records = 0;
|
|
|
|
#ifdef _ILP32
|
|
uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20;
|
|
#else
|
|
uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
|
|
uint64_t max_rde_size =
|
|
MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100,
|
|
SMALLEST_POSSIBLE_MAX_RDT_MB << 20);
|
|
#endif
|
|
|
|
numbuckets = max_rde_size / (sizeof (redup_entry_t));
|
|
|
|
/*
|
|
* numbuckets must be a power of 2. Increase number to
|
|
* a power of 2 if necessary.
|
|
*/
|
|
if (!ISP2(numbuckets))
|
|
numbuckets = 1ULL << highbit64(numbuckets);
|
|
|
|
rdt.redup_hash_array =
|
|
safe_calloc(numbuckets * sizeof (redup_entry_t *));
|
|
rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0,
|
|
NULL, NULL, NULL, NULL, NULL, 0);
|
|
rdt.numhashbits = highbit64(numbuckets) - 1;
|
|
rdt.ddt_count = 0;
|
|
|
|
char *buf = safe_calloc(bufsz);
|
|
FILE *ofp = fdopen(infd, "r");
|
|
long offset = ftell(ofp);
|
|
int begin = 0;
|
|
boolean_t seen = B_FALSE;
|
|
while (sfread(drr, sizeof (*drr), ofp) != 0) {
|
|
num_records++;
|
|
|
|
/*
|
|
* We need to regenerate the checksum.
|
|
*/
|
|
if (drr->drr_type != DRR_BEGIN) {
|
|
memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
|
|
sizeof (drr->drr_u.drr_checksum.drr_checksum));
|
|
}
|
|
|
|
uint64_t payload_size = 0;
|
|
switch (drr->drr_type) {
|
|
case DRR_BEGIN:
|
|
{
|
|
struct drr_begin *drrb = &drr->drr_u.drr_begin;
|
|
int fflags;
|
|
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
|
|
VERIFY0(begin++);
|
|
seen = B_TRUE;
|
|
|
|
assert(drrb->drr_magic == DMU_BACKUP_MAGIC);
|
|
|
|
/* clear the DEDUP feature flag for this stream */
|
|
fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
|
|
fflags &= ~(DMU_BACKUP_FEATURE_DEDUP |
|
|
DMU_BACKUP_FEATURE_DEDUPPROPS);
|
|
/* cppcheck-suppress syntaxError */
|
|
DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
|
|
|
|
int sz = drr->drr_payloadlen;
|
|
if (sz != 0) {
|
|
if (sz > bufsz) {
|
|
free(buf);
|
|
buf = safe_calloc(sz);
|
|
bufsz = sz;
|
|
}
|
|
(void) sfread(buf, sz, ofp);
|
|
}
|
|
payload_size = sz;
|
|
break;
|
|
}
|
|
|
|
case DRR_END:
|
|
{
|
|
struct drr_end *drre = &drr->drr_u.drr_end;
|
|
/*
|
|
* We would prefer to just check --begin == 0, but
|
|
* replication streams have an end of stream END
|
|
* record, so we must avoid tripping it.
|
|
*/
|
|
VERIFY3B(seen, ==, B_TRUE);
|
|
begin--;
|
|
/*
|
|
* Use the recalculated checksum, unless this is
|
|
* the END record of a stream package, which has
|
|
* no checksum.
|
|
*/
|
|
if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
|
|
drre->drr_checksum = stream_cksum;
|
|
break;
|
|
}
|
|
|
|
case DRR_OBJECT:
|
|
{
|
|
struct drr_object *drro = &drr->drr_u.drr_object;
|
|
VERIFY3S(begin, ==, 1);
|
|
|
|
if (drro->drr_bonuslen > 0) {
|
|
payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
|
|
(void) sfread(buf, payload_size, ofp);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case DRR_SPILL:
|
|
{
|
|
struct drr_spill *drrs = &drr->drr_u.drr_spill;
|
|
VERIFY3S(begin, ==, 1);
|
|
payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
|
|
(void) sfread(buf, payload_size, ofp);
|
|
break;
|
|
}
|
|
|
|
case DRR_WRITE_BYREF:
|
|
{
|
|
struct drr_write_byref drrwb =
|
|
drr->drr_u.drr_write_byref;
|
|
VERIFY3S(begin, ==, 1);
|
|
|
|
num_write_byref_records++;
|
|
|
|
/*
|
|
* Look up in hash table by drrwb->drr_refguid,
|
|
* drr_refobject, drr_refoffset. Replace this
|
|
* record with the found WRITE record, but with
|
|
* drr_object,drr_offset,drr_toguid replaced with ours.
|
|
*/
|
|
uint64_t stream_offset = 0;
|
|
rdt_lookup(&rdt, drrwb.drr_refguid,
|
|
drrwb.drr_refobject, drrwb.drr_refoffset,
|
|
&stream_offset);
|
|
|
|
spread(infd, drr, sizeof (*drr), stream_offset);
|
|
|
|
assert(drr->drr_type == DRR_WRITE);
|
|
struct drr_write *drrw = &drr->drr_u.drr_write;
|
|
assert(drrw->drr_toguid == drrwb.drr_refguid);
|
|
assert(drrw->drr_object == drrwb.drr_refobject);
|
|
assert(drrw->drr_offset == drrwb.drr_refoffset);
|
|
|
|
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
|
spread(infd, buf, payload_size,
|
|
stream_offset + sizeof (*drr));
|
|
|
|
drrw->drr_toguid = drrwb.drr_toguid;
|
|
drrw->drr_object = drrwb.drr_object;
|
|
drrw->drr_offset = drrwb.drr_offset;
|
|
break;
|
|
}
|
|
|
|
case DRR_WRITE:
|
|
{
|
|
struct drr_write *drrw = &drr->drr_u.drr_write;
|
|
VERIFY3S(begin, ==, 1);
|
|
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
|
(void) sfread(buf, payload_size, ofp);
|
|
|
|
rdt_insert(&rdt, drrw->drr_toguid,
|
|
drrw->drr_object, drrw->drr_offset, offset);
|
|
break;
|
|
}
|
|
|
|
case DRR_WRITE_EMBEDDED:
|
|
{
|
|
struct drr_write_embedded *drrwe =
|
|
&drr->drr_u.drr_write_embedded;
|
|
VERIFY3S(begin, ==, 1);
|
|
payload_size =
|
|
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
|
|
(void) sfread(buf, payload_size, ofp);
|
|
break;
|
|
}
|
|
|
|
case DRR_FREEOBJECTS:
|
|
case DRR_FREE:
|
|
case DRR_OBJECT_RANGE:
|
|
VERIFY3S(begin, ==, 1);
|
|
break;
|
|
|
|
default:
|
|
(void) fprintf(stderr, "INVALID record type 0x%x\n",
|
|
drr->drr_type);
|
|
/* should never happen, so assert */
|
|
assert(B_FALSE);
|
|
}
|
|
|
|
if (feof(ofp)) {
|
|
fprintf(stderr, "Error: unexpected end-of-file\n");
|
|
exit(1);
|
|
}
|
|
if (ferror(ofp)) {
|
|
fprintf(stderr, "Error while reading file: %s\n",
|
|
strerror(errno));
|
|
exit(1);
|
|
}
|
|
|
|
/*
|
|
* We need to recalculate the checksum, and it needs to be
|
|
* initially zero to do that. BEGIN records don't have
|
|
* a checksum.
|
|
*/
|
|
if (drr->drr_type != DRR_BEGIN) {
|
|
memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
|
|
sizeof (drr->drr_u.drr_checksum.drr_checksum));
|
|
}
|
|
if (dump_record(drr, buf, payload_size,
|
|
&stream_cksum, outfd) != 0)
|
|
break;
|
|
if (drr->drr_type == DRR_END) {
|
|
/*
|
|
* Typically the END record is either the last
|
|
* thing in the stream, or it is followed
|
|
* by a BEGIN record (which also zeros the checksum).
|
|
* However, a stream package ends with two END
|
|
* records. The last END record's checksum starts
|
|
* from zero.
|
|
*/
|
|
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
|
|
}
|
|
offset = ftell(ofp);
|
|
}
|
|
|
|
if (verbose) {
|
|
char mem_str[16];
|
|
zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t),
|
|
mem_str, sizeof (mem_str));
|
|
fprintf(stderr, "converted stream with %llu total records, "
|
|
"including %llu dedup records, using %sB memory.\n",
|
|
(long long)num_records,
|
|
(long long)num_write_byref_records,
|
|
mem_str);
|
|
}
|
|
|
|
umem_cache_destroy(rdt.ddecache);
|
|
free(rdt.redup_hash_array);
|
|
free(buf);
|
|
(void) fclose(ofp);
|
|
}
|
|
|
|
int
|
|
zstream_do_redup(int argc, char *argv[])
|
|
{
|
|
boolean_t verbose = B_FALSE;
|
|
int c;
|
|
|
|
while ((c = getopt(argc, argv, "v")) != -1) {
|
|
switch (c) {
|
|
case 'v':
|
|
verbose = B_TRUE;
|
|
break;
|
|
case '?':
|
|
(void) fprintf(stderr, "invalid option '%c'\n",
|
|
optopt);
|
|
zstream_usage();
|
|
break;
|
|
}
|
|
}
|
|
|
|
argc -= optind;
|
|
argv += optind;
|
|
|
|
if (argc != 1)
|
|
zstream_usage();
|
|
|
|
const char *filename = argv[0];
|
|
|
|
if (isatty(STDOUT_FILENO)) {
|
|
(void) fprintf(stderr,
|
|
"Error: Stream can not be written to a terminal.\n"
|
|
"You must redirect standard output.\n");
|
|
return (1);
|
|
}
|
|
|
|
int fd = open(filename, O_RDONLY);
|
|
if (fd == -1) {
|
|
(void) fprintf(stderr,
|
|
"Error while opening file '%s': %s\n",
|
|
filename, strerror(errno));
|
|
exit(1);
|
|
}
|
|
|
|
fletcher_4_init();
|
|
zfs_redup_stream(fd, STDOUT_FILENO, verbose);
|
|
fletcher_4_fini();
|
|
|
|
close(fd);
|
|
|
|
return (0);
|
|
}
|