mirror_zfs/cmd/zstream/zstream_redup.c
Rob N d0aa9dbccf
Use memset to zero stack allocations containing unions
C99 6.7.8.17 says that when an undesignated initialiser is used, only
the first element of a union is initialised. If the first element is not
the largest within the union, how the remaining space is initialised is
up to the compiler.

GCC extends the initialiser to the entire union, while Clang treats the
remainder as padding, and so initialises according to whatever
automatic/implicit initialisation rules are currently active.

When Linux is compiled with CONFIG_INIT_STACK_ALL_PATTERN,
-ftrivial-auto-var-init=pattern is added to the kernel CFLAGS. This flag
sets the policy for automatic/implicit initialisation of variables on
the stack.

Taken together, this means that when compiling under
CONFIG_INIT_STACK_ALL_PATTERN on Clang, the "zero" initialiser will only
zero the first element in a union, and the rest will be filled with a
pattern. This is significant for aes_ctx_t, which in
aes_encrypt_atomic() and aes_decrypt_atomic() is initialised to zero,
but then used as a gcm_ctx_t, which is the fifth element in the union,
and thus gets pattern initialisation. Later, it's assumed to be zero,
resulting in a hang.

As confusing and undiscoverable as it is, by the spec, we are at fault
when we initialise a structure containing a union with the zero
initialiser. As such, this commit replaces these uses with an explicit
memset(0).

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Tino Reichardt <milky-zfs@mcmilk.de>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #16135
Closes #16206
2024-05-24 19:00:29 -07:00

492 lines
12 KiB
C

/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2020 by Delphix. All rights reserved.
*/
#include <assert.h>
#include <cityhash.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libzfs.h>
#include <libzutil.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <umem.h>
#include <unistd.h>
#include <sys/debug.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>
#include <sys/zio_checksum.h>
#include "zfs_fletcher.h"
#include "zstream.h"
/*
 * Percentage of physical memory used when sizing the redup hash table
 * (applies only when _ILP32 is not defined).
 */
#define MAX_RDT_PHYSMEM_PERCENT 20
/*
 * Lower bound, in MiB, for that sizing figure. When _ILP32 is defined
 * this value is used on its own.
 */
#define SMALLEST_POSSIBLE_MAX_RDT_MB 128
/*
 * One redup hash table entry: maps the key (guid, object, offset) to the
 * position in the input stream of the record that was inserted under it.
 */
typedef struct redup_entry {
/* next entry in the same hash bucket; NULL terminates the chain */
struct redup_entry *rde_next;
/* key, part 1: compared against the guid passed to rdt_lookup() */
uint64_t rde_guid;
/* key, part 2: object number */
uint64_t rde_object;
/* key, part 3: offset within the object */
uint64_t rde_offset;
/* value: byte offset of the record header within the input stream */
uint64_t rde_stream_offset;
} redup_entry_t;
/*
 * Chained hash table of redup_entry_t, indexed by the low numhashbits
 * bits of a cityhash4() of the entry key.
 */
typedef struct redup_table {
/* array of bucket heads, one per hash value */
redup_entry_t **redup_hash_array;
/* umem cache that rdt_insert() allocates entries from */
umem_cache_t *ddecache;
/* number of entries inserted so far */
uint64_t ddt_count;
/* how many low-order hash bits select a bucket */
int numhashbits;
} redup_table_t;
/*
 * Return the 1-based index of the most significant set bit in i
 * (so 1 yields 1 and a value with the top bit set yields 64), or 0
 * when i has no bits set.
 */
int
highbit64(uint64_t i)
{
	const int width = NBBY * sizeof (uint64_t);

	/* __builtin_clzll() is undefined for 0, hence the explicit guard. */
	return (i != 0 ? width - __builtin_clzll(i) : 0);
}
/*
 * Allocate n bytes of zero-filled memory.
 *
 * Never returns NULL: if calloc() fails, an error naming the requested
 * size is printed to stderr and the process exits with status 1.
 */
void *
safe_calloc(size_t n)
{
	void *rv = calloc(1, n);

	if (rv == NULL) {
		/*
		 * Print the size with %zu so the conversion matches size_t
		 * and large requests are not truncated in the message.
		 */
		(void) fprintf(stderr,
		    "Error: could not allocate %zu bytes of memory\n", n);
		exit(1);
	}
	return (rv);
}
/*
 * fread() wrapper that treats a stream error as fatal.
 *
 * Reads one item of `size` bytes from fp into buf and returns the item
 * count reported by fread(): 1 if the item was read, 0 otherwise.  When
 * nothing was read and the stream's error indicator is set, a message is
 * printed to stderr and the process exits with status 1.  A return of 0
 * without a stream error is left for the caller to interpret.
 */
int
sfread(void *buf, size_t size, FILE *fp)
{
	int nitems = fread(buf, size, 1, fp);

	if (nitems != 0 || !ferror(fp))
		return (nitems);

	(void) fprintf(stderr, "Error while reading file: %s\n",
	    strerror(errno));
	exit(1);
}
/*
 * pread() wrapper that requires the whole request to be satisfied.
 *
 * Reads count bytes from fd at the given offset into buf.  If pread()
 * fails, or transfers a different number of bytes than requested, a
 * message is printed to stderr and the process exits with status 1.
 */
static void
spread(int fd, void *buf, size_t count, off_t offset)
{
	ssize_t nread = pread(fd, buf, count, offset);

	if (nread == -1) {
		(void) fprintf(stderr,
		    "Error while reading file: %s\n",
		    strerror(errno));
		exit(1);
	}

	if ((size_t)nread == count)
		return;

	(void) fprintf(stderr,
	    "Error while reading file: short read\n");
	exit(1);
}
/*
 * Write all len bytes starting at buf to fd.  A write() that transfers
 * only part of the request is resumed from where it stopped, and a call
 * interrupted by a signal (EINTR) is retried.
 *
 * Returns 0 on success, or the errno value of the failing write().
 */
static int
dump_write_all(int fd, const void *buf, size_t len)
{
	const char *p = buf;

	while (len > 0) {
		ssize_t n = write(fd, p, len);

		if (n == -1) {
			if (errno == EINTR)
				continue;
			return (errno);
		}
		p += n;
		len -= (size_t)n;
	}
	return (0);
}

/*
 * Emit one replay record, and its payload if any, to outfd while folding
 * both into the running fletcher-4 checksum *zc.
 *
 * The record is checksummed in two steps: first everything up to the
 * trailing checksum field, then the checksum field itself.  For every
 * record type except DRR_BEGIN the running checksum is stored into that
 * field in between, so the field must be zero on entry (asserted).
 *
 * drr		record header; its checksum field may be modified
 * payload	payload bytes; only used when payload_len != 0
 * payload_len	payload length in bytes
 * zc		running stream checksum, updated in place
 * outfd	descriptor to write to
 *
 * Returns 0 on success, or an errno value if writing fails.  Short writes
 * are completed rather than silently truncating the output stream.
 */
static int
dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
    zio_cksum_t *zc, int outfd)
{
	int err;

	/* The checksum field must be the last thing in the record. */
	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
	fletcher_4_incremental_native(drr,
	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
	if (drr->drr_type != DRR_BEGIN) {
		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
		    drr_checksum.drr_checksum));
		drr->drr_u.drr_checksum.drr_checksum = *zc;
	}
	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
	    sizeof (zio_cksum_t), zc);

	err = dump_write_all(outfd, drr, sizeof (*drr));
	if (err != 0)
		return (err);

	if (payload_len != 0) {
		fletcher_4_incremental_native(payload, payload_len, zc);
		err = dump_write_all(outfd, payload, (size_t)payload_len);
		if (err != 0)
			return (err);
	}
	return (0);
}
/*
 * Add an entry to the redup table mapping the key (guid, object, offset)
 * to stream_offset.  The new entry is pushed onto the front of its hash
 * bucket's chain and the table's entry count is incremented.  No check
 * is made for an existing entry with the same key.
 */
static void
rdt_insert(redup_table_t *rdt,
    uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
{
	uint64_t hash = cityhash4(guid, object, offset, 0);
	uint64_t bucket = BF64_GET(hash, 0, rdt->numhashbits);
	redup_entry_t **headp = &rdt->redup_hash_array[bucket];
	redup_entry_t *entry = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL);

	entry->rde_guid = guid;
	entry->rde_object = object;
	entry->rde_offset = offset;
	entry->rde_stream_offset = stream_offset;

	/* Link in at the head of the bucket. */
	entry->rde_next = *headp;
	*headp = entry;

	rdt->ddt_count++;
}
/*
 * Find the entry keyed by (guid, object, offset) and store its stream
 * offset in *stream_offsetp.  The entry is expected to exist: if the
 * bucket's chain is exhausted without a match, an assertion fires and
 * *stream_offsetp is left untouched.
 */
static void
rdt_lookup(redup_table_t *rdt,
    uint64_t guid, uint64_t object, uint64_t offset,
    uint64_t *stream_offsetp)
{
	uint64_t hash = cityhash4(guid, object, offset, 0);
	uint64_t bucket = BF64_GET(hash, 0, rdt->numhashbits);
	redup_entry_t *entry;

	for (entry = rdt->redup_hash_array[bucket]; entry != NULL;
	    entry = entry->rde_next) {
		if (entry->rde_guid != guid ||
		    entry->rde_object != object ||
		    entry->rde_offset != offset)
			continue;

		*stream_offsetp = entry->rde_stream_offset;
		return;
	}
	assert(!"could not find expected redup table entry");
}
/*
 * Convert a dedup stream (generated by "zfs send -D") to a
 * non-deduplicated stream.  The entire infd will be converted, including
 * any substreams in a stream package (generated by "zfs send -RD").  The
 * infd must be seekable.
 *
 * Each record is read from infd, has its checksum regenerated, and is
 * written to outfd.  Every DRR_WRITE record's position in the input is
 * remembered in a hash table so that a later DRR_WRITE_BYREF record can
 * be replaced by a copy of the DRR_WRITE record it refers to.
 *
 * infd is wrapped in a stdio stream with fdopen(), and that stream is
 * fclose()d before returning.  Any read or write failure is fatal: a
 * message is printed to stderr and the process exits with status 1.
 * When verbose is set, a summary is printed to stderr at the end.
 */
static void
zfs_redup_stream(int infd, int outfd, boolean_t verbose)
{
	int bufsz = SPA_MAXBLOCKSIZE;
	dmu_replay_record_t thedrr;
	dmu_replay_record_t *drr = &thedrr;
	redup_table_t rdt;
	zio_cksum_t stream_cksum;
	uint64_t numbuckets;
	uint64_t num_records = 0;
	uint64_t num_write_byref_records = 0;

	/*
	 * Zeroed with memset() rather than an initialiser, so that every
	 * byte of the record (including all of drr_u) starts out as zero.
	 */
	memset(&thedrr, 0, sizeof (dmu_replay_record_t));
	/* Each BEGIN record resets this; start from a defined value. */
	ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);

#ifdef _ILP32
	uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20;
#else
	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
	uint64_t max_rde_size =
	    MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100,
	    SMALLEST_POSSIBLE_MAX_RDT_MB << 20);
#endif

	numbuckets = max_rde_size / (sizeof (redup_entry_t));

	/*
	 * numbuckets must be a power of 2.  Increase number to
	 * a power of 2 if necessary.
	 */
	if (!ISP2(numbuckets))
		numbuckets = 1ULL << highbit64(numbuckets);

	rdt.redup_hash_array =
	    safe_calloc(numbuckets * sizeof (redup_entry_t *));
	rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0,
	    NULL, NULL, NULL, NULL, NULL, 0);
	rdt.numhashbits = highbit64(numbuckets) - 1;
	rdt.ddt_count = 0;

	char *buf = safe_calloc(bufsz);
	FILE *ofp = fdopen(infd, "r");
	if (ofp == NULL) {
		(void) fprintf(stderr, "Error while opening stream: %s\n",
		    strerror(errno));
		exit(1);
	}
	/*
	 * Stream position of the record header about to be read.
	 * NOTE(review): ftell() returns long, so where long is 32 bits
	 * positions past 2 GiB cannot be represented -- consider ftello().
	 */
	long offset = ftell(ofp);
	int begin = 0;
	boolean_t seen = B_FALSE;

	while (sfread(drr, sizeof (*drr), ofp) != 0) {
		num_records++;

		/*
		 * We need to regenerate the checksum.
		 */
		if (drr->drr_type != DRR_BEGIN) {
			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
		}

		uint64_t payload_size = 0;
		switch (drr->drr_type) {
		case DRR_BEGIN:
		{
			struct drr_begin *drrb = &drr->drr_u.drr_begin;
			int fflags;
			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
			VERIFY0(begin++);
			seen = B_TRUE;

			assert(drrb->drr_magic == DMU_BACKUP_MAGIC);

			/* clear the DEDUP feature flag for this stream */
			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
			fflags &= ~(DMU_BACKUP_FEATURE_DEDUP |
			    DMU_BACKUP_FEATURE_DEDUPPROPS);
			/* cppcheck-suppress syntaxError */
			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);

			uint32_t sz = drr->drr_payloadlen;

			VERIFY3U(sz, <=, 1U << 28);

			if (sz != 0) {
				/* Grow the payload buffer when needed. */
				if (sz > bufsz) {
					free(buf);
					buf = safe_calloc(sz);
					bufsz = sz;
				}
				(void) sfread(buf, sz, ofp);
			}
			payload_size = sz;
			break;
		}

		case DRR_END:
		{
			struct drr_end *drre = &drr->drr_u.drr_end;
			/*
			 * We would prefer to just check --begin == 0, but
			 * replication streams have an end of stream END
			 * record, so we must avoid tripping it.
			 */
			VERIFY3B(seen, ==, B_TRUE);
			begin--;
			/*
			 * Use the recalculated checksum, unless this is
			 * the END record of a stream package, which has
			 * no checksum.
			 */
			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
				drre->drr_checksum = stream_cksum;
			break;
		}

		case DRR_OBJECT:
		{
			struct drr_object *drro = &drr->drr_u.drr_object;
			VERIFY3S(begin, ==, 1);

			if (drro->drr_bonuslen > 0) {
				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
				/* buf holds bufsz bytes; never overrun it. */
				VERIFY3U(payload_size, <=, bufsz);
				(void) sfread(buf, payload_size, ofp);
			}
			break;
		}

		case DRR_SPILL:
		{
			struct drr_spill *drrs = &drr->drr_u.drr_spill;
			VERIFY3S(begin, ==, 1);
			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
			/* buf holds bufsz bytes; never overrun it. */
			VERIFY3U(payload_size, <=, bufsz);
			(void) sfread(buf, payload_size, ofp);
			break;
		}

		case DRR_WRITE_BYREF:
		{
			/* Copied by value: drr is overwritten below. */
			struct drr_write_byref drrwb =
			    drr->drr_u.drr_write_byref;
			VERIFY3S(begin, ==, 1);

			num_write_byref_records++;

			/*
			 * Look up in hash table by drrwb->drr_refguid,
			 * drr_refobject, drr_refoffset.  Replace this
			 * record with the found WRITE record, but with
			 * drr_object,drr_offset,drr_toguid replaced with ours.
			 */
			uint64_t stream_offset = 0;
			rdt_lookup(&rdt, drrwb.drr_refguid,
			    drrwb.drr_refobject, drrwb.drr_refoffset,
			    &stream_offset);

			/*
			 * pread() leaves the stdio stream's position alone,
			 * so sequential reading continues afterwards.
			 */
			spread(infd, drr, sizeof (*drr), stream_offset);

			assert(drr->drr_type == DRR_WRITE);
			struct drr_write *drrw = &drr->drr_u.drr_write;
			assert(drrw->drr_toguid == drrwb.drr_refguid);
			assert(drrw->drr_object == drrwb.drr_refobject);
			assert(drrw->drr_offset == drrwb.drr_refoffset);

			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
			/* buf holds bufsz bytes; never overrun it. */
			VERIFY3U(payload_size, <=, bufsz);
			spread(infd, buf, payload_size,
			    stream_offset + sizeof (*drr));

			drrw->drr_toguid = drrwb.drr_toguid;
			drrw->drr_object = drrwb.drr_object;
			drrw->drr_offset = drrwb.drr_offset;
			break;
		}

		case DRR_WRITE:
		{
			struct drr_write *drrw = &drr->drr_u.drr_write;
			VERIFY3S(begin, ==, 1);
			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
			/* buf holds bufsz bytes; never overrun it. */
			VERIFY3U(payload_size, <=, bufsz);
			(void) sfread(buf, payload_size, ofp);

			/* Remember where this record's header started. */
			rdt_insert(&rdt, drrw->drr_toguid,
			    drrw->drr_object, drrw->drr_offset, offset);
			break;
		}

		case DRR_WRITE_EMBEDDED:
		{
			struct drr_write_embedded *drrwe =
			    &drr->drr_u.drr_write_embedded;
			VERIFY3S(begin, ==, 1);
			payload_size =
			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
			/* buf holds bufsz bytes; never overrun it. */
			VERIFY3U(payload_size, <=, bufsz);
			(void) sfread(buf, payload_size, ofp);
			break;
		}

		case DRR_FREEOBJECTS:
		case DRR_FREE:
		case DRR_OBJECT_RANGE:
			VERIFY3S(begin, ==, 1);
			break;

		default:
			(void) fprintf(stderr, "INVALID record type 0x%x\n",
			    drr->drr_type);
			/* should never happen, so assert */
			assert(B_FALSE);
			/* Do not emit the record if assert is compiled out. */
			exit(1);
		}

		/* A payload read above may have hit EOF or an error. */
		if (feof(ofp)) {
			fprintf(stderr, "Error: unexpected end-of-file\n");
			exit(1);
		}
		if (ferror(ofp)) {
			fprintf(stderr, "Error while reading file: %s\n",
			    strerror(errno));
			exit(1);
		}

		/*
		 * We need to recalculate the checksum, and it needs to be
		 * initially zero to do that.  BEGIN records don't have
		 * a checksum.
		 */
		if (drr->drr_type != DRR_BEGIN) {
			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
		}

		/*
		 * A failed write leaves a truncated output stream; report
		 * it and fail instead of stopping quietly.
		 */
		int err = dump_record(drr, buf, payload_size,
		    &stream_cksum, outfd);
		if (err != 0) {
			(void) fprintf(stderr,
			    "Error while writing file: %s\n", strerror(err));
			exit(1);
		}

		if (drr->drr_type == DRR_END) {
			/*
			 * Typically the END record is either the last
			 * thing in the stream, or it is followed
			 * by a BEGIN record (which also zeros the checksum).
			 * However, a stream package ends with two END
			 * records.  The last END record's checksum starts
			 * from zero.
			 */
			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
		}
		offset = ftell(ofp);
	}

	if (verbose) {
		char mem_str[16];
		zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t),
		    mem_str, sizeof (mem_str));
		fprintf(stderr, "converted stream with %llu total records, "
		    "including %llu dedup records, using %sB memory.\n",
		    (unsigned long long)num_records,
		    (unsigned long long)num_write_byref_records,
		    mem_str);
	}

	umem_cache_destroy(rdt.ddecache);
	free(rdt.redup_hash_array);
	free(buf);
	(void) fclose(ofp);
}
/*
 * Command handler: parse the arguments, open the named stream file and
 * convert it with zfs_redup_stream(), writing the result to standard
 * output.
 *
 * Accepts an optional -v flag (verbose summary on stderr) followed by
 * exactly one file name.  Refuses to run when standard output is a
 * terminal.
 *
 * Returns 0 on success and 1 if standard output is a terminal; exits
 * with status 1 if the file cannot be opened.
 */
int
zstream_do_redup(int argc, char *argv[])
{
	boolean_t verbose = B_FALSE;
	int c;

	while ((c = getopt(argc, argv, "v")) != -1) {
		switch (c) {
		case 'v':
			verbose = B_TRUE;
			break;
		case '?':
			(void) fprintf(stderr, "invalid option '%c'\n",
			    optopt);
			zstream_usage();
			break;
		}
	}

	argc -= optind;
	argv += optind;

	/* NOTE(review): presumably zstream_usage() does not return. */
	if (argc != 1)
		zstream_usage();

	const char *filename = argv[0];

	if (isatty(STDOUT_FILENO)) {
		(void) fprintf(stderr,
		    "Error: Stream can not be written to a terminal.\n"
		    "You must redirect standard output.\n");
		return (1);
	}

	int fd = open(filename, O_RDONLY);
	if (fd == -1) {
		(void) fprintf(stderr,
		    "Error while opening file '%s': %s\n",
		    filename, strerror(errno));
		exit(1);
	}

	fletcher_4_init();
	zfs_redup_stream(fd, STDOUT_FILENO, verbose);
	fletcher_4_fini();

	/*
	 * No close(fd) here: zfs_redup_stream() wraps fd in a stdio stream
	 * with fdopen() and fclose()s it, which closes the descriptor.
	 * Closing it again would be a double close.
	 */
	return (0);
}