Dump unique configurations and Uberblocks in zdb -lu

For zdb -l, detect when the configuration nvlist in some label l (l>0)
is the same as a configuration already dumped.  If so, do not dump it.

Make a similar check when dumping Uberblocks for zdb -lu.  Check whether
a label already dumped contains an identical Uberblock.  If so, do not
dump the Uberblock.

When dumping a configuration or Uberblock, state which labels it is
found in (0-3), for example: labels = 1 2 3

Detecting redundant uberblocks or configurations is accomplished by
calculating checksums of the uberblocks and the packed nvlists
containing the configuration.

If there is nothing unique to be dumped for a label (i.e., the
configuration and uberblocks have checksums matching those already
dumped), print nothing for that label.

With additional l's or u's, increase verbosity as follows:

-l      Dump each unique configuration only once.
        Indicate which labels it appears in.
-ll     In addition, dump label space usage stats.
-lll    Dump every configuration, unique or not.

-u      Dump each unique, valid, uberblock only once.
        Indicate which labels it appears in.
-uu     In addition, state which slots are invalid.
-uuu    Dump every uberblock, unique or not.
-uuuu   Dump the uberblock blockpointer (used to be -uuu)

Make exit values conform to the manual page.  Failing to unpack a
configuration nvlist is considered an error, as well as failing to open
or read from the device.

Add three tests, zdb_00{3,4,5}_pos to verify the above functionality.

An example of the output:
	------------------------------------
	LABEL 0
	------------------------------------
	    version: 5000
	    name: 'pool'
	    state: 1
	    txg: 880
	    < ... redacted ... >
	    features_for_read:
		com.delphix:hole_birth
		com.delphix:embedded_data
	    labels = 0
	    Uberblock[0]
		magic = 0000000000bab10c
		version = 5000
		txg = 0
		guid_sum = 3038694082047428541
		timestamp = 1487715500 UTC = Tue Feb 21 14:18:20 2017
		labels = 0 1 2 3
	    Uberblock[4]
		magic = 0000000000bab10c
		version = 5000
		txg = 772
		guid_sum = 9045970794941528051
		timestamp = 1487727291 UTC = Tue Feb 21 17:34:51 2017
		labels = 0
	    < ... redacted ... >
	------------------------------------
	LABEL 1
	------------------------------------
	    version: 5000
	    name: 'pool'
	    state: 1
	    txg: 14
	    < ... redacted ... >
		com.delphix:embedded_data
	    labels = 1 2 3
	    Uberblock[4]
		magic = 0000000000bab10c
		version = 5000
		txg = 4
		guid_sum = 7793930272573252584
		timestamp = 1487727521 UTC = Tue Feb 21 17:38:41 2017
		labels = 1 2 3
	    < ... redacted ... >

Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed-by: Don Brady <don.brady@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #5738
This commit is contained in:
Olaf Faaland 2017-03-06 16:01:45 -08:00 committed by Brian Behlendorf
parent 7a789346af
commit 3c9e0d673e
7 changed files with 477 additions and 65 deletions

View File

@ -2127,7 +2127,7 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
(void) printf("\ttimestamp = %llu UTC = %s",
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
if (dump_opt['u'] >= 3) {
if (dump_opt['u'] >= 4) {
char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
(void) printf("\trootbp = %s\n", blkbuf);
@ -2205,31 +2205,6 @@ dump_cachefile(const char *cachefile)
nvlist_free(config);
}
#define ZDB_MAX_UB_HEADER_SIZE 32
/*
 * Walk every uberblock slot of the in-memory label `lbl` and dump each
 * slot that uberblock_verify() accepts (returns 0 for).  `ashift` is
 * stored in a stack vdev_t that is handed to the VDEV_UBERBLOCK_COUNT()
 * and VDEV_UBERBLOCK_OFFSET() macros.
 */
static void
dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
{
	char header[ZDB_MAX_UB_HEADER_SIZE];
	vdev_t vd;
	vdev_t *vdp = &vd;
	int slot;

	vdp->vdev_ashift = ashift;
	vdp->vdev_top = vdp;

	for (slot = 0; slot < VDEV_UBERBLOCK_COUNT(vdp); slot++) {
		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, slot);
		uberblock_t *ub = (void *)((char *)lbl + uoff);

		if (uberblock_verify(ub) == 0) {
			(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
			    "Uberblock[%d]\n", slot);
			dump_uberblock(ub, header, "");
		}
	}
}
/*
* ZFS label nvlist stats
*/
@ -2363,18 +2338,182 @@ dump_nvlist_stats(nvlist_t *nvl, size_t cap)
nvlist_free(stats.zns_boolean);
}
/*
 * One record per distinct checksum seen while scanning the labels of a
 * device.  Records live in an AVL tree ordered by cksum_record_compare().
 */
typedef struct cksum_record {
/* checksum value; the only field the tree comparator looks at */
zio_cksum_t cksum;
/* labels[l] is B_TRUE when label l contained data with this checksum */
boolean_t labels[VDEV_LABELS];
/* AVL linkage; its offset is what dump_label() passes to avl_create() */
avl_node_t link;
} cksum_record_t;
/*
 * AVL comparator for cksum_record_t nodes.
 *
 * The two checksums are compared word by word, starting at zc_word[0].
 * The return value is the AVL_CMP() result for the first pair of words
 * that differ, or 0 when every word matches.
 *
 * `difference` starts at 0 so the return value is well defined on every
 * path and the compiler has no reason to flag it as possibly
 * uninitialized.
 */
static int
cksum_record_compare(const void *x1, const void *x2)
{
	const cksum_record_t *l = x1;
	const cksum_record_t *r = x2;
	int arraysize = ARRAY_SIZE(l->cksum.zc_word);
	int difference = 0;

	for (int i = 0; i < arraysize; i++) {
		difference = AVL_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]);
		if (difference)
			break;
	}

	return (difference);
}
/*
 * Allocate a zero-filled record holding a copy of `cksum` and mark it
 * as present in label `l`.  The allocation uses UMEM_NOFAIL.
 */
static cksum_record_t *
cksum_record_alloc(zio_cksum_t *cksum, int l)
{
	cksum_record_t *fresh = umem_zalloc(sizeof (cksum_record_t),
	    UMEM_NOFAIL);

	fresh->labels[l] = B_TRUE;
	fresh->cksum = *cksum;

	return (fresh);
}
/*
 * Search `tree` for a record whose checksum equals `cksum` and return
 * whatever avl_find() reports for it.
 */
static cksum_record_t *
cksum_record_lookup(avl_tree_t *tree, zio_cksum_t *cksum)
{
	/* Only the checksum is filled in; the remaining fields are zero. */
	cksum_record_t key = { .cksum = *cksum };
	avl_index_t insert_pos;
	cksum_record_t *match;

	match = avl_find(tree, &key, &insert_pos);

	return (match);
}
/*
 * Record that label `l` holds data with checksum `cksum`.  If the tree
 * already has a record for that checksum, its flag for label `l` is
 * set; otherwise a new record is allocated and added.  The record is
 * returned in both cases.
 */
static cksum_record_t *
cksum_record_insert(avl_tree_t *tree, zio_cksum_t *cksum, int l)
{
	cksum_record_t *entry = cksum_record_lookup(tree, cksum);

	if (entry == NULL) {
		/* First sighting of this checksum. */
		entry = cksum_record_alloc(cksum, l);
		avl_add(tree, entry);
		return (entry);
	}

	entry->labels[l] = B_TRUE;
	return (entry);
}
/*
 * Return the lowest label index whose flag is set in `rec`, or -1 when
 * no flag is set.
 */
static int
first_label(cksum_record_t *rec)
{
	int idx = 0;

	while (idx < VDEV_LABELS) {
		if (rec->labels[idx])
			return (idx);
		idx++;
	}

	return (-1);
}
/*
 * Print `prefix`, then the index of every label whose flag is set in
 * `rec` (each followed by a space), then a newline.
 *
 * `prefix` is only read, so it is taken as a pointer to const; the
 * callers in this file pass string literals.
 */
static void
print_label_numbers(const char *prefix, cksum_record_t *rec)
{
	(void) printf("%s", prefix);
	for (int i = 0; i < VDEV_LABELS; i++) {
		if (rec->labels[i] == B_TRUE)
			(void) printf("%d ", i);
	}
	(void) printf("\n");
}
/*
 * Size of the per-label uberblock record array below.
 * NOTE(review): presumably the largest slot count VDEV_UBERBLOCK_COUNT()
 * can yield (ring size divided by the smallest uberblock size) - confirm.
 */
#define MAX_UBERBLOCK_COUNT (VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT)
/*
 * Everything dump_label() keeps about one of the device's labels between
 * its scan pass and its print pass.
 */
typedef struct label {
/* raw label contents as read from the device */
vdev_label_t label;
/* unpacked configuration; stays NULL when unpacking failed */
nvlist_t *config_nv;
/* record for this label's configuration checksum */
cksum_record_t *config;
/* per-slot uberblock records; NULL where uberblock_verify() rejected */
cksum_record_t *uberblocks[MAX_UBERBLOCK_COUNT];
/* set once the "LABEL n" banner has been printed for this label */
boolean_t header_printed;
/* set when the label could not be read from the device */
boolean_t read_failed;
} label_t;
/*
 * Print the three-line "LABEL n" banner for label `l`, at most once per
 * label.  Nothing is printed when -q was given.
 */
static void
print_label_header(label_t *label, int l)
{
	if (dump_opt['q'] || label->header_printed == B_TRUE)
		return;

	label->header_printed = B_TRUE;

	(void) printf("------------------------------------\n");
	(void) printf("LABEL %d\n", l);
	(void) printf("------------------------------------\n");
}
/*
 * Dump the configuration nvlist of label `l`.
 *
 * Below -lll a configuration is dumped only from the first label that
 * carries it; at -lll and above it is dumped for every label.  From -ll
 * upward the nvlist statistics are printed too, with `buflen` passed on
 * to dump_nvlist_stats().  -q suppresses everything.
 */
static void
dump_config_from_label(label_t *label, size_t buflen, int l)
{
	if (dump_opt['q'])
		return;

	if (dump_opt['l'] < 3) {
		/* Another, lower-numbered label already showed this one. */
		if (first_label(label->config) != l)
			return;
	}

	print_label_header(label, l);
	dump_nvlist(label->config_nv, 4);
	print_label_numbers(" labels = ", label->config);

	if (dump_opt['l'] < 2)
		return;

	dump_nvlist_stats(label->config_nv, buflen);
}
#define ZDB_MAX_UB_HEADER_SIZE 32
/*
 * Dump the uberblocks of label number `label_num`.
 *
 * A slot without a checksum record is reported as invalid, but only at
 * -uu and above.  A slot with a record is dumped when this is the first
 * label carrying that uberblock, or unconditionally at -uuu and above.
 * `ashift` is stored in a stack vdev_t that is handed to the
 * VDEV_UBERBLOCK_COUNT() and VDEV_UBERBLOCK_OFFSET() macros.
 */
static void
dump_label_uberblocks(label_t *label, uint64_t ashift, int label_num)
{
	char header[ZDB_MAX_UB_HEADER_SIZE];
	char *base = (char *)&label->label;
	vdev_t vd;

	vd.vdev_top = &vd;
	vd.vdev_ashift = ashift;

	for (int slot = 0; slot < VDEV_UBERBLOCK_COUNT(&vd); slot++) {
		cksum_record_t *rec = label->uberblocks[slot];
		uberblock_t *ub;

		if (rec == NULL) {
			if (dump_opt['u'] >= 2) {
				print_label_header(label, label_num);
				(void) printf(" Uberblock[%d] invalid\n", slot);
			}
			continue;
		}

		if (dump_opt['u'] >= 3 || first_label(rec) == label_num) {
			ub = (void *)(base + VDEV_UBERBLOCK_OFFSET(&vd, slot));

			print_label_header(label, label_num);
			(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
			    " Uberblock[%d]\n", slot);
			dump_uberblock(ub, header, "");
			print_label_numbers(" labels = ", rec);
		}
	}
}
static int
dump_label(const char *dev)
{
int fd;
vdev_label_t label;
char path[MAXPATHLEN];
char *buf = label.vl_vdev_phys.vp_nvlist;
size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
struct stat64 statbuf;
label_t labels[VDEV_LABELS];
uint64_t psize, ashift;
boolean_t label_found = B_FALSE;
int l;
struct stat64 statbuf;
boolean_t config_found = B_FALSE;
boolean_t error = B_FALSE;
avl_tree_t config_tree;
avl_tree_t uberblock_tree;
void *node, *cookie;
int fd;
bzero(labels, sizeof (labels));
(void) strlcpy(path, dev, sizeof (path));
@ -2390,52 +2529,118 @@ dump_label(const char *dev)
exit(1);
}
avl_create(&config_tree, cksum_record_compare,
sizeof (cksum_record_t), offsetof(cksum_record_t, link));
avl_create(&uberblock_tree, cksum_record_compare,
sizeof (cksum_record_t), offsetof(cksum_record_t, link));
psize = statbuf.st_size;
psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
ashift = SPA_MINBLOCKSHIFT;
for (l = 0; l < VDEV_LABELS; l++) {
nvlist_t *config = NULL;
/*
* 1. Read the label from disk
* 2. Unpack the configuration and insert in config tree.
* 3. Traverse all uberblocks and insert in uberblock tree.
*/
for (int l = 0; l < VDEV_LABELS; l++) {
label_t *label = &labels[l];
char *buf = label->label.vl_vdev_phys.vp_nvlist;
size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist);
nvlist_t *config;
cksum_record_t *rec;
zio_cksum_t cksum;
vdev_t vd;
if (!dump_opt['q']) {
(void) printf("------------------------------------\n");
(void) printf("LABEL %d\n", l);
(void) printf("------------------------------------\n");
}
if (pread64(fd, &label, sizeof (label),
vdev_label_offset(psize, l, 0)) != sizeof (label)) {
if (pread64(fd, &label->label, sizeof (label->label),
vdev_label_offset(psize, l, 0)) != sizeof (label->label)) {
if (!dump_opt['q'])
(void) printf("failed to read label %d\n", l);
label->read_failed = B_TRUE;
error = B_TRUE;
continue;
}
if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
if (!dump_opt['q'])
(void) printf("failed to unpack label %d\n", l);
ashift = SPA_MINBLOCKSHIFT;
} else {
nvlist_t *vdev_tree = NULL;
label->read_failed = B_FALSE;
if (nvlist_unpack(buf, buflen, &config, 0) == 0) {
nvlist_t *vdev_tree = NULL;
size_t size;
if (!dump_opt['q']) {
dump_nvlist(config, 4);
if (l == 3 && dump_opt['l'] >= 2)
dump_nvlist_stats(config, buflen);
}
if ((nvlist_lookup_nvlist(config,
ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
(nvlist_lookup_uint64(vdev_tree,
ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
ashift = SPA_MINBLOCKSHIFT;
nvlist_free(config);
label_found = B_TRUE;
if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0)
size = buflen;
fletcher_4_native_varsize(buf, size, &cksum);
rec = cksum_record_insert(&config_tree, &cksum, l);
label->config = rec;
label->config_nv = config;
config_found = B_TRUE;
} else {
error = B_TRUE;
}
vd.vdev_ashift = ashift;
vd.vdev_top = &vd;
for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) {
uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i);
uberblock_t *ub = (void *)((char *)label + uoff);
if (uberblock_verify(ub))
continue;
fletcher_4_native_varsize(ub, sizeof (*ub), &cksum);
rec = cksum_record_insert(&uberblock_tree, &cksum, l);
label->uberblocks[i] = rec;
}
if (dump_opt['u'])
dump_label_uberblocks(&label, ashift);
}
/*
* Dump the label and uberblocks.
*/
for (int l = 0; l < VDEV_LABELS; l++) {
label_t *label = &labels[l];
size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist);
if (label->read_failed == B_TRUE)
continue;
if (label->config_nv) {
dump_config_from_label(label, buflen, l);
} else {
if (!dump_opt['q'])
(void) printf("failed to unpack label %d\n", l);
}
if (dump_opt['u'])
dump_label_uberblocks(label, ashift, l);
nvlist_free(label->config_nv);
}
cookie = NULL;
while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL)
umem_free(node, sizeof (cksum_record_t));
cookie = NULL;
while ((node = avl_destroy_nodes(&uberblock_tree, &cookie)) != NULL)
umem_free(node, sizeof (cksum_record_t));
avl_destroy(&config_tree);
avl_destroy(&uberblock_tree);
(void) close(fd);
return (label_found ? 0 : 2);
return (config_found == B_FALSE ? 2 :
(error == B_TRUE ? 1 : 0));
}
static uint64_t dataset_feature_count[SPA_FEATURES];

View File

@ -177,13 +177,16 @@ transaction type.
.ad
.sp .6
.RS 4n
Read the vdev labels from the specified device. \fBzdb -l\fR will return 0 if
a valid label was found, 1 if an error occurred, and 2 if no valid labels were found.
Read the vdev labels from the specified device and dump the unique
configuration nvlist(s). \fBzdb -l\fR will return 1 if an error occurred, 2 if
no configuration nvlist could be unpacked (errors or not), and 0 otherwise.
Specify multiple times to increase verbosity.
.P
If the \fB-u\fR option is also specified, also display the uberblocks on this
device.
device. Specify multiple times to increase verbosity.
.P
If the \fB-q\fR option is also specified, don't print the labels.
If the \fB-q\fR option is also specified, don't dump the configurations or the
uberblocks.
.RE
.sp

View File

@ -71,7 +71,8 @@ tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos',
'clean_mirror_003_pos', 'clean_mirror_004_pos']
[tests/functional/cli_root/zdb]
tests = ['zdb_001_neg', 'zdb_002_pos']
tests = ['zdb_001_neg', 'zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos',
'zdb_005_pos']
pre =
post =

View File

@ -1,4 +1,7 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zdb
dist_pkgdata_SCRIPTS = \
zdb_001_neg.ksh \
zdb_002_pos.ksh
zdb_002_pos.ksh \
zdb_003_pos.ksh \
zdb_004_pos.ksh \
zdb_005_pos.ksh

View File

@ -0,0 +1,58 @@
#!/bin/ksh

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib

#
# Description:
# zdb will not produce redundant dumps of configurations
#
# Strategy:
# 1. Create a pool with two vdevs
# 2. Copy the first 256K (label 0) from the first vdev to the second vdev
# 3. Collect zdb -l output for both vdevs
# 4. Verify that the correct number of configs is dumped for each
#

log_assert "Verify zdb does not produce redundant dumps of configurations"
log_onexit cleanup

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

verify_runnable "global"
verify_disk_count "$DISKS" 2

# Expected number of configuration dumps, indexed by vdev.  This array
# must not share a name with the scalar measured in the loop below:
# in ksh, assigning to a bare array name overwrites element 0, which
# would make the vdev 0 check compare a value with itself.
expected_count=(1 2)
set -A DISK $DISKS

default_mirror_setup_noexit $DISKS

log_must $DD if=/dev/${DISK[0]} of=/dev/${DISK[1]} bs=1K count=256 conv=notrunc

for x in 0 1 ; do
	config_count=$($ZDB -l $DEV_RDSKDIR/${DISK[$x]} | $GREP -c features_for_read)
	(( $? != 0)) && log_fail "failed to get config_count from DISK[$x]"
	log_note "vdev $x: message_count $config_count"
	[ $config_count -ne ${expected_count[$x]} ] && \
	    log_fail "zdb produces an incorrect number of configuration dumps."
done

cleanup

log_pass "zdb produces unique dumps of configurations."

View File

@ -0,0 +1,78 @@
#!/bin/ksh

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib

#
# Description:
# zdb will not produce redundant dumps of uberblocks
#
# Strategy:
# 1. Create a pool with two vdevs, A and B
# 2. Offline vdev A
# 3. Do some I/O
# 4. Export the pool
# 5. Copy the first 256K (label 0) from vdev A to vdev B
# 6. Collect zdb -lu output for vdev B
# 7. Verify labels 0 and 1 have unique Uberblocks, but 2 and 3 have none
#

log_assert "Verify zdb produces unique dumps of uberblocks"
log_onexit cleanup

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
	for DISK in $DISKS; do
		$ZPOOL labelclear -f $DEV_RDSKDIR/$DISK
	done
}

verify_runnable "global"
verify_disk_count "$DISKS" 2
set -A DISK $DISKS

default_mirror_setup_noexit $DISKS
log_must $ZPOOL offline $TESTPOOL ${DISK[0]}
log_must $DD if=/dev/urandom of=$TESTDIR/testfile bs=1K count=2
log_must $ZPOOL export $TESTPOOL
log_must $DD if=$DEV_RDSKDIR/${DISK[0]} of=$DEV_RDSKDIR/${DISK[1]} bs=1K count=256 conv=notrunc

ubs=$($ZDB -lu $DEV_RDSKDIR/${DISK[1]} | $GREP -e LABEL -e Uberblock -e 'labels = ')
log_note "vdev 1: ubs $ubs"

# zdb prints nothing at all for a label with no unique content, so the
# awk array may have no entry for that label.  Adding 0 forces each
# field to print as a number; an empty field would disappear during
# word splitting and shift the later counts onto the wrong label index.
ub_dump_counts=$($ZDB -lu $DEV_RDSKDIR/${DISK[1]} | \
    $AWK '	/LABEL/	{label=$NF; blocks[label]=0};
		/Uberblock/ {blocks[label]++};
		END {print blocks[0]+0,blocks[1]+0,blocks[2]+0,blocks[3]+0}')
(( $? != 0)) && log_fail "failed to get ub_dump_counts from DISK[1]"
log_note "vdev 1: ub_dump_counts $ub_dump_counts"

set -A dump_count $ub_dump_counts
for label in 0 1 2 3; do
	if [[ $label -lt 2 ]]; then
		[[ ${dump_count[$label]} -eq 0 ]] && \
		    log_fail "zdb failed to dump unique uberblocks for label $label"
	else
		[[ ${dump_count[$label]} -ne 0 ]] && \
		    log_fail "zdb incorrectly dumps duplicate uberblocks"
	fi
done

cleanup

log_pass "zdb produces unique dumps of uberblocks"

View File

@ -0,0 +1,64 @@
#!/bin/ksh

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib

#
# Description:
# zdb -l exit codes are correct
#
# Strategy:
# 1. Create a pool
# 2. Overwrite label 0 on vdev[1] with dd
# 3. Create an empty file
# 4. Run zdb -l on vdev[0] and verify exit value 0
# 5. Run zdb -l on vdev[1] and verify exit value 1
# 6. Run zdb -l on the empty file and verify exit value 2
#

log_assert "Verify zdb -l exit codes are correct"
log_onexit cleanup

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

verify_runnable "global"
verify_disk_count "$DISKS" 2
set -A DISK $DISKS

default_mirror_setup_noexit $DISKS
log_must $DD if=/dev/zero of=$DEV_RDSKDIR/${DISK[1]} bs=1K count=256 conv=notrunc
log_must $TRUNCATE -s 0 $TEMPFILE

# Untouched vdev: every label unpacks, so zdb must exit 0.
$ZDB -l $DEV_RDSKDIR/${DISK[0]}
rc=$?
if [[ $rc -ne 0 ]]; then
	log_fail "zdb -l exit codes are incorrect."
fi

# Label 0 zeroed: one unpack failure but other configs found, so exit 1.
$ZDB -l $DEV_RDSKDIR/${DISK[1]}
rc=$?
if [[ $rc -ne 1 ]]; then
	log_fail "zdb -l exit codes are incorrect."
fi

# Empty file: no configuration can be unpacked, so exit 2.
$ZDB -l $TEMPFILE
rc=$?
if [[ $rc -ne 2 ]]; then
	log_fail "zdb -l exit codes are incorrect."
fi

cleanup

log_pass "zdb -l exit codes are correct."