Remove dependency on linear ABD

Wherever possible it's best to avoid depending on a linear ABD.
Update the code accordingly in the following areas.

- vdev_raidz
- zio, zio_checksum
- zfs_fm
- change abd_alloc_for_io() to use abd_alloc()

Reviewed-by: David Quigley <david.quigley@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Closes #5668
This commit is contained in:
Gvozden Neskovic 2017-01-05 14:10:07 -05:00 committed by Brian Behlendorf
parent 2035575fd6
commit 84c07adadb
8 changed files with 147 additions and 118 deletions

View File

@ -3736,6 +3736,7 @@ zdb_read_block(char *thing, spa_t *spa)
void *lbuf, *buf; void *lbuf, *buf;
char *s, *p, *dup, *vdev, *flagstr; char *s, *p, *dup, *vdev, *flagstr;
int i, error; int i, error;
boolean_t borrowed = B_FALSE;
dup = strdup(thing); dup = strdup(thing);
s = strtok(dup, ":"); s = strtok(dup, ":");
@ -3806,7 +3807,7 @@ zdb_read_block(char *thing, spa_t *spa)
psize = size; psize = size;
lsize = size; lsize = size;
pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE); pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
BP_ZERO(bp); BP_ZERO(bp);
@ -3907,8 +3908,9 @@ zdb_read_block(char *thing, spa_t *spa)
buf = lbuf; buf = lbuf;
size = lsize; size = lsize;
} else { } else {
buf = abd_to_buf(pabd);
size = psize; size = psize;
buf = abd_borrow_buf_copy(pabd, size);
borrowed = B_TRUE;
} }
if (flags & ZDB_FLAG_PRINT_BLKPTR) if (flags & ZDB_FLAG_PRINT_BLKPTR)
@ -3924,6 +3926,9 @@ zdb_read_block(char *thing, spa_t *spa)
else else
zdb_dump_block(thing, buf, size, flags); zdb_dump_block(thing, buf, size, flags);
if (borrowed)
abd_return_buf_copy(pabd, buf, size);
out: out:
abd_free(pabd); abd_free(pabd);
umem_free(lbuf, SPA_MAXBLOCKSIZE); umem_free(lbuf, SPA_MAXBLOCKSIZE);

View File

@ -296,7 +296,7 @@ typedef struct zio_prop {
typedef struct zio_cksum_report zio_cksum_report_t; typedef struct zio_cksum_report zio_cksum_report_t;
typedef void zio_cksum_finish_f(zio_cksum_report_t *rep, typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
const void *good_data); const abd_t *good_data);
typedef void zio_cksum_free_f(void *cbdata, size_t size); typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */ struct zio_bad_cksum; /* defined in zio_checksum.h */
@ -587,14 +587,14 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info); uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const void *good_data, const void *bad_data, boolean_t drop_if_identical); const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
extern void zfs_ereport_free_checksum(zio_cksum_report_t *report); extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/* If we have the good data in hand, this function can be used */ /* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length, struct zio *zio, uint64_t offset, uint64_t length,
const void *good_data, const void *bad_data, struct zio_bad_cksum *info); const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */ /* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa); extern void spa_handle_ignored_writes(spa_t *spa);

View File

@ -130,7 +130,7 @@ extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
void *, uint64_t, uint64_t, zio_bad_cksum_t *); void *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern void zio_checksum_compute(zio_t *, enum zio_checksum, extern void zio_checksum_compute(zio_t *, enum zio_checksum,
struct abd *, uint64_t); struct abd *, uint64_t);
extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum, extern int zio_checksum_error_impl(spa_t *, const blkptr_t *, enum zio_checksum,
struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *); struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
extern enum zio_checksum spa_dedup_checksum(spa_t *spa); extern enum zio_checksum spa_dedup_checksum(spa_t *spa);

View File

@ -722,7 +722,7 @@ abd_alloc_sametype(abd_t *sabd, size_t size)
abd_t * abd_t *
abd_alloc_for_io(size_t size, boolean_t is_metadata) abd_alloc_for_io(size_t size, boolean_t is_metadata)
{ {
return (abd_alloc_linear(size, is_metadata)); return (abd_alloc(size, is_metadata));
} }
/* /*

View File

@ -134,21 +134,16 @@ void
vdev_raidz_map_free(raidz_map_t *rm) vdev_raidz_map_free(raidz_map_t *rm)
{ {
int c; int c;
size_t size;
for (c = 0; c < rm->rm_firstdatacol; c++) { for (c = 0; c < rm->rm_firstdatacol; c++) {
abd_free(rm->rm_col[c].rc_abd); abd_free(rm->rm_col[c].rc_abd);
if (rm->rm_col[c].rc_gdata != NULL) if (rm->rm_col[c].rc_gdata != NULL)
zio_buf_free(rm->rm_col[c].rc_gdata, abd_free(rm->rm_col[c].rc_gdata);
rm->rm_col[c].rc_size);
} }
size = 0; for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
abd_put(rm->rm_col[c].rc_abd); abd_put(rm->rm_col[c].rc_abd);
size += rm->rm_col[c].rc_size;
}
if (rm->rm_abd_copy != NULL) if (rm->rm_abd_copy != NULL)
abd_free(rm->rm_abd_copy); abd_free(rm->rm_abd_copy);
@ -181,14 +176,14 @@ vdev_raidz_cksum_free(void *arg, size_t ignored)
} }
static void static void
vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{ {
raidz_map_t *rm = zcr->zcr_cbdata; raidz_map_t *rm = zcr->zcr_cbdata;
size_t c = zcr->zcr_cbinfo; const size_t c = zcr->zcr_cbinfo;
size_t x; size_t x, offset;
const char *good = NULL; const abd_t *good = NULL;
char *bad; const abd_t *bad = rm->rm_col[c].rc_abd;
if (good_data == NULL) { if (good_data == NULL) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
@ -203,8 +198,6 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
*/ */
if (rm->rm_col[0].rc_gdata == NULL) { if (rm->rm_col[0].rc_gdata == NULL) {
abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY]; abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
char *buf;
int offset;
/* /*
* Set up the rm_col[]s to generate the parity for * Set up the rm_col[]s to generate the parity for
@ -213,20 +206,21 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
*/ */
for (x = 0; x < rm->rm_firstdatacol; x++) { for (x = 0; x < rm->rm_firstdatacol; x++) {
bad_parity[x] = rm->rm_col[x].rc_abd; bad_parity[x] = rm->rm_col[x].rc_abd;
rm->rm_col[x].rc_gdata =
zio_buf_alloc(rm->rm_col[x].rc_size);
rm->rm_col[x].rc_abd = rm->rm_col[x].rc_abd =
abd_get_from_buf(rm->rm_col[x].rc_gdata, rm->rm_col[x].rc_gdata =
abd_alloc_sametype(rm->rm_col[x].rc_abd,
rm->rm_col[x].rc_size); rm->rm_col[x].rc_size);
} }
/* fill in the data columns from good_data */ /* fill in the data columns from good_data */
buf = (char *)good_data; offset = 0;
for (; x < rm->rm_cols; x++) { for (; x < rm->rm_cols; x++) {
abd_put(rm->rm_col[x].rc_abd); abd_put(rm->rm_col[x].rc_abd);
rm->rm_col[x].rc_abd = abd_get_from_buf(buf,
rm->rm_col[x].rc_size); rm->rm_col[x].rc_abd =
buf += rm->rm_col[x].rc_size; abd_get_offset_size((abd_t *)good_data,
offset, rm->rm_col[x].rc_size);
offset += rm->rm_col[x].rc_size;
} }
/* /*
@ -235,10 +229,8 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
vdev_raidz_generate_parity(rm); vdev_raidz_generate_parity(rm);
/* restore everything back to its original state */ /* restore everything back to its original state */
for (x = 0; x < rm->rm_firstdatacol; x++) { for (x = 0; x < rm->rm_firstdatacol; x++)
abd_put(rm->rm_col[x].rc_abd);
rm->rm_col[x].rc_abd = bad_parity[x]; rm->rm_col[x].rc_abd = bad_parity[x];
}
offset = 0; offset = 0;
for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) {
@ -251,19 +243,21 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
} }
ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL); ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL);
good = rm->rm_col[c].rc_gdata; good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0,
rm->rm_col[c].rc_size);
} else { } else {
/* adjust good_data to point at the start of our column */ /* adjust good_data to point at the start of our column */
good = good_data; offset = 0;
for (x = rm->rm_firstdatacol; x < c; x++) for (x = rm->rm_firstdatacol; x < c; x++)
good += rm->rm_col[x].rc_size; offset += rm->rm_col[x].rc_size;
good = abd_get_offset_size((abd_t *)good_data, offset,
rm->rm_col[c].rc_size);
} }
bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size);
/* we drop the ereport if it ends up that the data was good */ /* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size); abd_put((abd_t *)good);
} }
/* /*
@ -306,8 +300,7 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
size += rm->rm_col[c].rc_size; size += rm->rm_col[c].rc_size;
rm->rm_abd_copy = rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE);
abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size);
for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
raidz_col_t *col = &rm->rm_col[c]; raidz_col_t *col = &rm->rm_col[c];
@ -315,6 +308,7 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
col->rc_size); col->rc_size);
abd_copy(tmp, col->rc_abd, col->rc_size); abd_copy(tmp, col->rc_abd, col->rc_size);
abd_put(col->rc_abd); abd_put(col->rc_abd);
col->rc_abd = tmp; col->rc_abd = tmp;
@ -1757,9 +1751,8 @@ vdev_raidz_io_start(zio_t *zio)
* Report a checksum error for a child of a RAID-Z device. * Report a checksum error for a child of a RAID-Z device.
*/ */
static void static void
raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
{ {
void *buf;
vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
@ -1773,11 +1766,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
zbc.zbc_has_cksum = 0; zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected; zbc.zbc_injected = rm->rm_ecksuminjected;
buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size);
zfs_ereport_post_checksum(zio->io_spa, vd, zio, zfs_ereport_post_checksum(zio->io_spa, vd, zio,
rc->rc_offset, rc->rc_size, buf, bad_data, rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data,
&zbc); &zbc);
abd_return_buf(rc->rc_abd, buf, rc->rc_size);
} }
} }
@ -1810,7 +1801,7 @@ raidz_checksum_verify(zio_t *zio)
static int static int
raidz_parity_verify(zio_t *zio, raidz_map_t *rm) raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
{ {
void *orig[VDEV_RAIDZ_MAXPARITY]; abd_t *orig[VDEV_RAIDZ_MAXPARITY];
int c, ret = 0; int c, ret = 0;
raidz_col_t *rc; raidz_col_t *rc;
@ -1825,8 +1816,9 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
rc = &rm->rm_col[c]; rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0) if (!rc->rc_tried || rc->rc_error != 0)
continue; continue;
orig[c] = zio_buf_alloc(rc->rc_size);
abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size); orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size);
abd_copy(orig[c], rc->rc_abd, rc->rc_size);
} }
vdev_raidz_generate_parity(rm); vdev_raidz_generate_parity(rm);
@ -1835,12 +1827,12 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
rc = &rm->rm_col[c]; rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0) if (!rc->rc_tried || rc->rc_error != 0)
continue; continue;
if (bcmp(orig[c], abd_to_buf(rc->rc_abd), rc->rc_size) != 0) { if (abd_cmp(orig[c], rc->rc_abd) != 0) {
raidz_checksum_error(zio, rc, orig[c]); raidz_checksum_error(zio, rc, orig[c]);
rc->rc_error = SET_ERROR(ECKSUM); rc->rc_error = SET_ERROR(ECKSUM);
ret++; ret++;
} }
zio_buf_free(orig[c], rc->rc_size); abd_free(orig[c]);
} }
return (ret); return (ret);
@ -1870,7 +1862,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
{ {
raidz_map_t *rm = zio->io_vsd; raidz_map_t *rm = zio->io_vsd;
raidz_col_t *rc; raidz_col_t *rc;
void *orig[VDEV_RAIDZ_MAXPARITY]; abd_t *orig[VDEV_RAIDZ_MAXPARITY];
int tstore[VDEV_RAIDZ_MAXPARITY + 2]; int tstore[VDEV_RAIDZ_MAXPARITY + 2];
int *tgts = &tstore[1]; int *tgts = &tstore[1];
int curr, next, i, c, n; int curr, next, i, c, n;
@ -1919,7 +1911,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT(orig[i] != NULL); ASSERT(orig[i] != NULL);
} }
orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size); orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd,
rm->rm_col[0].rc_size);
curr = 0; curr = 0;
next = tgts[curr]; next = tgts[curr];
@ -1938,8 +1931,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT3S(c, >=, 0); ASSERT3S(c, >=, 0);
ASSERT3S(c, <, rm->rm_cols); ASSERT3S(c, <, rm->rm_cols);
rc = &rm->rm_col[c]; rc = &rm->rm_col[c];
abd_copy_to_buf(orig[i], rc->rc_abd, abd_copy(orig[i], rc->rc_abd, rc->rc_size);
rc->rc_size);
} }
/* /*
@ -1969,8 +1961,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
c = tgts[i]; c = tgts[i];
rc = &rm->rm_col[c]; rc = &rm->rm_col[c];
abd_copy_from_buf(rc->rc_abd, orig[i], abd_copy(rc->rc_abd, orig[i], rc->rc_size);
rc->rc_size);
} }
do { do {
@ -2007,9 +1998,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
} }
n--; n--;
done: done:
for (i = 0; i < n; i++) { for (i = 0; i < n; i++)
zio_buf_free(orig[i], rm->rm_col[0].rc_size); abd_free(orig[i]);
}
return (ret); return (ret);
} }

View File

@ -596,11 +596,11 @@ zei_range_total_size(zfs_ecksum_info_t *eip)
static zfs_ecksum_info_t * static zfs_ecksum_info_t *
annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
const uint8_t *goodbuf, const uint8_t *badbuf, size_t size, const abd_t *goodabd, const abd_t *badabd, size_t size,
boolean_t drop_if_identical) boolean_t drop_if_identical)
{ {
const uint64_t *good = (const uint64_t *)goodbuf; const uint64_t *good;
const uint64_t *bad = (const uint64_t *)badbuf; const uint64_t *bad;
uint64_t allset = 0; uint64_t allset = 0;
uint64_t allcleared = 0; uint64_t allcleared = 0;
@ -644,13 +644,16 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
} }
} }
if (badbuf == NULL || goodbuf == NULL) if (badabd == NULL || goodabd == NULL)
return (eip); return (eip);
ASSERT3U(size, ==, nui64s * sizeof (uint64_t)); ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
ASSERT3U(size, <=, UINT32_MAX); ASSERT3U(size, <=, UINT32_MAX);
good = (const uint64_t *) abd_borrow_buf_copy((abd_t *)goodabd, size);
bad = (const uint64_t *) abd_borrow_buf_copy((abd_t *)badabd, size);
/* build up the range list by comparing the two buffers. */ /* build up the range list by comparing the two buffers. */
for (idx = 0; idx < nui64s; idx++) { for (idx = 0; idx < nui64s; idx++) {
if (good[idx] == bad[idx]) { if (good[idx] == bad[idx]) {
@ -680,6 +683,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
*/ */
if (inline_size == 0 && drop_if_identical) { if (inline_size == 0 && drop_if_identical) {
kmem_free(eip, sizeof (*eip)); kmem_free(eip, sizeof (*eip));
abd_return_buf((abd_t *)goodabd, (void *)good, size);
abd_return_buf((abd_t *)badabd, (void *)bad, size);
return (NULL); return (NULL);
} }
@ -720,6 +725,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
eip->zei_ranges[range].zr_start *= sizeof (uint64_t); eip->zei_ranges[range].zr_start *= sizeof (uint64_t);
eip->zei_ranges[range].zr_end *= sizeof (uint64_t); eip->zei_ranges[range].zr_end *= sizeof (uint64_t);
} }
abd_return_buf((abd_t *)goodabd, (void *)good, size);
abd_return_buf((abd_t *)badabd, (void *)bad, size);
eip->zei_allowed_mingap *= sizeof (uint64_t); eip->zei_allowed_mingap *= sizeof (uint64_t);
inline_size *= sizeof (uint64_t); inline_size *= sizeof (uint64_t);
@ -827,8 +836,8 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
} }
void void
zfs_ereport_finish_checksum(zio_cksum_report_t *report, zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data,
const void *good_data, const void *bad_data, boolean_t drop_if_identical) const abd_t *bad_data, boolean_t drop_if_identical)
{ {
#ifdef _KERNEL #ifdef _KERNEL
zfs_ecksum_info_t *info; zfs_ecksum_info_t *info;
@ -870,7 +879,7 @@ zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
void void
zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length, struct zio *zio, uint64_t offset, uint64_t length,
const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc) const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc)
{ {
#ifdef _KERNEL #ifdef _KERNEL
nvlist_t *ereport = NULL; nvlist_t *ereport = NULL;

View File

@ -310,6 +310,12 @@ zio_data_buf_free(void *buf, size_t size)
kmem_cache_free(zio_data_buf_cache[c], buf); kmem_cache_free(zio_data_buf_cache[c], buf);
} }
/*
 * Free-callback adapter for checksum reports whose callback data is an ABD.
 *
 * The (void *, size_t) signature matches zio_cksum_free_f so this function
 * can be installed as zcr_free; the opaque pointer is cast back to abd_t
 * and released with abd_free().  The size argument is not used here.
 */
static void
zio_abd_free(void *abd, size_t size)
{
abd_free((abd_t *)abd);
}
/* /*
* ========================================================================== * ==========================================================================
* Push and pop I/O transform buffers * Push and pop I/O transform buffers
@ -3332,7 +3338,7 @@ zio_vdev_io_done(zio_t *zio)
*/ */
static void static void
zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
const void *good_buf) const abd_t *good_buf)
{ {
/* no processing needed */ /* no processing needed */
zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE); zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE);
@ -3342,14 +3348,14 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
void void
zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored)
{ {
void *buf = zio_buf_alloc(zio->io_size); void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
abd_copy_to_buf(buf, zio->io_abd, zio->io_size); abd_copy(abd, zio->io_abd, zio->io_size);
zcr->zcr_cbinfo = zio->io_size; zcr->zcr_cbinfo = zio->io_size;
zcr->zcr_cbdata = buf; zcr->zcr_cbdata = abd;
zcr->zcr_finish = zio_vsd_default_cksum_finish; zcr->zcr_finish = zio_vsd_default_cksum_finish;
zcr->zcr_free = zio_buf_free; zcr->zcr_free = zio_abd_free;
} }
static int static int
@ -3706,7 +3712,7 @@ zio_done(zio_t *zio)
* Always attempt to keep stack usage minimal here since * Always attempt to keep stack usage minimal here since
* we can be called recursively up to 19 levels deep. * we can be called recursively up to 19 levels deep.
*/ */
uint64_t psize = zio->io_size; const uint64_t psize = zio->io_size;
zio_t *pio, *pio_next; zio_t *pio, *pio_next;
int c, w; int c, w;
zio_link_t *zl = NULL; zio_link_t *zl = NULL;
@ -3788,26 +3794,19 @@ zio_done(zio_t *zio)
zio_cksum_report_t *zcr = zio->io_cksum_report; zio_cksum_report_t *zcr = zio->io_cksum_report;
uint64_t align = zcr->zcr_align; uint64_t align = zcr->zcr_align;
uint64_t asize = P2ROUNDUP(psize, align); uint64_t asize = P2ROUNDUP(psize, align);
char *abuf = NULL;
abd_t *adata = zio->io_abd; abd_t *adata = zio->io_abd;
if (asize != psize) { if (asize != psize) {
adata = abd_alloc_linear(asize, B_TRUE); adata = abd_alloc(asize, B_TRUE);
abd_copy(adata, zio->io_abd, psize); abd_copy(adata, zio->io_abd, psize);
abd_zero_off(adata, psize, asize - psize); abd_zero_off(adata, psize, asize - psize);
} }
if (adata != NULL)
abuf = abd_borrow_buf_copy(adata, asize);
zio->io_cksum_report = zcr->zcr_next; zio->io_cksum_report = zcr->zcr_next;
zcr->zcr_next = NULL; zcr->zcr_next = NULL;
zcr->zcr_finish(zcr, abuf); zcr->zcr_finish(zcr, adata);
zfs_ereport_free_checksum(zcr); zfs_ereport_free_checksum(zcr);
if (adata != NULL)
abd_return_buf(adata, abuf, asize);
if (asize != psize) if (asize != psize)
abd_free(adata); abd_free(adata);
} }

View File

@ -263,7 +263,7 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
* a tuple which is guaranteed to be unique for the life of the pool. * a tuple which is guaranteed to be unique for the life of the pool.
*/ */
static void static void
zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
{ {
const dva_t *dva = BP_IDENTITY(bp); const dva_t *dva = BP_IDENTITY(bp);
uint64_t txg = BP_PHYSICAL_BIRTH(bp); uint64_t txg = BP_PHYSICAL_BIRTH(bp);
@ -315,6 +315,7 @@ void
zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
abd_t *abd, uint64_t size) abd_t *abd, uint64_t size)
{ {
static const uint64_t zec_magic = ZEC_MAGIC;
blkptr_t *bp = zio->io_bp; blkptr_t *bp = zio->io_bp;
uint64_t offset = zio->io_offset; uint64_t offset = zio->io_offset;
zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_checksum_info_t *ci = &zio_checksum_table[checksum];
@ -327,28 +328,47 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
zio_checksum_template_init(checksum, spa); zio_checksum_template_init(checksum, spa);
if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
zio_eck_t *eck; zio_eck_t eck;
void *data = abd_to_buf(abd); size_t eck_offset;
if (checksum == ZIO_CHECKSUM_ZILOG2) { if (checksum == ZIO_CHECKSUM_ZILOG2) {
zil_chain_t *zilc = data; zil_chain_t zilc;
abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ,
uint64_t); uint64_t);
eck = &zilc->zc_eck; eck = zilc.zc_eck;
eck_offset = offsetof(zil_chain_t, zc_eck);
} else { } else {
eck = (zio_eck_t *)((char *)data + size) - 1; eck_offset = size - sizeof (zio_eck_t);
abd_copy_to_buf_off(&eck, abd, eck_offset,
sizeof (zio_eck_t));
} }
if (checksum == ZIO_CHECKSUM_GANG_HEADER)
zio_checksum_gang_verifier(&eck->zec_cksum, bp); if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
else if (checksum == ZIO_CHECKSUM_LABEL) zio_checksum_gang_verifier(&eck.zec_cksum, bp);
zio_checksum_label_verifier(&eck->zec_cksum, offset); abd_copy_from_buf_off(abd, &eck.zec_cksum,
else eck_offset + offsetof(zio_eck_t, zec_cksum),
bp->blk_cksum = eck->zec_cksum; sizeof (zio_cksum_t));
eck->zec_magic = ZEC_MAGIC; } else if (checksum == ZIO_CHECKSUM_LABEL) {
zio_checksum_label_verifier(&eck.zec_cksum, offset);
abd_copy_from_buf_off(abd, &eck.zec_cksum,
eck_offset + offsetof(zio_eck_t, zec_cksum),
sizeof (zio_cksum_t));
} else {
bp->blk_cksum = eck.zec_cksum;
}
abd_copy_from_buf_off(abd, &zec_magic,
eck_offset + offsetof(zio_eck_t, zec_magic),
sizeof (zec_magic));
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
&cksum); &cksum);
eck->zec_cksum = cksum;
abd_copy_from_buf_off(abd, &cksum,
eck_offset + offsetof(zio_eck_t, zec_cksum),
sizeof (zio_cksum_t));
} else { } else {
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
&bp->blk_cksum); &bp->blk_cksum);
@ -356,12 +376,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
} }
int int
zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
abd_t *abd, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) enum zio_checksum checksum, abd_t *abd, uint64_t size, uint64_t offset,
zio_bad_cksum_t *info)
{ {
zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_checksum_info_t *ci = &zio_checksum_table[checksum];
int byteswap;
zio_cksum_t actual_cksum, expected_cksum; zio_cksum_t actual_cksum, expected_cksum;
zio_eck_t eck;
int byteswap;
if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
@ -369,34 +391,37 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
zio_checksum_template_init(checksum, spa); zio_checksum_template_init(checksum, spa);
if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
zio_eck_t *eck;
zio_cksum_t verifier; zio_cksum_t verifier;
size_t eck_offset; size_t eck_offset;
uint64_t data_size = size;
void *data = abd_borrow_buf_copy(abd, data_size);
if (checksum == ZIO_CHECKSUM_ZILOG2) { if (checksum == ZIO_CHECKSUM_ZILOG2) {
zil_chain_t *zilc = data; zil_chain_t zilc;
uint64_t nused; uint64_t nused;
eck = &zilc->zc_eck; abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
if (eck->zec_magic == ZEC_MAGIC) {
nused = zilc->zc_nused; eck = zilc.zc_eck;
} else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) { eck_offset = offsetof(zil_chain_t, zc_eck) +
nused = BSWAP_64(zilc->zc_nused); offsetof(zio_eck_t, zec_cksum);
if (eck.zec_magic == ZEC_MAGIC) {
nused = zilc.zc_nused;
} else if (eck.zec_magic == BSWAP_64(ZEC_MAGIC)) {
nused = BSWAP_64(zilc.zc_nused);
} else { } else {
abd_return_buf(abd, data, data_size);
return (SET_ERROR(ECKSUM)); return (SET_ERROR(ECKSUM));
} }
if (nused > data_size) { if (nused > size) {
abd_return_buf(abd, data, data_size);
return (SET_ERROR(ECKSUM)); return (SET_ERROR(ECKSUM));
} }
size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
} else { } else {
eck = (zio_eck_t *)((char *)data + data_size) - 1; eck_offset = size - sizeof (zio_eck_t);
abd_copy_to_buf_off(&eck, abd, eck_offset,
sizeof (zio_eck_t));
eck_offset += offsetof(zio_eck_t, zec_cksum);
} }
if (checksum == ZIO_CHECKSUM_GANG_HEADER) if (checksum == ZIO_CHECKSUM_GANG_HEADER)
@ -406,20 +431,21 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
else else
verifier = bp->blk_cksum; verifier = bp->blk_cksum;
byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); byteswap = (eck.zec_magic == BSWAP_64(ZEC_MAGIC));
if (byteswap) if (byteswap)
byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
eck_offset = (size_t)(&eck->zec_cksum) - (size_t)data; expected_cksum = eck.zec_cksum;
expected_cksum = eck->zec_cksum;
eck->zec_cksum = verifier; abd_copy_from_buf_off(abd, &verifier, eck_offset,
abd_return_buf_copy(abd, data, data_size); sizeof (zio_cksum_t));
ci->ci_func[byteswap](abd, size, ci->ci_func[byteswap](abd, size,
spa->spa_cksum_tmpls[checksum], &actual_cksum); spa->spa_cksum_tmpls[checksum], &actual_cksum);
abd_copy_from_buf_off(abd, &expected_cksum,
eck_offset, sizeof (zio_cksum_t)); abd_copy_from_buf_off(abd, &expected_cksum, eck_offset,
sizeof (zio_cksum_t));
if (byteswap) { if (byteswap) {
byteswap_uint64_array(&expected_cksum, byteswap_uint64_array(&expected_cksum,