mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 10:54:35 +03:00
RAIDZ: Use cache blocking during parity math
RAIDZ parity is calculated by adding data one column at a time. This works fine for small blocks, but for large blocks the results of the previous addition may already have been evicted from the CPU caches to main memory, so each addition requires an extra memory read to fetch them back, on top of the extra memory write. This patch splits large parity operations into 64KB chunks, which should in most cases fit into the L2 caches of CPUs from the last decade. I haven't touched the more complicated cases of data reconstruction, to avoid overcomplicating the code. Those should be relatively rare. My tests on a Xeon Gold 6242R CPU with 1MB of L2 cache per core show up to 10/20% memory traffic reduction when writing to 4-wide RAIDZ/RAIDZ2 blocks of ~4MB and up. Older CPUs with 256KB of L2 cache should see the effect even on smaller blocks. Wider vdevs may need bigger blocks to be affected. Reviewed-by: Brian Atkinson <batkinson@lanl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #15448
This commit is contained in:
+10
-14
@@ -1017,12 +1017,12 @@ abd_cmp(abd_t *dabd, abd_t *sabd)
|
||||
* is the same when taking linear and when taking scatter
|
||||
*/
|
||||
void
|
||||
abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
||||
ssize_t csize, ssize_t dsize, const unsigned parity,
|
||||
abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
|
||||
size_t csize, size_t dsize, const unsigned parity,
|
||||
void (*func_raidz_gen)(void **, const void *, size_t, size_t))
|
||||
{
|
||||
int i;
|
||||
ssize_t len, dlen;
|
||||
size_t len, dlen;
|
||||
struct abd_iter caiters[3];
|
||||
struct abd_iter daiter;
|
||||
void *caddrs[3];
|
||||
@@ -1033,16 +1033,15 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
||||
ASSERT3U(parity, <=, 3);
|
||||
for (i = 0; i < parity; i++) {
|
||||
abd_verify(cabds[i]);
|
||||
ASSERT3U(csize, <=, cabds[i]->abd_size);
|
||||
c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0);
|
||||
ASSERT3U(off + csize, <=, cabds[i]->abd_size);
|
||||
c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], off);
|
||||
}
|
||||
|
||||
ASSERT3S(dsize, >=, 0);
|
||||
if (dsize > 0) {
|
||||
ASSERT(dabd);
|
||||
abd_verify(dabd);
|
||||
ASSERT3U(dsize, <=, dabd->abd_size);
|
||||
c_dabd = abd_init_abd_iter(dabd, &daiter, 0);
|
||||
ASSERT3U(off + dsize, <=, dabd->abd_size);
|
||||
c_dabd = abd_init_abd_iter(dabd, &daiter, off);
|
||||
}
|
||||
|
||||
abd_enter_critical(flags);
|
||||
@@ -1064,7 +1063,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
||||
dlen = 0;
|
||||
|
||||
/* must be progressive */
|
||||
ASSERT3S(len, >, 0);
|
||||
ASSERT3U(len, >, 0);
|
||||
/*
|
||||
* The iterated function likely will not do well if each
|
||||
* segment except the last one is not multiple of 512 (raidz).
|
||||
@@ -1089,9 +1088,6 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
||||
}
|
||||
|
||||
csize -= len;
|
||||
|
||||
ASSERT3S(dsize, >=, 0);
|
||||
ASSERT3S(csize, >=, 0);
|
||||
}
|
||||
abd_exit_critical(flags);
|
||||
}
|
||||
@@ -1108,13 +1104,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
||||
*/
|
||||
void
|
||||
abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
|
||||
ssize_t tsize, const unsigned parity,
|
||||
size_t tsize, const unsigned parity,
|
||||
void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
|
||||
const unsigned *mul),
|
||||
const unsigned *mul)
|
||||
{
|
||||
int i;
|
||||
ssize_t len;
|
||||
size_t len;
|
||||
struct abd_iter citers[3];
|
||||
struct abd_iter xiters[3];
|
||||
void *caddrs[3], *xaddrs[3];
|
||||
|
||||
Reference in New Issue
Block a user