Tiered early abort, zstd edition

It turns out that "do LZ4 and zstd-1 both fail" is a great heuristic
for "don't even bother trying higher zstd tiers".

By way of illustration:
$ cat /incompress | mbuffer | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_normal
summary: 39.8 GiByte in  3min 40.2sec - average of  185 MiB/s
$ echo 3 | sudo tee /sys/module/zzstd/parameters/zstd_lz4_pass
3
$ cat /incompress | mbuffer -m 4G | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_patched
summary: 39.8 GiByte in 48.6sec - average of  839 MiB/s
$ sudo zfs list -p -o name,used,lused,ratio evenfaster/lowcomp_1M_zstd12_normal evenfaster/lowcomp_1M_zstd12_patched
NAME                                         USED        LUSED  RATIO
evenfaster/lowcomp_1M_zstd12_normal   39549931520  42721221632   1.08
evenfaster/lowcomp_1M_zstd12_patched  39626399744  42721217536   1.07
$ python3 -c "print(39626399744 - 39549931520)"
76468224
$

I'll take 76 MB out of 42 GB for > 4x speedup.

Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Kjeld Schouten <kjeld@schouten-lebbing.nl>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
Closes #13244
This commit is contained in:
Rich Ercolani 2022-05-24 12:43:22 -04:00 committed by GitHub
parent 2e05765006
commit f375b23c02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 134 additions and 6 deletions

View File

@ -78,6 +78,8 @@ typedef struct zfs_zstd_meta {
* kstat helper macros * kstat helper macros
*/ */
#define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64) #define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64)
/* Reset the named zstd kstat counter by storing 0 via atomic_store_64(). */
#define ZSTDSTAT_ZERO(stat) \
(atomic_store_64(&zstd_stats.stat.value.ui64, 0))
#define ZSTDSTAT_ADD(stat, val) \ #define ZSTDSTAT_ADD(stat, val) \
atomic_add_64(&zstd_stats.stat.value.ui64, (val)) atomic_add_64(&zstd_stats.stat.value.ui64, (val))
#define ZSTDSTAT_SUB(stat, val) \ #define ZSTDSTAT_SUB(stat, val) \
@ -90,6 +92,8 @@ void zstd_fini(void);
size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len,
size_t d_len, int level); size_t d_len, int level);
size_t zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len,
size_t d_len, int level);
int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level); int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level);
int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
size_t d_len, uint8_t *level); size_t d_len, uint8_t *level);

View File

@ -2129,6 +2129,14 @@ However, if there are fewer than
metaslabs in the vdev, this functionality is disabled. metaslabs in the vdev, this functionality is disabled.
This ensures that we don't set aside an unreasonable amount of space for the ZIL. This ensures that we don't set aside an unreasonable amount of space for the ZIL.
. .
.It Sy zstd_earlyabort_pass Ns = Ns Sy 1 Pq int
Whether the heuristic for detecting incompressible data with zstd levels >= 3,
using LZ4 and zstd-1 passes, is enabled.
.
.It Sy zstd_abort_size Ns = Ns Sy 131072 Pq int
Minimum uncompressed size (inclusive) a record must have before the early
abort heuristic is attempted.
.
.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int .It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int
If non-zero, the zio deadman will produce debugging messages If non-zero, the zio deadman will produce debugging messages
.Pq see Sy zfs_dbgmsg_enable .Pq see Sy zfs_dbgmsg_enable

View File

@ -66,7 +66,7 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
{"gzip-9", 9, gzip_compress, gzip_decompress, NULL}, {"gzip-9", 9, gzip_compress, gzip_decompress, NULL},
{"zle", 64, zle_compress, zle_decompress, NULL}, {"zle", 64, zle_compress, zle_decompress, NULL},
{"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL}, {"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress, {"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress_wrap,
zfs_zstd_decompress, zfs_zstd_decompress_level}, zfs_zstd_decompress, zfs_zstd_decompress_level},
}; };

View File

@ -50,6 +50,10 @@
#include "lib/zstd.h" #include "lib/zstd.h"
#include "lib/common/zstd_errors.h" #include "lib/common/zstd_errors.h"
/*
 * Early abort tunables, consulted by zfs_zstd_compress_wrap().
 *
 * zstd_earlyabort_pass: the heuristic is only attempted when this is > 0.
 */
static int zstd_earlyabort_pass = 1;
/* Lowest zstd level for which the early abort heuristic is attempted. */
static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;
/* Smallest source length (in bytes, inclusive) that gets the heuristic. */
static unsigned int zstd_abort_size = (128 * 1024);
static kstat_t *zstd_ksp = NULL; static kstat_t *zstd_ksp = NULL;
typedef struct zstd_stats { typedef struct zstd_stats {
@ -62,6 +66,21 @@ typedef struct zstd_stats {
kstat_named_t zstd_stat_dec_header_inval; kstat_named_t zstd_stat_dec_header_inval;
kstat_named_t zstd_stat_com_fail; kstat_named_t zstd_stat_com_fail;
kstat_named_t zstd_stat_dec_fail; kstat_named_t zstd_stat_dec_fail;
/*
* LZ4 first-pass early abort verdict
*/
kstat_named_t zstd_stat_lz4pass_allowed;
kstat_named_t zstd_stat_lz4pass_rejected;
/*
* zstd-1 second-pass early abort verdict
*/
kstat_named_t zstd_stat_zstdpass_allowed;
kstat_named_t zstd_stat_zstdpass_rejected;
/*
* We excluded this from early abort for some reason
*/
kstat_named_t zstd_stat_passignored;
kstat_named_t zstd_stat_passignored_size;
kstat_named_t zstd_stat_buffers; kstat_named_t zstd_stat_buffers;
kstat_named_t zstd_stat_size; kstat_named_t zstd_stat_size;
} zstd_stats_t; } zstd_stats_t;
@ -76,10 +95,44 @@ static zstd_stats_t zstd_stats = {
{ "decompress_header_invalid", KSTAT_DATA_UINT64 }, { "decompress_header_invalid", KSTAT_DATA_UINT64 },
{ "compress_failed", KSTAT_DATA_UINT64 }, { "compress_failed", KSTAT_DATA_UINT64 },
{ "decompress_failed", KSTAT_DATA_UINT64 }, { "decompress_failed", KSTAT_DATA_UINT64 },
{ "lz4pass_allowed", KSTAT_DATA_UINT64 },
{ "lz4pass_rejected", KSTAT_DATA_UINT64 },
{ "zstdpass_allowed", KSTAT_DATA_UINT64 },
{ "zstdpass_rejected", KSTAT_DATA_UINT64 },
{ "passignored", KSTAT_DATA_UINT64 },
{ "passignored_size", KSTAT_DATA_UINT64 },
{ "buffers", KSTAT_DATA_UINT64 }, { "buffers", KSTAT_DATA_UINT64 },
{ "size", KSTAT_DATA_UINT64 }, { "size", KSTAT_DATA_UINT64 },
}; };
#ifdef _KERNEL
/*
 * kstat update callback for the zstd statistics.
 *
 * A write (KSTAT_WRITE) directed at the zstd kstat resets every event
 * counter to zero. The "buffers" and "size" entries are not touched here.
 * Any other request is a no-op. Always returns 0.
 */
static int
kstat_zstd_update(kstat_t *ksp, int rw)
{
	ASSERT(ksp != NULL);

	/* Nothing to do unless this is a write to our own kstat. */
	if (rw != KSTAT_WRITE || ksp != zstd_ksp) {
		return (0);
	}

	/* Allocation counters. */
	ZSTDSTAT_ZERO(zstd_stat_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_alloc_fallback);
	ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail);

	/* Invalid-argument and failure counters. */
	ZSTDSTAT_ZERO(zstd_stat_com_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_header_inval);
	ZSTDSTAT_ZERO(zstd_stat_com_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_fail);

	/* Early abort heuristic counters. */
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_passignored);
	ZSTDSTAT_ZERO(zstd_stat_passignored_size);

	return (0);
}
#endif
/* Enums describing the allocator type specified by kmem_type in zstd_kmem */ /* Enums describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type { enum zstd_kmem_type {
ZSTD_KMEM_UNKNOWN = 0, ZSTD_KMEM_UNKNOWN = 0,
@ -377,6 +430,64 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
} }
/*
 * Compress a block with zstd, optionally running a cheap early abort
 * heuristic first so that incompressible data does not pay for an
 * expensive high-level zstd attempt.
 *
 * s_start/s_len describe the source buffer, d_start/d_len the destination
 * buffer, and level is the zio zstd level enum value. The trial passes
 * write into d_start as scratch space; the final pass overwrites it.
 *
 * Returns s_len when the level is invalid or the heuristic rejects the
 * block; otherwise returns whatever zfs_zstd_compress() returns for the
 * requested level.
 */
size_t
zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level)
{
	int16_t zstd_level;

	if (zstd_enum_to_level(level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_com_inval);
		return (s_len);
	}

	/*
	 * A zstd early abort heuristic.
	 *
	 * - Zeroth, if this is below zstd_cutoff_level (zstd-3 by default),
	 *   or < zstd_abort_size (currently 128k), don't try any of this,
	 *   just go.
	 *   (because experimentally that was a reasonable cutoff for a perf
	 *   win with tiny ratio change)
	 * - First, we try LZ4 compression, and if it doesn't early abort, we
	 *   jump directly to whatever compression level we intended to try.
	 * - Second, we try zstd-1 - if that errors out (usually, but not
	 *   exclusively, if it would overflow), we give up early.
	 *
	 *   If it works, instead we go on and compress anyway.
	 *
	 * Why two passes? LZ4 alone gets you a lot of the way, but on highly
	 * compressible data, it was losing up to 8.5% of the compressed
	 * savings versus no early abort, and all the zstd-fast levels are
	 * worse indications on their own than LZ4, and don't improve the LZ4
	 * pass noticeably if stacked like this.
	 */
	size_t actual_abort_size = zstd_abort_size;
	if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level &&
	    s_len >= actual_abort_size) {
		/*
		 * Both trial compressors return size_t; keep the result in
		 * the same type so lengths are never truncated or compared
		 * across signedness.
		 */
		size_t pass_len;

		pass_len = lz4_compress_zfs(s_start, d_start, s_len, d_len, 0);
		if (pass_len < d_len) {
			/* LZ4 fit in the destination: worth a real attempt. */
			ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed);
			goto keep_trying;
		}
		ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected);

		pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len,
		    ZIO_ZSTD_LEVEL_1);
		if (pass_len == s_len || pass_len == 0 || pass_len > d_len) {
			/* zstd-1 could not shrink it either: give up early. */
			ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected);
			return (s_len);
		}
		ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed);
	} else {
		ZSTDSTAT_BUMP(zstd_stat_passignored);
		if (s_len < actual_abort_size) {
			ZSTDSTAT_BUMP(zstd_stat_passignored_size);
		}
	}

keep_trying:
	return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level));
}
/* Compress block using zstd */ /* Compress block using zstd */
size_t size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
@ -437,8 +548,10 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
* too small, that is not a failure. Everything else is a * too small, that is not a failure. Everything else is a
* failure, so increment the compression failure counter. * failure, so increment the compression failure counter.
*/ */
if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) { int err = ZSTD_getErrorCode(c_len);
if (err != ZSTD_error_dstSize_tooSmall) {
ZSTDSTAT_BUMP(zstd_stat_com_fail); ZSTDSTAT_BUMP(zstd_stat_com_fail);
dprintf("Error: %s", ZSTD_getErrorString(err));
} }
return (s_len); return (s_len);
} }
@ -753,6 +866,9 @@ zstd_init(void)
if (zstd_ksp != NULL) { if (zstd_ksp != NULL) {
zstd_ksp->ks_data = &zstd_stats; zstd_ksp->ks_data = &zstd_stats;
kstat_install(zstd_ksp); kstat_install(zstd_ksp);
#ifdef _KERNEL
zstd_ksp->ks_update = kstat_zstd_update;
#endif
} }
return (0); return (0);
@ -781,8 +897,8 @@ module_init(zstd_init);
module_exit(zstd_fini); module_exit(zstd_fini);
#endif #endif
EXPORT_SYMBOL(zfs_zstd_compress); ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, INT, ZMOD_RW,
EXPORT_SYMBOL(zfs_zstd_decompress_level); "Enable early abort attempts when using zstd");
EXPORT_SYMBOL(zfs_zstd_decompress); ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW,
EXPORT_SYMBOL(zfs_zstd_cache_reap_now); "Minimal size of block to attempt early abort");
#endif #endif