mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-25 18:59:33 +03:00
Tiered early abort, zstd edition
It turns out that "do LZ4 and zstd-1 both fail" is a great heuristic for "don't even bother trying higher zstd tiers".

By way of illustration:

$ cat /incompress | mbuffer | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_normal
summary: 39.8 GiByte in 3min 40.2sec - average of 185 MiB/s
$ echo 3 | sudo tee /sys/module/zzstd/parameters/zstd_lz4_pass
3
$ cat /incompress | mbuffer -m 4G | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_patched
summary: 39.8 GiByte in 48.6sec - average of 839 MiB/s
$ sudo zfs list -p -o name,used,lused,ratio evenfaster/lowcomp_1M_zstd12_normal evenfaster/lowcomp_1M_zstd12_patched
NAME                                  USED         LUSED        RATIO
evenfaster/lowcomp_1M_zstd12_normal   39549931520  42721221632  1.08
evenfaster/lowcomp_1M_zstd12_patched  39626399744  42721217536  1.07
$ python3 -c "print(39626399744 - 39549931520)"
76468224
$

I'll take 76 MB out of 42 GB for > 4x speedup.

Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Kjeld Schouten <kjeld@schouten-lebbing.nl>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
Closes #13244
This commit is contained in:
parent
2e05765006
commit
f375b23c02
@ -78,6 +78,8 @@ typedef struct zfs_zstd_meta {
|
|||||||
* kstat helper macros
|
* kstat helper macros
|
||||||
*/
|
*/
|
||||||
#define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64)
|
#define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64)
|
||||||
|
#define ZSTDSTAT_ZERO(stat) \
|
||||||
|
(atomic_store_64(&zstd_stats.stat.value.ui64, 0))
|
||||||
#define ZSTDSTAT_ADD(stat, val) \
|
#define ZSTDSTAT_ADD(stat, val) \
|
||||||
atomic_add_64(&zstd_stats.stat.value.ui64, (val))
|
atomic_add_64(&zstd_stats.stat.value.ui64, (val))
|
||||||
#define ZSTDSTAT_SUB(stat, val) \
|
#define ZSTDSTAT_SUB(stat, val) \
|
||||||
@ -90,6 +92,8 @@ void zstd_fini(void);
|
|||||||
|
|
||||||
size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len,
|
size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len,
|
||||||
size_t d_len, int level);
|
size_t d_len, int level);
|
||||||
|
size_t zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len,
|
||||||
|
size_t d_len, int level);
|
||||||
int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level);
|
int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level);
|
||||||
int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
|
int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
|
||||||
size_t d_len, uint8_t *level);
|
size_t d_len, uint8_t *level);
|
||||||
|
@ -2129,6 +2129,14 @@ However, if there are fewer than
|
|||||||
metaslabs in the vdev, this functionality is disabled.
|
metaslabs in the vdev, this functionality is disabled.
|
||||||
This ensures that we don't set aside an unreasonable amount of space for the ZIL.
|
This ensures that we don't set aside an unreasonable amount of space for the ZIL.
|
||||||
.
|
.
|
||||||
|
.It Sy zstd_earlyabort_pass Ns = Ns Sy 1 Pq int
|
||||||
|
Whether heuristic for detection of incompressible data with zstd levels >= 3
|
||||||
|
using LZ4 and zstd-1 passes is enabled.
|
||||||
|
.
|
||||||
|
.It Sy zstd_abort_size Ns = Ns Sy 131072 Pq uint
|
||||||
|
Minimal uncompressed size (inclusive) of a record before the early abort
|
||||||
|
heuristic will be attempted.
|
||||||
|
.
|
||||||
.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||||
If non-zero, the zio deadman will produce debugging messages
|
If non-zero, the zio deadman will produce debugging messages
|
||||||
.Pq see Sy zfs_dbgmsg_enable
|
.Pq see Sy zfs_dbgmsg_enable
|
||||||
|
@ -66,7 +66,7 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
|
|||||||
{"gzip-9", 9, gzip_compress, gzip_decompress, NULL},
|
{"gzip-9", 9, gzip_compress, gzip_decompress, NULL},
|
||||||
{"zle", 64, zle_compress, zle_decompress, NULL},
|
{"zle", 64, zle_compress, zle_decompress, NULL},
|
||||||
{"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
|
{"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
|
||||||
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress,
|
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress_wrap,
|
||||||
zfs_zstd_decompress, zfs_zstd_decompress_level},
|
zfs_zstd_decompress, zfs_zstd_decompress_level},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -50,6 +50,10 @@
|
|||||||
#include "lib/zstd.h"
|
#include "lib/zstd.h"
|
||||||
#include "lib/common/zstd_errors.h"
|
#include "lib/common/zstd_errors.h"
|
||||||
|
|
||||||
|
static int zstd_earlyabort_pass = 1;
|
||||||
|
static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;
|
||||||
|
static unsigned int zstd_abort_size = (128 * 1024);
|
||||||
|
|
||||||
static kstat_t *zstd_ksp = NULL;
|
static kstat_t *zstd_ksp = NULL;
|
||||||
|
|
||||||
typedef struct zstd_stats {
|
typedef struct zstd_stats {
|
||||||
@ -62,6 +66,21 @@ typedef struct zstd_stats {
|
|||||||
kstat_named_t zstd_stat_dec_header_inval;
|
kstat_named_t zstd_stat_dec_header_inval;
|
||||||
kstat_named_t zstd_stat_com_fail;
|
kstat_named_t zstd_stat_com_fail;
|
||||||
kstat_named_t zstd_stat_dec_fail;
|
kstat_named_t zstd_stat_dec_fail;
|
||||||
|
/*
|
||||||
|
* LZ4 first-pass early abort verdict
|
||||||
|
*/
|
||||||
|
kstat_named_t zstd_stat_lz4pass_allowed;
|
||||||
|
kstat_named_t zstd_stat_lz4pass_rejected;
|
||||||
|
/*
|
||||||
|
* zstd-1 second-pass early abort verdict
|
||||||
|
*/
|
||||||
|
kstat_named_t zstd_stat_zstdpass_allowed;
|
||||||
|
kstat_named_t zstd_stat_zstdpass_rejected;
|
||||||
|
/*
|
||||||
|
* We excluded this from early abort for some reason
|
||||||
|
*/
|
||||||
|
kstat_named_t zstd_stat_passignored;
|
||||||
|
kstat_named_t zstd_stat_passignored_size;
|
||||||
kstat_named_t zstd_stat_buffers;
|
kstat_named_t zstd_stat_buffers;
|
||||||
kstat_named_t zstd_stat_size;
|
kstat_named_t zstd_stat_size;
|
||||||
} zstd_stats_t;
|
} zstd_stats_t;
|
||||||
@ -76,10 +95,44 @@ static zstd_stats_t zstd_stats = {
|
|||||||
{ "decompress_header_invalid", KSTAT_DATA_UINT64 },
|
{ "decompress_header_invalid", KSTAT_DATA_UINT64 },
|
||||||
{ "compress_failed", KSTAT_DATA_UINT64 },
|
{ "compress_failed", KSTAT_DATA_UINT64 },
|
||||||
{ "decompress_failed", KSTAT_DATA_UINT64 },
|
{ "decompress_failed", KSTAT_DATA_UINT64 },
|
||||||
|
{ "lz4pass_allowed", KSTAT_DATA_UINT64 },
|
||||||
|
{ "lz4pass_rejected", KSTAT_DATA_UINT64 },
|
||||||
|
{ "zstdpass_allowed", KSTAT_DATA_UINT64 },
|
||||||
|
{ "zstdpass_rejected", KSTAT_DATA_UINT64 },
|
||||||
|
{ "passignored", KSTAT_DATA_UINT64 },
|
||||||
|
{ "passignored_size", KSTAT_DATA_UINT64 },
|
||||||
{ "buffers", KSTAT_DATA_UINT64 },
|
{ "buffers", KSTAT_DATA_UINT64 },
|
||||||
{ "size", KSTAT_DATA_UINT64 },
|
{ "size", KSTAT_DATA_UINT64 },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
|
static int
|
||||||
|
kstat_zstd_update(kstat_t *ksp, int rw)
|
||||||
|
{
|
||||||
|
ASSERT(ksp != NULL);
|
||||||
|
|
||||||
|
if (rw == KSTAT_WRITE && ksp == zstd_ksp) {
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_alloc_fail);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_alloc_fallback);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_com_inval);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_dec_inval);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_dec_header_inval);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_com_fail);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_dec_fail);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_passignored);
|
||||||
|
ZSTDSTAT_ZERO(zstd_stat_passignored_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Enums describing the allocator type specified by kmem_type in zstd_kmem */
|
/* Enums describing the allocator type specified by kmem_type in zstd_kmem */
|
||||||
enum zstd_kmem_type {
|
enum zstd_kmem_type {
|
||||||
ZSTD_KMEM_UNKNOWN = 0,
|
ZSTD_KMEM_UNKNOWN = 0,
|
||||||
@ -377,6 +430,64 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t
|
||||||
|
zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len,
|
||||||
|
int level)
|
||||||
|
{
|
||||||
|
int16_t zstd_level;
|
||||||
|
if (zstd_enum_to_level(level, &zstd_level)) {
|
||||||
|
ZSTDSTAT_BUMP(zstd_stat_com_inval);
|
||||||
|
return (s_len);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* A zstd early abort heuristic.
|
||||||
|
*
|
||||||
|
* - Zeroth, if this is <= zstd-3, or < zstd_abort_size (currently
|
||||||
|
* 128k), don't try any of this, just go.
|
||||||
|
* (because experimentally that was a reasonable cutoff for a perf win
|
||||||
|
* with tiny ratio change)
|
||||||
|
* - First, we try LZ4 compression, and if it doesn't early abort, we
|
||||||
|
* jump directly to whatever compression level we intended to try.
|
||||||
|
* - Second, we try zstd-1 - if that errors out (usually, but not
|
||||||
|
* exclusively, if it would overflow), we give up early.
|
||||||
|
*
|
||||||
|
* If it works, instead we go on and compress anyway.
|
||||||
|
*
|
||||||
|
* Why two passes? LZ4 alone gets you a lot of the way, but on highly
|
||||||
|
* compressible data, it was losing up to 8.5% of the compressed
|
||||||
|
* savings versus no early abort, and all the zstd-fast levels are
|
||||||
|
* worse indications on their own than LZ4, and don't improve the LZ4
|
||||||
|
* pass noticably if stacked like this.
|
||||||
|
*/
|
||||||
|
size_t actual_abort_size = zstd_abort_size;
|
||||||
|
if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level &&
|
||||||
|
s_len >= actual_abort_size) {
|
||||||
|
int pass_len = 1;
|
||||||
|
pass_len = lz4_compress_zfs(s_start, d_start, s_len, d_len, 0);
|
||||||
|
if (pass_len < d_len) {
|
||||||
|
ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed);
|
||||||
|
goto keep_trying;
|
||||||
|
}
|
||||||
|
ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected);
|
||||||
|
|
||||||
|
pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len,
|
||||||
|
ZIO_ZSTD_LEVEL_1);
|
||||||
|
if (pass_len == s_len || pass_len <= 0 || pass_len > d_len) {
|
||||||
|
ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected);
|
||||||
|
return (s_len);
|
||||||
|
}
|
||||||
|
ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed);
|
||||||
|
} else {
|
||||||
|
ZSTDSTAT_BUMP(zstd_stat_passignored);
|
||||||
|
if (s_len < actual_abort_size) {
|
||||||
|
ZSTDSTAT_BUMP(zstd_stat_passignored_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
keep_trying:
|
||||||
|
return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* Compress block using zstd */
|
/* Compress block using zstd */
|
||||||
size_t
|
size_t
|
||||||
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
|
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
|
||||||
@ -437,8 +548,10 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
|
|||||||
* too small, that is not a failure. Everything else is a
|
* too small, that is not a failure. Everything else is a
|
||||||
* failure, so increment the compression failure counter.
|
* failure, so increment the compression failure counter.
|
||||||
*/
|
*/
|
||||||
if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) {
|
int err = ZSTD_getErrorCode(c_len);
|
||||||
|
if (err != ZSTD_error_dstSize_tooSmall) {
|
||||||
ZSTDSTAT_BUMP(zstd_stat_com_fail);
|
ZSTDSTAT_BUMP(zstd_stat_com_fail);
|
||||||
|
dprintf("Error: %s", ZSTD_getErrorString(err));
|
||||||
}
|
}
|
||||||
return (s_len);
|
return (s_len);
|
||||||
}
|
}
|
||||||
@ -753,6 +866,9 @@ zstd_init(void)
|
|||||||
if (zstd_ksp != NULL) {
|
if (zstd_ksp != NULL) {
|
||||||
zstd_ksp->ks_data = &zstd_stats;
|
zstd_ksp->ks_data = &zstd_stats;
|
||||||
kstat_install(zstd_ksp);
|
kstat_install(zstd_ksp);
|
||||||
|
#ifdef _KERNEL
|
||||||
|
zstd_ksp->ks_update = kstat_zstd_update;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
@ -781,8 +897,8 @@ module_init(zstd_init);
|
|||||||
module_exit(zstd_fini);
|
module_exit(zstd_fini);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
EXPORT_SYMBOL(zfs_zstd_compress);
|
ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, INT, ZMOD_RW,
|
||||||
EXPORT_SYMBOL(zfs_zstd_decompress_level);
|
"Enable early abort attempts when using zstd");
|
||||||
EXPORT_SYMBOL(zfs_zstd_decompress);
|
ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW,
|
||||||
EXPORT_SYMBOL(zfs_zstd_cache_reap_now);
|
"Minimal size of block to attempt early abort");
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user