mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-15 20:50:30 +03:00
db2b1fdb79
This is the supporting infrastructure for the upcoming dedup features. Traditionally, dedup objects live directly in the MOS root. While their details vary (checksum, type and class), they are all the same "kind" of thing - a store of dedup entries. The new features are more varied than that, and are better thought of as a set of related stores for the overall state of a dedup table. This adds a new feature flag, SPA_FEATURE_FAST_DEDUP. Enabling this will cause new DDTs to be created as a ZAP in the MOS root, named DDT-<checksum>. This is used as the root object for the normal type/class store objects, but will also be a place for any storage required by new features. This commit adds two new fields to ddt_t, for version and flags. These are intended to describe the structure and features of the overall dedup table, and are stored as-is in the DDT root. In this commit, flags are always zero, but the intent is that they can be used to hang optional logic or state onto for new dedup features. Version is always 1. For a "legacy" dedup table, where no DDT root directory exists, the version will be 0. ddt_configure() is expected to determine the version and flags features currently in operation based on whether or not the fast_dedup feature is enabled, and from what's available on disk. In this way, it's possible to support both old and new tables. This also provides a migration path. A legacy setup can be upgraded to FDT by creating the DDT root ZAP, moving the existing objects into it, and setting version and flags appropriately. There's no support for that here, but it would be straightforward to add later and allows the possibility that newer features could be applied to existing dedup tables. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Co-authored-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Closes #15892
208 lines
5.6 KiB
C
208 lines
5.6 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or https://opensource.org/licenses/CDDL-1.0.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
|
|
/*
|
|
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
* Use is subject to license terms.
|
|
*/
|
|
/*
|
|
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
|
|
* Copyright (c) 2019, Klara Inc.
|
|
* Copyright (c) 2019, Allan Jude
|
|
*/
|
|
|
|
#include <sys/zfs_context.h>
|
|
#include <sys/spa.h>
|
|
#include <sys/zfeature.h>
|
|
#include <sys/zio.h>
|
|
#include <sys/zio_compress.h>
|
|
#include <sys/zstd/zstd.h>
|
|
|
|
/*
 * Fault-injection knob for zio_decompress_data().
 *
 * When zero (the default) no failures are injected.  When set to a
 * nonzero value X, roughly one in every X decompression attempts is
 * forced to fail, simulating an undetected memory error.
 */
static unsigned long zio_decompress_fail_fraction = 0;
|
|
|
|
/*
|
|
* Compression vectors.
|
|
*
|
|
* NOTE: DO NOT CHANGE THE NAMES OF THESE COMPRESSION FUNCTIONS.
|
|
* THEY ARE USED AS ZAP KEY NAMES BY FAST DEDUP AND THEREFORE
|
|
* PART OF THE ON-DISK FORMAT.
|
|
*/
|
|
zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
|
|
{"inherit", 0, NULL, NULL, NULL},
|
|
{"on", 0, NULL, NULL, NULL},
|
|
{"uncompressed", 0, NULL, NULL, NULL},
|
|
{"lzjb", 0, lzjb_compress, lzjb_decompress, NULL},
|
|
{"empty", 0, NULL, NULL, NULL},
|
|
{"gzip-1", 1, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-2", 2, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-3", 3, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-4", 4, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-5", 5, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-6", 6, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-7", 7, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-8", 8, gzip_compress, gzip_decompress, NULL},
|
|
{"gzip-9", 9, gzip_compress, gzip_decompress, NULL},
|
|
{"zle", 64, zle_compress, zle_decompress, NULL},
|
|
{"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
|
|
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress_wrap,
|
|
zfs_zstd_decompress, zfs_zstd_decompress_level},
|
|
};
|
|
|
|
uint8_t
|
|
zio_complevel_select(spa_t *spa, enum zio_compress compress, uint8_t child,
|
|
uint8_t parent)
|
|
{
|
|
(void) spa;
|
|
uint8_t result;
|
|
|
|
if (!ZIO_COMPRESS_HASLEVEL(compress))
|
|
return (0);
|
|
|
|
result = child;
|
|
if (result == ZIO_COMPLEVEL_INHERIT)
|
|
result = parent;
|
|
|
|
return (result);
|
|
}
|
|
|
|
enum zio_compress
|
|
zio_compress_select(spa_t *spa, enum zio_compress child,
|
|
enum zio_compress parent)
|
|
{
|
|
enum zio_compress result;
|
|
|
|
ASSERT(child < ZIO_COMPRESS_FUNCTIONS);
|
|
ASSERT(parent < ZIO_COMPRESS_FUNCTIONS);
|
|
ASSERT(parent != ZIO_COMPRESS_INHERIT);
|
|
|
|
result = child;
|
|
if (result == ZIO_COMPRESS_INHERIT)
|
|
result = parent;
|
|
|
|
if (result == ZIO_COMPRESS_ON) {
|
|
if (spa_feature_is_active(spa, SPA_FEATURE_LZ4_COMPRESS))
|
|
result = ZIO_COMPRESS_LZ4_ON_VALUE;
|
|
else
|
|
result = ZIO_COMPRESS_LEGACY_ON_VALUE;
|
|
}
|
|
|
|
return (result);
|
|
}
|
|
|
|
size_t
|
|
zio_compress_data(enum zio_compress c, abd_t *src, void **dst, size_t s_len,
|
|
uint8_t level)
|
|
{
|
|
size_t c_len, d_len;
|
|
uint8_t complevel;
|
|
zio_compress_info_t *ci = &zio_compress_table[c];
|
|
|
|
ASSERT3U(c, <, ZIO_COMPRESS_FUNCTIONS);
|
|
ASSERT3U(ci->ci_compress, !=, NULL);
|
|
ASSERT3U(s_len, >, 0);
|
|
|
|
/* Compress at least 12.5% */
|
|
d_len = s_len - (s_len >> 3);
|
|
|
|
complevel = ci->ci_level;
|
|
|
|
if (c == ZIO_COMPRESS_ZSTD) {
|
|
/* If we don't know the level, we can't compress it */
|
|
if (level == ZIO_COMPLEVEL_INHERIT)
|
|
return (s_len);
|
|
|
|
if (level == ZIO_COMPLEVEL_DEFAULT)
|
|
complevel = ZIO_ZSTD_LEVEL_DEFAULT;
|
|
else
|
|
complevel = level;
|
|
|
|
ASSERT3U(complevel, !=, ZIO_COMPLEVEL_INHERIT);
|
|
}
|
|
|
|
if (*dst == NULL)
|
|
*dst = zio_buf_alloc(s_len);
|
|
|
|
/* No compression algorithms can read from ABDs directly */
|
|
void *tmp = abd_borrow_buf_copy(src, s_len);
|
|
c_len = ci->ci_compress(tmp, *dst, s_len, d_len, complevel);
|
|
abd_return_buf(src, tmp, s_len);
|
|
|
|
if (c_len > d_len)
|
|
return (s_len);
|
|
|
|
ASSERT3U(c_len, <=, d_len);
|
|
return (c_len);
|
|
}
|
|
|
|
int
|
|
zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
|
|
size_t s_len, size_t d_len, uint8_t *level)
|
|
{
|
|
zio_compress_info_t *ci = &zio_compress_table[c];
|
|
if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL)
|
|
return (SET_ERROR(EINVAL));
|
|
|
|
if (ci->ci_decompress_level != NULL && level != NULL)
|
|
return (ci->ci_decompress_level(src, dst, s_len, d_len, level));
|
|
|
|
return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level));
|
|
}
|
|
|
|
int
|
|
zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
|
|
size_t s_len, size_t d_len, uint8_t *level)
|
|
{
|
|
void *tmp = abd_borrow_buf_copy(src, s_len);
|
|
int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len, level);
|
|
abd_return_buf(src, tmp, s_len);
|
|
|
|
/*
|
|
* Decompression shouldn't fail, because we've already verified
|
|
* the checksum. However, for extra protection (e.g. against bitflips
|
|
* in non-ECC RAM), we handle this error (and test it).
|
|
*/
|
|
if (zio_decompress_fail_fraction != 0 &&
|
|
random_in_range(zio_decompress_fail_fraction) == 0)
|
|
ret = SET_ERROR(EINVAL);
|
|
|
|
return (ret);
|
|
}
|
|
|
|
int
|
|
zio_compress_to_feature(enum zio_compress comp)
|
|
{
|
|
switch (comp) {
|
|
case ZIO_COMPRESS_ZSTD:
|
|
return (SPA_FEATURE_ZSTD_COMPRESS);
|
|
default:
|
|
break;
|
|
}
|
|
return (SPA_FEATURE_NONE);
|
|
}
|