mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Fix cross-endian interoperability of zstd
It turns out that layouts of union bitfields are a pain, and the current code results in an inconsistent layout between BE and LE systems, leading to zstd-active datasets on one erroring out on the other. Switch everyone over to the LE layout, and add compatibility code to read both. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Rich Ercolani <rincebrain@gmail.com> Closes #12008 Closes #12022
This commit is contained in:
		
							parent
							
								
									4e8a639d5f
								
							
						
					
					
						commit
						fb823061b0
					
				| @ -2211,7 +2211,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, | ||||
| 		(void) snprintf(blkbuf + strlen(blkbuf), | ||||
| 		    buflen - strlen(blkbuf), | ||||
| 		    " ZSTD:size=%u:version=%u:level=%u:EMBEDDED", | ||||
| 		    zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level); | ||||
| 		    zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), | ||||
| 		    zfs_get_hdrlevel(&zstd_hdr)); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| @ -2235,7 +2236,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, | ||||
| 	(void) snprintf(blkbuf + strlen(blkbuf), | ||||
| 	    buflen - strlen(blkbuf), | ||||
| 	    " ZSTD:size=%u:version=%u:level=%u:NORMAL", | ||||
| 	    zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level); | ||||
| 	    zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), | ||||
| 	    zfs_get_hdrlevel(&zstd_hdr)); | ||||
| 
 | ||||
| 	abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp)); | ||||
| } | ||||
|  | ||||
| @ -56,21 +56,24 @@ typedef struct zfs_zstd_header { | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Version and compression level | ||||
| 	 * We use a union to be able to big endian encode a single 32 bit | ||||
| 	 * unsigned integer, but still access the individual bitmasked | ||||
| 	 * components easily. | ||||
| 	 * We used to use a union to reference compression level | ||||
| 	 * and version easily, but as it turns out, relying on the | ||||
| 	 * ordering of bitfields is not remotely portable. | ||||
| 	 * So now we have get/set functions in zfs_zstd.c for | ||||
| 	 * manipulating this in just the right way forever. | ||||
| 	 */ | ||||
| 	union { | ||||
| 		uint32_t raw_version_level; | ||||
| 		struct { | ||||
| 			uint32_t version : 24; | ||||
| 			uint8_t level; | ||||
| 		}; | ||||
| 	}; | ||||
| 
 | ||||
| 	uint32_t raw_version_level; | ||||
| 	char data[]; | ||||
| } zfs_zstdhdr_t; | ||||
| 
 | ||||
| /*
 | ||||
|  * Simple struct to pass the data from raw_version_level around. | ||||
|  */ | ||||
| typedef struct zfs_zstd_meta { | ||||
| 	uint8_t level; | ||||
| 	uint32_t version; | ||||
| } zfs_zstdmeta_t; | ||||
| 
 | ||||
| /*
 | ||||
|  * kstat helper macros | ||||
|  */ | ||||
| @ -94,6 +97,129 @@ int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, | ||||
|     size_t d_len, int n); | ||||
| void zfs_zstd_cache_reap_now(void); | ||||
| 
 | ||||
| /*
 | ||||
|  * So, the reason we have all these complicated set/get functions is that | ||||
|  * originally, in the zstd "header" we wrote out to disk, we used a 32-bit | ||||
|  * bitfield to store the "level" (8 bits) and "version" (24 bits). | ||||
|  * | ||||
|  * Unfortunately, bitfields make few promises about how they're arranged in | ||||
|  * memory... | ||||
|  * | ||||
|  * By way of example, if we were using version 1.4.5 and level 3, it'd be | ||||
|  * level = 0x03, version = 10405/0x0028A5, which gets broken into Vhigh = 0x00, | ||||
|  * Vmid = 0x28, Vlow = 0xA5. We include these positions below to help follow | ||||
|  * which data winds up where. | ||||
|  * | ||||
|  * As a consequence, we wound up with little endian platforms with a layout | ||||
|  * like this in memory: | ||||
|  * | ||||
|  *      0       8      16      24      32 | ||||
|  *      +-------+-------+-------+-------+ | ||||
|  *      | Vlow  | Vmid  | Vhigh | level | | ||||
|  *      +-------+-------+-------+-------+ | ||||
|  *        =A5     =28     =00     =03 | ||||
|  * | ||||
|  * ...and then, after being run through BE_32(), serializing this out to | ||||
|  * disk: | ||||
|  * | ||||
|  *      0       8      16      24      32 | ||||
|  *      +-------+-------+-------+-------+ | ||||
|  *      | level | Vhigh | Vmid  | Vlow  | | ||||
|  *      +-------+-------+-------+-------+ | ||||
|  *        =03     =00     =28     =A5 | ||||
|  * | ||||
|  * while on big-endian systems, since BE_32() is a noop there, both in | ||||
|  * memory and on disk, we wind up with: | ||||
|  * | ||||
|  *      0       8      16      24      32 | ||||
|  *      +-------+-------+-------+-------+ | ||||
|  *      | Vhigh | Vmid  | Vlow  | level | | ||||
|  *      +-------+-------+-------+-------+ | ||||
|  *        =00     =28     =A5     =03 | ||||
|  * | ||||
|  * (Vhigh is always 0 until version exceeds 6.55.35. Vmid and Vlow are the | ||||
|  * other two bytes of the "version" data.) | ||||
|  * | ||||
|  * So now we use the BF32_SET macros to get consistent behavior (the | ||||
|  * ondisk LE encoding, since x86 currently rules the world) across | ||||
|  * platforms, but the "get" behavior requires that we check each of the | ||||
|  * bytes in the aforementioned former-bitfield for 0x00, and from there, | ||||
|  * we can know which possible layout we're dealing with. (Only the two | ||||
|  * that have been observed in the wild are illustrated above, but handlers | ||||
|  * for all 4 positions of 0x00 are implemented. | ||||
|  */ | ||||
| 
 | ||||
| static inline void | ||||
| zfs_get_hdrmeta(const zfs_zstdhdr_t *blob, zfs_zstdmeta_t *res) | ||||
| { | ||||
| 	uint32_t raw = blob->raw_version_level; | ||||
| 	uint8_t findme = 0xff; | ||||
| 	int shift; | ||||
| 	for (shift = 0; shift < 4; shift++) { | ||||
| 		findme = BF32_GET(raw, 8*shift, 8); | ||||
| 		if (findme == 0) | ||||
| 			break; | ||||
| 	} | ||||
| 	switch (shift) { | ||||
| 	case 0: | ||||
| 		res->level = BF32_GET(raw, 24, 8); | ||||
| 		res->version = BSWAP_32(raw); | ||||
| 		res->version = BF32_GET(res->version, 8, 24); | ||||
| 		break; | ||||
| 	case 1: | ||||
| 		res->level = BF32_GET(raw, 0, 8); | ||||
| 		res->version = BSWAP_32(raw); | ||||
| 		res->version = BF32_GET(res->version, 0, 24); | ||||
| 		break; | ||||
| 	case 2: | ||||
| 		res->level = BF32_GET(raw, 24, 8); | ||||
| 		res->version = BF32_GET(raw, 0, 24); | ||||
| 		break; | ||||
| 	case 3: | ||||
| 		res->level = BF32_GET(raw, 0, 8); | ||||
| 		res->version = BF32_GET(raw, 8, 24); | ||||
| 		break; | ||||
| 	default: | ||||
| 		res->level = 0; | ||||
| 		res->version = 0; | ||||
| 		break; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static inline uint8_t | ||||
| zfs_get_hdrlevel(const zfs_zstdhdr_t *blob) | ||||
| { | ||||
| 	uint8_t level = 0; | ||||
| 	zfs_zstdmeta_t res; | ||||
| 	zfs_get_hdrmeta(blob, &res); | ||||
| 	level = res.level; | ||||
| 	return (level); | ||||
| } | ||||
| 
 | ||||
| static inline uint32_t | ||||
| zfs_get_hdrversion(const zfs_zstdhdr_t *blob) | ||||
| { | ||||
| 	uint32_t version = 0; | ||||
| 	zfs_zstdmeta_t res; | ||||
| 	zfs_get_hdrmeta(blob, &res); | ||||
| 	version = res.version; | ||||
| 	return (version); | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| zfs_set_hdrversion(zfs_zstdhdr_t *blob, uint32_t version) | ||||
| { | ||||
| 	BF32_SET(blob->raw_version_level, 0, 24, version); | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| zfs_set_hdrlevel(zfs_zstdhdr_t *blob, uint8_t level) | ||||
| { | ||||
| 	BF32_SET(blob->raw_version_level, 24, 8, level); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| #ifdef	__cplusplus | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @ -33,6 +33,7 @@ $(obj)/zfs_zstd.o: c_flags += -include $(zstd_include)/zstd_compat_wrapper.h | ||||
| 
 | ||||
| $(MODULE)-objs += zfs_zstd.o | ||||
| $(MODULE)-objs += lib/zstd.o | ||||
| $(MODULE)-objs += zstd_sparc.o | ||||
| 
 | ||||
| all: | ||||
| 	mkdir -p lib | ||||
|  | ||||
							
								
								
									
										4
									
								
								module/zstd/include/sparc_compat.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								module/zstd/include/sparc_compat.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,4 @@ | ||||
| #if defined(__sparc) | ||||
| uint64_t __bswapdi2(uint64_t in); | ||||
| uint32_t __bswapsi2(uint32_t in); | ||||
| #endif | ||||
| @ -361,6 +361,7 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level) | ||||
| 	return (1); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Compress block using zstd */ | ||||
| size_t | ||||
| zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, | ||||
| @ -458,8 +459,8 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, | ||||
| 	 * As soon as such incompatibility occurs, handling code needs to be | ||||
| 	 * added, differentiating between the versions. | ||||
| 	 */ | ||||
| 	hdr->version = ZSTD_VERSION_NUMBER; | ||||
| 	hdr->level = level; | ||||
| 	zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER); | ||||
| 	zfs_set_hdrlevel(hdr, level); | ||||
| 	hdr->raw_version_level = BE_32(hdr->raw_version_level); | ||||
| 
 | ||||
| 	return (c_len + sizeof (*hdr)); | ||||
| @ -485,6 +486,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, | ||||
| 	 * not modify the original data that may be used again later. | ||||
| 	 */ | ||||
| 	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level); | ||||
| 	uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * NOTE: We ignore the ZSTD version for now. As soon as any | ||||
| @ -497,13 +499,13 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, | ||||
| 	 * An invalid level is a strong indicator for data corruption! In such | ||||
| 	 * case return an error so the upper layers can try to fix it. | ||||
| 	 */ | ||||
| 	if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) { | ||||
| 	if (zstd_enum_to_level(curlevel, &zstd_level)) { | ||||
| 		ZSTDSTAT_BUMP(zstd_stat_dec_inval); | ||||
| 		return (1); | ||||
| 	} | ||||
| 
 | ||||
| 	ASSERT3U(d_len, >=, s_len); | ||||
| 	ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT); | ||||
| 	ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT); | ||||
| 
 | ||||
| 	/* Invalid compressed buffer size encoded at start */ | ||||
| 	if (c_len + sizeof (*hdr) > s_len) { | ||||
| @ -534,7 +536,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, | ||||
| 	} | ||||
| 
 | ||||
| 	if (level) { | ||||
| 		*level = hdr_copy.level; | ||||
| 		*level = curlevel; | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| @ -764,7 +766,7 @@ module_exit(zstd_fini); | ||||
| 
 | ||||
| ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS"); | ||||
| ZFS_MODULE_LICENSE("Dual BSD/GPL"); | ||||
| ZFS_MODULE_VERSION(ZSTD_VERSION_STRING); | ||||
| ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a"); | ||||
| 
 | ||||
| EXPORT_SYMBOL(zfs_zstd_compress); | ||||
| EXPORT_SYMBOL(zfs_zstd_decompress_level); | ||||
|  | ||||
							
								
								
									
										11
									
								
								module/zstd/zstd_sparc.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								module/zstd/zstd_sparc.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,11 @@ | ||||
| #ifdef __sparc__ | ||||
| #include <stdint.h> | ||||
| #include <sys/byteorder.h> | ||||
| #include "include/sparc_compat.h" | ||||
| uint64_t __bswapdi2(uint64_t in) { | ||||
| 	return (BSWAP_64(in)); | ||||
| } | ||||
| uint32_t __bswapsi2(uint32_t in) { | ||||
| 	return (BSWAP_32(in)); | ||||
| } | ||||
| #endif | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Rich Ercolani
						Rich Ercolani