mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-27 03:19:35 +03:00
63f4bfd6ac
There should be no risk of us accidentally hitting this since we'd need maliciously malformed data to wind up in the pipeline, or a very unfortunate random bit flip at exactly the right moment. Still since we can handle it we should. Reviewed-by: Igor Kozhukhov <igor@dilos.org> Reviewed-by: George Melikov <mail@gmelikov.ru> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Adam Moss <c@yotes.com> Signed-off-by: Rich Ercolani <rincebrain@gmail.com> Closes #12947
988 lines
39 KiB
C
988 lines
39 KiB
C
/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
    - LZ4 homepage : http://www.lz4.org
    - LZ4 source repository : https://github.com/lz4/lz4
*/
|
|
|
|
/*
 * This file contains unmodified code from lz4 1.9.3's decompressor, plus
 * associated macros and constants.
 *
 * It also contains a couple of defines from the old lz4.c to make things
 * fit together smoothly.
 *
 */
|
|
|
|
#include <sys/zfs_context.h>
|
|
|
|
/*
 * Forward declaration of the decompression entry point.
 * Takes a source buffer, a destination buffer and two int sizes, and
 * returns an int.  The definition is not part of this section of the file.
 */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
|
|
|
|
/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12
|
|
|
|
/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6
|
|
|
|
/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 *
 * LZ4_BIG_ENDIAN is defined (to 1) only when _ZFS_BIG_ENDIAN is defined;
 * otherwise it is explicitly left undefined.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif
|
|
|
|
/*-************************************
 * CPU Feature Detection
 **************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 *
 * When no method is selected here the macro stays undefined, which the
 * accessors treat as method 0.
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif
|
|
|
|
/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
/* Start from a known state, then enable the software path only on __sunos__. */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define LZ4_FORCE_SW_BITCOUNT
#endif
|
|
|
|
/*
 * Compiler Options
 */
/* Disable restrict: the keyword expands to nothing for the rest of the file. */
#define restrict
|
|
|
|
/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 *
 * #undef on a name that is not currently a macro is a no-op, so no
 * #ifdef guard is needed before redefining it.
 */
#undef GCC_VERSION

/* major * 100 + minor of the GNU-compatible compiler in use */
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
|
|
|
/*
 * LZ4_FORCE_INLINE: storage-class/inline prefix for helpers that should be
 * inlined.  It can be predefined externally; otherwise it is chosen from the
 * compiler: __forceinline for MSVC, always_inline for GNU-compatible C99/C++
 * compilers, plain "static inline" for other C99/C++ compilers, and plain
 * "static" for anything older.
 */
#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */
|
|
|
|
/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 *
 * On every other target LZ4_FORCE_O2 expands to nothing and
 * LZ4_FORCE_INLINE is left as previously defined.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2 __attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define LZ4_FORCE_O2
#endif
|
|
|
|
/*
 * Branch-prediction hints.  Each macro is defined only if the environment
 * has not already provided one under the same name.
 *
 * expect(expr, value) evaluates to expr; on GCC >= 3, ICC >= 800 and clang
 * it also tells the compiler that expr is expected to equal value.
 */
#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif
#endif

/* likely()/unlikely() normalize expr to 0 or 1 before passing it to expect(). */
#ifndef likely
#define likely(expr)     expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr)   expect((expr) != 0, 0)
#endif
|
|
|
|
#ifndef _KERNEL
|
|
#include <stdlib.h> /* malloc, calloc, free */
|
|
#include <string.h> /* memset, memcpy */
|
|
#endif
|
|
/*
 * Thin wrappers over the C allocator and memset.  Only MEM_INIT is used in
 * the portion of the file shown here.
 * NOTE(review): malloc/calloc/free are only declared via <stdlib.h> when
 * _KERNEL is not defined; confirm nothing expands ALLOC/FREEMEM in kernel
 * builds.
 */
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,(s))
#define FREEMEM(p)        free(p)

/* Fill s bytes at p with the byte value v. */
#define MEM_INIT(p,v,s)   memset((p),(v),(s))
|
|
|
|
|
|
/*-************************************
 * Common Constants
 **************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64

/* Postfix size multipliers: written after a number, e.g. "64 KB" expands to 64 *(1 <<10). */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#endif

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

/*
 * A token byte is split into two 4-bit fields: the high RUN_BITS hold the
 * literal-run length and the low ML_BITS hold the match length.
 */
#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
|
|
|
|
/*
 * Debug logging is compiled out.  The macro expands to an empty compound
 * statement (not an expression), so a call site is valid with or without a
 * trailing semicolon.
 */
#define DEBUGLOG(l, ...) {}   /* disabled */

/* Route the lz4 code's assert() to ASSERT when no assert macro is already defined. */
#ifndef assert
#define assert ASSERT
#endif
|
|
|
|
/*-************************************
|
|
* Types
|
|
**************************************/
|
|
#ifndef _KERNEL
|
|
#include <limits.h>
|
|
#endif
|
|
/*
 * Fixed-width integer aliases used throughout the lz4 code.
 * With C99 (or C++) the <stdint.h> types are used; otherwise the native
 * types are used after checking that unsigned int is 32 bits wide.
 */
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
#else
# if UINT_MAX != 4294967295UL
#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
# endif
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

/* reg_t: integer type intended to match the width of a CPU register. */
#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif
|
|
|
|
/*
 * Output-limit directive with three explicitly numbered values.
 * It is not referenced in the portion of the file shown here.
 */
typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;
|
|
|
|
|
|
/*-************************************
|
|
* Reading and writing into memory
|
|
**************************************/
|
|
|
|
/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif
|
|
|
|
static unsigned LZ4_isLittleEndian(void)
|
|
{
|
|
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
|
|
return one.c[0];
|
|
}
|
|
|
|
|
|
#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
|
|
/* lie to the compiler about data alignment; use with caution */
|
|
|
|
static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
|
|
|
|
static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
|
|
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
|
|
|
|
#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
|
|
|
|
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
|
|
/* currently only defined for gcc and icc */
|
|
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
|
|
|
|
static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
|
|
|
|
static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
|
|
|
|
#else /* safe and portable access using memcpy() */
|
|
|
|
static U16 LZ4_read16(const void* memPtr)
|
|
{
|
|
U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
|
|
}
|
|
|
|
static void LZ4_write32(void* memPtr, U32 value)
|
|
{
|
|
LZ4_memcpy(memPtr, &value, sizeof(value));
|
|
}
|
|
|
|
#endif /* LZ4_FORCE_MEMORY_ACCESS */
|
|
|
|
static U16 LZ4_readLE16(const void* memPtr)
|
|
{
|
|
if (LZ4_isLittleEndian()) {
|
|
return LZ4_read16(memPtr);
|
|
} else {
|
|
const BYTE* p = (const BYTE*)memPtr;
|
|
return (U16)((U16)p[0] + (p[1]<<8));
|
|
}
|
|
}
|
|
|
|
/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
|
|
LZ4_FORCE_INLINE
|
|
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
|
|
{
|
|
BYTE* d = (BYTE*)dstPtr;
|
|
const BYTE* s = (const BYTE*)srcPtr;
|
|
BYTE* const e = (BYTE*)dstEnd;
|
|
|
|
do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
|
|
}
|
|
|
|
/*
 * Source-pointer adjustments for copying matches whose offset is below 8,
 * indexed by that offset (0..7): the source pointer is advanced by
 * inc32table[offset] before the second 4-byte copy, then moved back by
 * dec64table[offset] afterwards.
 */
static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
|
|
|
|
|
|
/*
 * LZ4_FAST_DEC_LOOP: 1 enables the fast decode loop and its helpers, 0
 * disables them.  It can be predefined externally; by default it is enabled
 * on x86/x86-64 and on aarch64 when not compiling with clang.
 */
#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif
|
|
|
|
#if LZ4_FAST_DEC_LOOP
|
|
|
|
LZ4_FORCE_INLINE void
|
|
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
|
|
{
|
|
assert(srcPtr + offset == dstPtr);
|
|
if (offset < 8) {
|
|
LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
|
|
dstPtr[0] = srcPtr[0];
|
|
dstPtr[1] = srcPtr[1];
|
|
dstPtr[2] = srcPtr[2];
|
|
dstPtr[3] = srcPtr[3];
|
|
srcPtr += inc32table[offset];
|
|
LZ4_memcpy(dstPtr+4, srcPtr, 4);
|
|
srcPtr -= dec64table[offset];
|
|
dstPtr += 8;
|
|
} else {
|
|
LZ4_memcpy(dstPtr, srcPtr, 8);
|
|
dstPtr += 8;
|
|
srcPtr += 8;
|
|
}
|
|
|
|
LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
|
|
}
|
|
|
|
/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
|
|
* this version copies two times 16 bytes (instead of one time 32 bytes)
|
|
* because it must be compatible with offsets >= 16. */
|
|
LZ4_FORCE_INLINE void
|
|
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
|
|
{
|
|
BYTE* d = (BYTE*)dstPtr;
|
|
const BYTE* s = (const BYTE*)srcPtr;
|
|
BYTE* const e = (BYTE*)dstEnd;
|
|
|
|
do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
|
|
}
|
|
|
|
/* LZ4_memcpy_using_offset() presumes :
|
|
* - dstEnd >= dstPtr + MINMATCH
|
|
* - there is at least 8 bytes available to write after dstEnd */
|
|
LZ4_FORCE_INLINE void
|
|
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
|
|
{
|
|
BYTE v[8];
|
|
|
|
assert(dstEnd >= dstPtr + MINMATCH);
|
|
|
|
switch(offset) {
|
|
case 1:
|
|
MEM_INIT(v, *srcPtr, 8);
|
|
break;
|
|
case 2:
|
|
LZ4_memcpy(v, srcPtr, 2);
|
|
LZ4_memcpy(&v[2], srcPtr, 2);
|
|
LZ4_memcpy(&v[4], v, 4);
|
|
break;
|
|
case 4:
|
|
LZ4_memcpy(v, srcPtr, 4);
|
|
LZ4_memcpy(&v[4], srcPtr, 4);
|
|
break;
|
|
default:
|
|
LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
|
|
return;
|
|
}
|
|
|
|
LZ4_memcpy(dstPtr, v, 8);
|
|
dstPtr += 8;
|
|
while (dstPtr < dstEnd) {
|
|
LZ4_memcpy(dstPtr, v, 8);
|
|
dstPtr += 8;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
/*-************************************
|
|
* Local Structures and types
|
|
**************************************/
|
|
/*
 * Table-type selector; clearedTable is explicitly 0 and the remaining values
 * follow in order.  Not referenced in the portion of the file shown here.
 */
typedef enum {
    clearedTable = 0,
    byPtr,
    byU32,
    byU16
} tableType_t;
|
|
|
|
/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current
 *                   blob being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
|
|
|
|
/*-*******************************
|
|
* Decompression functions
|
|
********************************/
|
|
|
|
/* How the decoder decides it is done: by output size or by input size. */
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
/* Whether the whole block must be decoded or decoding may stop early. */
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

/*
 * Status reported by read_variable_length(): ok, or which of its two bounds
 * checks (the initial one or the in-loop one) tripped.
 */
typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
|
|
|
|
LZ4_FORCE_INLINE unsigned
|
|
read_variable_length(const BYTE**ip, const BYTE* lencheck,
|
|
int loop_check, int initial_check,
|
|
variable_length_error* error)
|
|
{
|
|
U32 length = 0;
|
|
U32 s;
|
|
if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
|
|
*error = initial_error;
|
|
return length;
|
|
}
|
|
do {
|
|
s = **ip;
|
|
(*ip)++;
|
|
length += s;
|
|
if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
|
|
*error = loop_error;
|
|
return length;
|
|
}
|
|
} while (s==255);
|
|
|
|
return length;
|
|
}
|
|
|
|
/*
 * Despite the name, this simply expands to ASSERT(c); ASSERT is defined
 * outside this section of the file.
 * NOTE(review): presumably a run-time (debug-build) check rather than a
 * compile-time one - confirm against the ASSERT definition.
 */
#define LZ4_STATIC_ASSERT(c) ASSERT(c)
|
|
|
|
|
|
/*! LZ4_decompress_generic() :
|
|
* This generic decompression function covers all use cases.
|
|
* It shall be instantiated several times, using different sets of directives.
|
|
* Note that it is important for performance that this function really get inlined,
|
|
* in order to remove useless branches during compilation optimization.
|
|
*/
|
|
LZ4_FORCE_INLINE int
|
|
LZ4_decompress_generic(
|
|
const char* const src,
|
|
char* const dst,
|
|
int srcSize,
|
|
int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
|
|
|
|
endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */
|
|
earlyEnd_directive partialDecoding, /* full, partial */
|
|
dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
|
|
const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
|
|
const BYTE* const dictStart, /* only if dict==usingExtDict */
|
|
const size_t dictSize /* note : = 0 if noDict */
|
|
)
|
|
{
|
|
if ((src == NULL) || (outputSize < 0)) { return -1; }
|
|
|
|
{ const BYTE* ip = (const BYTE*) src;
|
|
const BYTE* const iend = ip + srcSize;
|
|
|
|
BYTE* op = (BYTE*) dst;
|
|
BYTE* const oend = op + outputSize;
|
|
BYTE* cpy;
|
|
|
|
const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
|
|
|
|
const int safeDecode = (endOnInput==endOnInputSize);
|
|
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
|
|
|
|
|
|
/* Set up the "end" pointers for the shortcut. */
|
|
const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
|
|
const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
|
|
|
|
const BYTE* match;
|
|
size_t offset;
|
|
unsigned token;
|
|
size_t length;
|
|
|
|
|
|
DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
|
|
|
|
/* Special cases */
|
|
assert(lowPrefix <= op);
|
|
if ((endOnInput) && (unlikely(outputSize==0))) {
|
|
/* Empty output buffer */
|
|
if (partialDecoding) return 0;
|
|
return ((srcSize==1) && (*ip==0)) ? 0 : -1;
|
|
}
|
|
if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
|
|
if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
|
|
|
|
/* Currently the fast loop shows a regression on qualcomm arm chips. */
|
|
#if LZ4_FAST_DEC_LOOP
|
|
if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
|
|
DEBUGLOG(6, "skip fast decode loop");
|
|
goto safe_decode;
|
|
}
|
|
|
|
/* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
|
|
while (1) {
|
|
/* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
|
|
assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
|
|
if (endOnInput) { assert(ip < iend); }
|
|
token = *ip++;
|
|
length = token >> ML_BITS; /* literal length */
|
|
|
|
assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
|
|
|
|
/* decode literal length */
|
|
if (length == RUN_MASK) {
|
|
variable_length_error error = ok;
|
|
length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
|
|
if (error == initial_error) { goto _output_error; }
|
|
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
|
|
if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
|
|
|
|
/* copy literals */
|
|
cpy = op+length;
|
|
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
|
|
if (endOnInput) { /* LZ4_decompress_safe() */
|
|
if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
|
|
LZ4_wildCopy32(op, ip, cpy);
|
|
} else { /* LZ4_decompress_fast() */
|
|
if (cpy>oend-8) { goto safe_literal_copy; }
|
|
LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
|
|
* it doesn't know input length, and only relies on end-of-block properties */
|
|
}
|
|
ip += length; op = cpy;
|
|
} else {
|
|
cpy = op+length;
|
|
if (endOnInput) { /* LZ4_decompress_safe() */
|
|
DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
|
|
/* We don't need to check oend, since we check it once for each loop below */
|
|
if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
|
|
/* Literals can only be 14, but hope compilers optimize if we copy by a register size */
|
|
LZ4_memcpy(op, ip, 16);
|
|
} else { /* LZ4_decompress_fast() */
|
|
/* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
|
|
* it doesn't know input length, and relies on end-of-block properties */
|
|
LZ4_memcpy(op, ip, 8);
|
|
if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
|
|
}
|
|
ip += length; op = cpy;
|
|
}
|
|
|
|
/* get offset */
|
|
offset = LZ4_readLE16(ip); ip+=2;
|
|
match = op - offset;
|
|
assert(match <= op);
|
|
|
|
/* get matchlength */
|
|
length = token & ML_MASK;
|
|
|
|
if (length == ML_MASK) {
|
|
variable_length_error error = ok;
|
|
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
|
|
length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
|
|
if (error != ok) { goto _output_error; }
|
|
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
|
|
length += MINMATCH;
|
|
if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
|
|
goto safe_match_copy;
|
|
}
|
|
} else {
|
|
length += MINMATCH;
|
|
if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
|
|
goto safe_match_copy;
|
|
}
|
|
|
|
/* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
|
|
if ((dict == withPrefix64k) || (match >= lowPrefix)) {
|
|
if (offset >= 8) {
|
|
assert(match >= lowPrefix);
|
|
assert(match <= op);
|
|
assert(op + 18 <= oend);
|
|
|
|
LZ4_memcpy(op, match, 8);
|
|
LZ4_memcpy(op+8, match+8, 8);
|
|
LZ4_memcpy(op+16, match+16, 2);
|
|
op += length;
|
|
continue;
|
|
} } }
|
|
|
|
if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
|
|
/* match starting within external dictionary */
|
|
if ((dict==usingExtDict) && (match < lowPrefix)) {
|
|
if (unlikely(op+length > oend-LASTLITERALS)) {
|
|
if (partialDecoding) {
|
|
DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
|
|
length = MIN(length, (size_t)(oend-op));
|
|
} else {
|
|
goto _output_error; /* end-of-block condition violated */
|
|
} }
|
|
|
|
if (length <= (size_t)(lowPrefix-match)) {
|
|
/* match fits entirely within external dictionary : just copy */
|
|
memmove(op, dictEnd - (lowPrefix-match), length);
|
|
op += length;
|
|
} else {
|
|
/* match stretches into both external dictionary and current block */
|
|
size_t const copySize = (size_t)(lowPrefix - match);
|
|
size_t const restSize = length - copySize;
|
|
LZ4_memcpy(op, dictEnd - copySize, copySize);
|
|
op += copySize;
|
|
if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
|
|
BYTE* const endOfMatch = op + restSize;
|
|
const BYTE* copyFrom = lowPrefix;
|
|
while (op < endOfMatch) { *op++ = *copyFrom++; }
|
|
} else {
|
|
LZ4_memcpy(op, lowPrefix, restSize);
|
|
op += restSize;
|
|
} }
|
|
continue;
|
|
}
|
|
|
|
/* copy match within block */
|
|
cpy = op + length;
|
|
|
|
assert((op <= oend) && (oend-op >= 32));
|
|
if (unlikely(offset<16)) {
|
|
LZ4_memcpy_using_offset(op, match, cpy, offset);
|
|
} else {
|
|
LZ4_wildCopy32(op, match, cpy);
|
|
}
|
|
|
|
op = cpy; /* wildcopy correction */
|
|
}
|
|
safe_decode:
|
|
#endif
|
|
|
|
/* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
|
|
while (1) {
|
|
token = *ip++;
|
|
length = token >> ML_BITS; /* literal length */
|
|
|
|
assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
|
|
|
|
/* A two-stage shortcut for the most common case:
|
|
* 1) If the literal length is 0..14, and there is enough space,
|
|
* enter the shortcut and copy 16 bytes on behalf of the literals
|
|
* (in the fast mode, only 8 bytes can be safely copied this way).
|
|
* 2) Further if the match length is 4..18, copy 18 bytes in a similar
|
|
* manner; but we ensure that there's enough space in the output for
|
|
* those 18 bytes earlier, upon entering the shortcut (in other words,
|
|
* there is a combined check for both stages).
|
|
*/
|
|
if ( (endOnInput ? length != RUN_MASK : length <= 8)
|
|
/* strictly "less than" on input, to re-enter the loop with at least one byte */
|
|
&& likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
|
|
/* Copy the literals */
|
|
LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
|
|
op += length; ip += length;
|
|
|
|
/* The second stage: prepare for match copying, decode full info.
|
|
* If it doesn't work out, the info won't be wasted. */
|
|
length = token & ML_MASK; /* match length */
|
|
offset = LZ4_readLE16(ip); ip += 2;
|
|
match = op - offset;
|
|
assert(match <= op); /* check overflow */
|
|
|
|
/* Do not deal with overlapping matches. */
|
|
if ( (length != ML_MASK)
|
|
&& (offset >= 8)
|
|
&& (dict==withPrefix64k || match >= lowPrefix) ) {
|
|
/* Copy the match. */
|
|
LZ4_memcpy(op + 0, match + 0, 8);
|
|
LZ4_memcpy(op + 8, match + 8, 8);
|
|
LZ4_memcpy(op +16, match +16, 2);
|
|
op += length + MINMATCH;
|
|
/* Both stages worked, load the next token. */
|
|
continue;
|
|
}
|
|
|
|
/* The second stage didn't work out, but the info is ready.
|
|
* Propel it right to the point of match copying. */
|
|
goto _copy_match;
|
|
}
|
|
|
|
/* decode literal length */
|
|
if (length == RUN_MASK) {
|
|
variable_length_error error = ok;
|
|
length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
|
|
if (error == initial_error) { goto _output_error; }
|
|
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
|
|
if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
|
|
}
|
|
|
|
/* copy literals */
|
|
cpy = op+length;
|
|
#if LZ4_FAST_DEC_LOOP
|
|
safe_literal_copy:
|
|
#endif
|
|
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
|
|
if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
|
|
|| ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
|
|
{
|
|
/* We've either hit the input parsing restriction or the output parsing restriction.
|
|
* In the normal scenario, decoding a full block, it must be the last sequence,
|
|
* otherwise it's an error (invalid input or dimensions).
|
|
* In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
|
|
*/
|
|
if (partialDecoding) {
|
|
/* Since we are partial decoding we may be in this block because of the output parsing
|
|
* restriction, which is not valid since the output buffer is allowed to be undersized.
|
|
*/
|
|
assert(endOnInput);
|
|
DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
|
|
DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
|
|
DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
|
|
DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
|
|
/* Finishing in the middle of a literals segment,
|
|
* due to lack of input.
|
|
*/
|
|
if (ip+length > iend) {
|
|
length = (size_t)(iend-ip);
|
|
cpy = op + length;
|
|
}
|
|
/* Finishing in the middle of a literals segment,
|
|
* due to lack of output space.
|
|
*/
|
|
if (cpy > oend) {
|
|
cpy = oend;
|
|
assert(op<=oend);
|
|
length = (size_t)(oend-op);
|
|
}
|
|
} else {
|
|
/* We must be on the last sequence because of the parsing limitations so check
|
|
* that we exactly regenerate the original size (must be exact when !endOnInput).
|
|
*/
|
|
if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
|
|
/* We must be on the last sequence (or invalid) because of the parsing limitations
|
|
* so check that we exactly consume the input and don't overrun the output buffer.
|
|
*/
|
|
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
|
|
DEBUGLOG(6, "should have been last run of literals")
|
|
DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
|
|
DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
|
|
goto _output_error;
|
|
}
|
|
}
|
|
memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
|
|
ip += length;
|
|
op += length;
|
|
/* Necessarily EOF when !partialDecoding.
|
|
* When partialDecoding, it is EOF if we've either
|
|
* filled the output buffer or
|
|
* can't proceed with reading an offset for following match.
|
|
*/
|
|
if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
|
|
break;
|
|
}
|
|
} else {
|
|
LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */
|
|
ip += length; op = cpy;
|
|
}
|
|
|
|
/* get offset */
|
|
offset = LZ4_readLE16(ip); ip+=2;
|
|
match = op - offset;
|
|
|
|
/* get matchlength */
|
|
length = token & ML_MASK;
|
|
|
|
_copy_match:
|
|
if (length == ML_MASK) {
|
|
variable_length_error error = ok;
|
|
length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
|
|
if (error != ok) goto _output_error;
|
|
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
|
|
}
|
|
length += MINMATCH;
|
|
|
|
#if LZ4_FAST_DEC_LOOP
|
|
safe_match_copy:
|
|
#endif
|
|
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
|
|
/* match starting within external dictionary */
|
|
if ((dict==usingExtDict) && (match < lowPrefix)) {
|
|
if (unlikely(op+length > oend-LASTLITERALS)) {
|
|
if (partialDecoding) length = MIN(length, (size_t)(oend-op));
|
|
else goto _output_error; /* doesn't respect parsing restriction */
|
|
}
|
|
|
|
if (length <= (size_t)(lowPrefix-match)) {
|
|
/* match fits entirely within external dictionary : just copy */
|
|
memmove(op, dictEnd - (lowPrefix-match), length);
|
|
op += length;
|
|
} else {
|
|
/* match stretches into both external dictionary and current block */
|
|
size_t const copySize = (size_t)(lowPrefix - match);
|
|
size_t const restSize = length - copySize;
|
|
LZ4_memcpy(op, dictEnd - copySize, copySize);
|
|
op += copySize;
|
|
if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
|
|
BYTE* const endOfMatch = op + restSize;
|
|
const BYTE* copyFrom = lowPrefix;
|
|
while (op < endOfMatch) *op++ = *copyFrom++;
|
|
} else {
|
|
LZ4_memcpy(op, lowPrefix, restSize);
|
|
op += restSize;
|
|
} }
|
|
continue;
|
|
}
|
|
assert(match >= lowPrefix);
|
|
|
|
/* copy match within block */
|
|
cpy = op + length;
|
|
|
|
/* partialDecoding : may end anywhere within the block */
|
|
assert(op<=oend);
|
|
if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
|
|
size_t const mlen = MIN(length, (size_t)(oend-op));
|
|
const BYTE* const matchEnd = match + mlen;
|
|
BYTE* const copyEnd = op + mlen;
|
|
if (matchEnd > op) { /* overlap copy */
|
|
while (op < copyEnd) { *op++ = *match++; }
|
|
} else {
|
|
LZ4_memcpy(op, match, mlen);
|
|
}
|
|
op = copyEnd;
|
|
if (op == oend) { break; }
|
|
continue;
|
|
}
|
|
|
|
if (unlikely(offset<8)) {
|
|
LZ4_write32(op, 0); /* silence msan warning when offset==0 */
|
|
op[0] = match[0];
|
|
op[1] = match[1];
|
|
op[2] = match[2];
|
|
op[3] = match[3];
|
|
match += inc32table[offset];
|
|
LZ4_memcpy(op+4, match, 4);
|
|
match -= dec64table[offset];
|
|
} else {
|
|
LZ4_memcpy(op, match, 8);
|
|
match += 8;
|
|
}
|
|
op += 8;
|
|
|
|
if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
|
|
BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
|
|
if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
|
|
if (op < oCopyLimit) {
|
|
LZ4_wildCopy8(op, match, oCopyLimit);
|
|
match += oCopyLimit - op;
|
|
op = oCopyLimit;
|
|
}
|
|
while (op < cpy) { *op++ = *match++; }
|
|
} else {
|
|
LZ4_memcpy(op, match, 8);
|
|
if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
|
|
}
|
|
op = cpy; /* wildcopy correction */
|
|
}
|
|
|
|
/* end of decoding */
|
|
if (endOnInput) {
|
|
DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
|
|
return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
|
|
} else {
|
|
return (int) (((const char*)ip)-src); /* Nb of input bytes read */
|
|
}
|
|
|
|
/* Overflow error detected */
|
|
_output_error:
|
|
return (int) (-(((const char*)ip)-src))-1;
|
|
}
|
|
}
|
|
|
|
/*
 * LZ4_uncompress_unknownOutputSize() :
 *   compressedSize : is the input size, therefore the compressed size
 *   maxDecompressedSize : is the size of the destination buffer (which
 *       must be already allocated)
 *   return : the number of bytes decoded in the destination buffer
 *       (necessarily <= maxDecompressedSize). If the source stream is
 *       malformed, the function will stop decoding and return a
 *       negative result, indicating the byte position of the faulty
 *       instruction. This function never writes beyond dest +
 *       maxDecompressedSize, and is therefore protected against
 *       malicious data packets.
 *   note : Destination buffer must be already allocated.
 *       This version is slightly slower than real_LZ4_uncompress()
 *
 */
|
|
|
|
/*
|
|
* Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
|
|
* wrapper for LZ4_decompress_safe which is a wrapper for
|
|
* LZ4_decompress_generic; this wrapper flattens that, rather than
|
|
* rewriting the callers.
|
|
*/
|
|
int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
|
|
{
|
|
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
|
|
endOnInputSize, decode_full_block, noDict,
|
|
(BYTE*)dest, NULL, 0);
|
|
}
|