Zstd: Update bundled library to v1.5.7 without further adjustments

This commit only replaces the bundled source and does not include any
ZFS integration changes. Because the build depends on integration
adjustments, it will fail until the accompanying integration commit is
applied.

Upstream release: https://github.com/facebook/zstd/releases/tag/v1.5.7

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Moch <mail@alexmoch.com>
Closes #18089
This commit is contained in:
Alexander Moch 2025-12-27 21:25:25 +01:00 committed by Brian Behlendorf
parent 54b141fab5
commit bbcddb127a
61 changed files with 26308 additions and 7794 deletions

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file provides custom allocation primitives
*/
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "compiler.h" /* MEM_STATIC */
#define ZSTD_STATIC_LINKING_ONLY
#include "../zstd.h" /* ZSTD_customMem */
#ifndef ZSTD_ALLOCATIONS_H
#define ZSTD_ALLOCATIONS_H
/* custom memory allocation functions */
MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return ZSTD_malloc(size);
}
MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
ZSTD_memset(ptr, 0, size);
return ptr;
}
return ZSTD_calloc(1, size);
}
MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
ZSTD_free(ptr);
}
}
#endif /* ZSTD_ALLOCATIONS_H */

View File

@ -0,0 +1,205 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_BITS_H
#define ZSTD_BITS_H
#include "mem.h"
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
assert(val != 0);
{
static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9};
return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
}
}
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
assert(val != 0);
#if defined(_MSC_VER)
# if STATIC_BMI2
return (unsigned)_tzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward(&r, val);
return (unsigned)r;
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_ctz(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_ctz(val);
#else
return ZSTD_countTrailingZeros32_fallback(val);
#endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
{
assert(val != 0);
{
static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31};
val |= val >> 1;
val |= val >> 2;
val |= val >> 4;
val |= val >> 8;
val |= val >> 16;
return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
}
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
assert(val != 0);
#if defined(_MSC_VER)
# if STATIC_BMI2
return (unsigned)_lzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)(31 - r);
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_clz(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_clz(val);
#else
return ZSTD_countLeadingZeros32_fallback(val);
#endif
}
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return (unsigned)_tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, val);
return (unsigned)r;
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
return (unsigned)__builtin_ctzll(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_ctzll(val);
#else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (leastSignificantWord == 0) {
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
} else {
return ZSTD_countTrailingZeros32(leastSignificantWord);
}
}
#endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return (unsigned)_lzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse64(&r, val);
return (unsigned)(63 - r);
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)(__builtin_clzll(val));
#elif defined(__ICCARM__)
return (unsigned)(__builtin_clzll(val));
#else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (mostSignificantWord == 0) {
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
} else {
return ZSTD_countLeadingZeros32(mostSignificantWord);
}
}
#endif
}
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
return ZSTD_countTrailingZeros64((U64)val) >> 3;
} else {
return ZSTD_countTrailingZeros32((U32)val) >> 3;
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
return ZSTD_countLeadingZeros64((U64)val) >> 3;
} else {
return ZSTD_countLeadingZeros32((U32)val) >> 3;
}
}
}
MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
return 31 - ZSTD_countLeadingZeros32(val);
}
/* ZSTD_rotateRight_*():
* Rotates a bitfield to the right by "count" bits.
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
*/
MEM_STATIC
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
assert(count < 64);
count &= 0x3F; /* for fickle pattern recognition */
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
}
MEM_STATIC
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
assert(count < 32);
count &= 0x1F; /* for fickle pattern recognition */
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
}
MEM_STATIC
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
assert(count < 16);
count &= 0x0F; /* for fickle pattern recognition */
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
}
#endif /* ZSTD_BITS_H */

View File

@ -1,8 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* bitstream
* Part of FSE library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -15,10 +14,6 @@
#ifndef BITSTREAM_H_MODULE
#define BITSTREAM_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/*
* This API consists of small unitary functions, which must be inlined for best performance.
* Since link-time-optimization is not available for all compilers,
@ -32,15 +27,17 @@ extern "C" {
#include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
#include "bits.h" /* ZSTD_highbit32 */
/*=========================================
* Target specific
=========================================*/
#if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#ifndef ZSTD_NO_INTRINSICS
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
# elif defined(__ICCARM__)
# include <intrinsics.h>
# endif
#endif
#define STREAM_ACCUMULATOR_MIN_32 25
@ -51,12 +48,13 @@ extern "C" {
/*-******************************************
* bitStream encoding API (write forward)
********************************************/
typedef size_t BitContainerType;
/* bitStream can mix input from multiple sources.
* A critical property of these streams is that they encode and decode in **reverse** direction.
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
*/
typedef struct {
size_t bitContainer;
BitContainerType bitContainer;
unsigned bitPos;
char* startPtr;
char* ptr;
@ -64,7 +62,7 @@ typedef struct {
} BIT_CStream_t;
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
@ -73,7 +71,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
*
* bits are first added to a local register.
* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
* Writing data into memory is an explicit operation, performed by the flushBits function.
* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
* After a flushBits, a maximum of 7 bits might still be stored into local register.
@ -90,28 +88,28 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
* bitStream decoding API (read backward)
**********************************************/
typedef struct {
size_t bitContainer;
BitContainerType bitContainer;
unsigned bitsConsumed;
const char* ptr;
const char* start;
const char* limitPtr;
} BIT_DStream_t;
typedef enum { BIT_DStream_unfinished = 0,
BIT_DStream_endOfBuffer = 1,
BIT_DStream_completed = 2,
BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
} BIT_DStream_status; /* result of BIT_reloadDStream() */
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
/* Start by invoking BIT_initDStream().
* A chunk of the bitStream is then stored into a local register.
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
* You can then retrieve bitFields stored into the local register, **in reverse order**.
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
@ -123,7 +121,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
/*-****************************************
* unsafe API
******************************************/
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
@ -132,38 +130,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster, but works only if nbBits >= 1 */
/*-**************************************************************
* Internal functions
****************************************************************/
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
# endif
}
}
/*===== Local Constants =====*/
static const unsigned BIT_mask[] = {
0, 1, 3, 7, 0xF, 0x1F,
@ -193,16 +159,31 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
return 0;
}
FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
{
#if STATIC_BMI2 && !defined(ZSTD_NO_INTRINSICS)
# if (defined(__x86_64__) || defined(_M_X64)) && !defined(__ILP32__)
return _bzhi_u64(bitContainer, nbBits);
# else
DEBUG_STATIC_ASSERT(sizeof(bitContainer) == sizeof(U32));
return _bzhi_u32(bitContainer, nbBits);
# endif
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_addBits() :
* can add up to 31 bits into `bitC`.
* Note : does not check for register overflow ! */
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
BitContainerType value, unsigned nbBits)
{
MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
bitC->bitPos += nbBits;
}
@ -210,7 +191,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
* works only if `value` is _clean_,
* meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
BitContainerType value, unsigned nbBits)
{
assert((value>>nbBits) == 0);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
@ -257,7 +238,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
BIT_addBitsFast(bitC, 1, 1); /* endMark */
BIT_flushBits(bitC);
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
}
@ -272,7 +253,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
*/
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
{
if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
bitD->start = (const char*)srcBuffer;
bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@ -281,35 +262,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
bitD->bitContainer = MEM_readLEST(bitD->ptr);
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
} else {
bitD->ptr = bitD->start;
bitD->bitContainer = *(const BYTE*)(bitD->start);
switch(srcSize)
{
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
/* fall-through */
case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
ZSTD_FALLTHROUGH;
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
/* fall-through */
case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
ZSTD_FALLTHROUGH;
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
/* fall-through */
case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
ZSTD_FALLTHROUGH;
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
/* fall-through */
case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
ZSTD_FALLTHROUGH;
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
/* fall-through */
case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
ZSTD_FALLTHROUGH;
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
/* fall-through */
case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
ZSTD_FALLTHROUGH;
default: break;
}
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
}
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@ -318,23 +299,26 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize;
}
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{
return bitContainer >> start;
}
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
assert(nbBits < BIT_MASK_SIZE);
/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
* than accessing memory. When bmi2 instruction is not present, we consider
* such cpus old (pre-Haswell, 2013) and their performance is not of that
* importance.
*/
#if defined(__x86_64__) || defined(_M_X64)
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
}
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_lookBits() :
@ -343,7 +327,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
@ -359,14 +343,14 @@ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
/*! BIT_lookBitsFast() :
* unsafe version; only works if nbBits >= 1 */
MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
{
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
assert(nbBits >= 1);
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
bitD->bitsConsumed += nbBits;
}
@ -375,23 +359,38 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BitContainerType const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
return value;
}
/*! BIT_readBitsFast() :
* unsafe version; only works only if nbBits >= 1 */
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
* unsafe version; only works if nbBits >= 1 */
MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBitsFast(bitD, nbBits);
BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
assert(nbBits >= 1);
BIT_skipBits(bitD, nbBits);
return value;
}
/*! BIT_reloadDStream_internal() :
* Simple variant of BIT_reloadDStream(), with two conditions:
* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
* 2. look window is valid after shifted down : bitD->ptr >= bitD->start
*/
MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
{
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
bitD->ptr -= bitD->bitsConsumed >> 3;
assert(bitD->ptr >= bitD->start);
bitD->bitsConsumed &= 7;
bitD->bitContainer = MEM_readLEST(bitD->ptr);
return BIT_DStream_unfinished;
}
/*! BIT_reloadDStreamFast() :
* Similar to BIT_reloadDStream(), but with two differences:
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@ -402,31 +401,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
{
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
return BIT_DStream_overflow;
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
bitD->ptr -= bitD->bitsConsumed >> 3;
bitD->bitsConsumed &= 7;
bitD->bitContainer = MEM_readLEST(bitD->ptr);
return BIT_DStream_unfinished;
return BIT_reloadDStream_internal(bitD);
}
/*! BIT_reloadDStream() :
* Refill `bitD` from buffer previously set in BIT_initDStream() .
* This function is safe, it guarantees it will not read beyond src buffer.
* This function is safe, it guarantees it will not never beyond src buffer.
* @return : status of `BIT_DStream_t` internal register.
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
/* note : once in overflow mode, a bitstream remains in this mode until it's reset */
if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
static const BitContainerType zeroFilled = 0;
bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
/* overflow detected, erroneous scenario or end of stream: no update */
return BIT_DStream_overflow;
}
assert(bitD->ptr >= bitD->start);
if (bitD->ptr >= bitD->limitPtr) {
return BIT_reloadDStreamFast(bitD);
return BIT_reloadDStream_internal(bitD);
}
if (bitD->ptr == bitD->start) {
/* reached end of bitStream => no update */
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
return BIT_DStream_completed;
}
/* start < ptr < limitPtr */
/* start < ptr < limitPtr => cautious update */
{ U32 nbBytes = bitD->bitsConsumed >> 3;
BIT_DStream_status result = BIT_DStream_unfinished;
if (bitD->ptr - nbBytes < bitD->start) {
@ -448,8 +451,4 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
}
#if defined (__cplusplus)
}
#endif
#endif /* BITSTREAM_H_MODULE */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,6 +11,10 @@
#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H
#include <stddef.h>
#include "portability_macros.h"
/*-*******************************************************
* Compiler specifics
*********************************************************/
@ -24,7 +27,7 @@
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@ -39,12 +42,30 @@
#endif
/**
On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
This explicitly marks such functions as __cdecl so that the code will still compile
if a CC other than __cdecl has been made the default.
*/
#if defined(_MSC_VER)
# define WIN_CDECL __cdecl
#else
# define WIN_CDECL
#endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define UNUSED_ATTR __attribute__((unused))
#else
# define UNUSED_ATTR
#endif
/**
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to eliminate the constant
* branches.
*/
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
/**
* HINT_INLINE is used to help the compiler generate better code. It is *not*
* used for "templates", so it can be tweaked based on the compilers
@ -59,85 +80,95 @@
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
# define HINT_INLINE static INLINE_KEYWORD
#else
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
# define HINT_INLINE FORCE_INLINE_TEMPLATE
#endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
/* "soft" inline :
* The compiler is free to select if it's a good idea to inline or not.
* The main objective is to silence compiler warnings
* when a defined function in included but not used.
*
* Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
* Updating the prefix is probably preferable, but requires a fairly large codemod,
* since this name is used everywhere.
*/
#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
#if defined(__GNUC__)
# define UNUSED_ATTR __attribute__((unused))
# define MEM_STATIC static __inline UNUSED_ATTR
#elif defined(__IAR_SYSTEMS_ICC__)
# define MEM_STATIC static inline UNUSED_ATTR
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define MEM_STATIC static inline
#elif defined(_MSC_VER)
# define MEM_STATIC static __inline
#else
# define UNUSED_ATTR
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
#endif
/* force no inlining */
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
#else
# if defined(__GNUC__) || defined(__ICCARM__)
# if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define FORCE_NOINLINE static __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
# endif
#endif
/* target attribute */
#ifndef __has_attribute
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
/* Target attribute for BMI2 dynamic dispatch.
* Enable lzcnt, bmi, and bmi2.
* We test for bmi1 & bmi2. lzcnt is included in bmi1.
*/
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
#endif
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
/* prefetch
* can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH)
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# elif defined(__aarch64__)
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
# else
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
# endif
#endif /* NO_PREFETCH */
#define CACHELINE_SIZE 64
#define PREFETCH_AREA(p, s) { \
const char* const _ptr = (const char*)(p); \
size_t const _size = (size_t)(s); \
size_t _pos; \
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
PREFETCH_L2(_ptr + _pos); \
} \
}
#define PREFETCH_AREA(p, s) \
do { \
const char* const _ptr = (const char*)(p); \
size_t const _size = (size_t)(s); \
size_t _pos; \
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
PREFETCH_L2(_ptr + _pos); \
} \
} while (0)
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
* and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
# else
@ -160,6 +191,12 @@
#define UNLIKELY(x) (x)
#endif
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
#else
# define ZSTD_UNREACHABLE do { assert(0); } while (0)
#endif
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */
@ -170,4 +207,258 @@
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
#endif
/* compile time determination of SIMD support */
#if !defined(ZSTD_NO_INTRINSICS)
# if defined(__AVX2__)
# define ZSTD_ARCH_X86_AVX2
# endif
# if defined(__SSE2__) || defined(_M_X64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
# define ZSTD_ARCH_X86_SSE2
# endif
# if defined(__ARM_NEON) || defined(_M_ARM64)
# define ZSTD_ARCH_ARM_NEON
# endif
#
# if defined(ZSTD_ARCH_X86_AVX2)
# include <immintrin.h>
# endif
# if defined(ZSTD_ARCH_X86_SSE2)
# include <emmintrin.h>
# elif defined(ZSTD_ARCH_ARM_NEON)
# include <arm_neon.h>
# endif
#endif
/* C-language Attributes are added in C23. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
# define ZSTD_HAS_C_ATTRIBUTE(x) 0
#endif
/* Only use C++ attributes in C++. Some compilers report support for C++
* attributes when compiling with C.
*/
#if defined(__cplusplus) && defined(__has_cpp_attribute)
# define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
# define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
#endif
/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
* - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
* - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
* - Else: __attribute__((__fallthrough__))
*/
#ifndef ZSTD_FALLTHROUGH
# if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
# define ZSTD_FALLTHROUGH [[fallthrough]]
# elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
# define ZSTD_FALLTHROUGH [[fallthrough]]
# elif __has_attribute(__fallthrough__)
/* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
* gcc complains about: a label can only be part of a statement and a declaration is not a statement.
*/
# define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
# else
# define ZSTD_FALLTHROUGH
# endif
#endif
/*-**************************************************************
* Alignment
*****************************************************************/
/* @return 1 if @u is a 2^n value, 0 otherwise
* useful to check a value is valid for alignment restrictions */
MEM_STATIC int ZSTD_isPower2(size_t u) {
return (u & (u-1)) == 0;
}
/* this test was initially positioned in mem.h,
* but this file is removed (or replaced) for linux kernel
* so it's now hosted in compiler.h,
* which remains valid for both user & kernel spaces.
*/
#ifndef ZSTD_ALIGNOF
# if defined(__GNUC__) || defined(_MSC_VER)
/* covers gcc, clang & MSVC */
/* note : this section must come first, before C11,
* due to a limitation in the kernel source generator */
# define ZSTD_ALIGNOF(T) __alignof(T)
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
/* C11 support */
# include <stdalign.h>
# define ZSTD_ALIGNOF(T) alignof(T)
# else
/* No known support for alignof() - imperfect backup */
# define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
# endif
#endif /* ZSTD_ALIGNOF */
#ifndef ZSTD_ALIGNED
/* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. */
# if defined(__GNUC__) || defined(__clang__)
# define ZSTD_ALIGNED(a) __attribute__((aligned(a)))
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
# define ZSTD_ALIGNED(a) _Alignas(a)
#elif defined(_MSC_VER)
# define ZSTD_ALIGNED(n) __declspec(align(n))
# else
/* this compiler will require its own alignment instruction */
# define ZSTD_ALIGNED(...)
# endif
#endif /* ZSTD_ALIGNED */
/*-**************************************************************
* Sanitizer
*****************************************************************/
/**
* Zstd relies on pointer overflow in its decompressor.
* We add this attribute to functions that rely on pointer overflow.
*/
#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
# if __has_attribute(no_sanitize)
# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
/* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
# else
/* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
# endif
# else
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
# endif
#endif
/**
* Helper function to perform a wrapped pointer difference without triggering
* UBSAN.
*
* @returns lhs - rhs with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
{
return lhs - rhs;
}
/**
* Helper function to perform a wrapped pointer add without triggering UBSAN.
*
* @return ptr + add with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
{
return ptr + add;
}
/**
* Helper function to perform a wrapped pointer subtraction without triggering
* UBSAN.
*
* @return ptr - sub with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
{
return ptr - sub;
}
/**
* Helper function to add to a pointer that works around C's undefined behavior
* of adding 0 to NULL.
*
* @returns `ptr + add` except it defines `NULL + 0 == NULL`.
*/
MEM_STATIC
unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
{
return add > 0 ? ptr + add : ptr;
}
/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
* abundance of caution, disable our custom poisoning on mingw. */
#ifdef __MINGW32__
#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
#endif
#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
#endif
#endif
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stddef.h> /* size_t */
#define ZSTD_DEPS_NEED_STDINT
#include "zstd_deps.h" /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
/* Print shadow and origin for the memory range to stderr in a human-readable
format. */
void __msan_print_shadow(const volatile void *x, size_t size);
#endif
#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stddef.h> /* size_t */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
#endif /* ZSTD_COMPILER_H */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2018-2020, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -17,8 +16,6 @@
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
*/
#include <string.h>
#include "mem.h"
#ifdef _MSC_VER
@ -38,6 +35,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
U32 f7b = 0;
U32 f7c = 0;
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#if !defined(_M_X64) || !defined(__clang__) || __clang_major__ >= 16
int reg[4];
__cpuid((int*)reg, 0);
{
@ -53,6 +51,41 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
f7c = (U32)reg[2];
}
}
#else
/* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
* which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
* to due to being a reserved register. So in that case, do the `cpuid`
* ourselves. Clang supports inline assembly anyway.
*/
U32 n;
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"popq %%rbx\n\t"
: "=a"(n)
: "a"(0)
: "rcx", "rdx");
if (n >= 1) {
U32 f1a;
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"popq %%rbx\n\t"
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
: "a"(1)
:);
}
if (n >= 7) {
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"movq %%rbx, %%rax\n\t"
"popq %%rbx"
: "=a"(f7b), "=c"(f7c)
: "a"(7), "c"(0)
: "rdx");
}
#endif
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
/* The following block like the normal cpuid branch below, but gcc
* reserves ebx for use of its pic register so we must specially

View File

@ -1,8 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* debug
* Part of FSE library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -33,10 +32,6 @@
#ifndef DEBUG_H_12987983217
#define DEBUG_H_12987983217
#if defined (__cplusplus)
extern "C" {
#endif
/* static assert is triggered at compile time, leaving no runtime artefact.
* static assert only works with compile-time constants.
@ -52,15 +47,6 @@ extern "C" {
#endif
/* DEBUGFILE can be defined externally,
* typically through compiler command line.
* note : currently useless.
* Value must be stderr or stdout */
#ifndef DEBUGFILE
# define DEBUGFILE stderr
#endif
/* recommended values for DEBUGLEVEL :
* 0 : release mode, no debug, all run-time checks disabled
* 1 : enables assert() only, no display
@ -77,7 +63,8 @@ extern "C" {
*/
#if (DEBUGLEVEL>=1)
# include <assert.h>
# define ZSTD_DEPS_NEED_ASSERT
# include "zstd_deps.h"
#else
# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
# define assert(condition) ((void)0) /* disable assert (default) */
@ -85,7 +72,8 @@ extern "C" {
#endif
#if (DEBUGLEVEL>=2)
# include <stdio.h>
# define ZSTD_DEPS_NEED_IO
# include "zstd_deps.h"
extern int g_debuglevel; /* the variable is only declared,
it actually lives in debug.c,
and is shared by the whole process.
@ -93,23 +81,27 @@ extern int g_debuglevel; /* the variable is only declared,
It's useful when enabling very verbose levels
on selective conditions (such as position in src) */
# define RAWLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
fprintf(stderr, " \n"); \
} }
# define RAWLOG(l, ...) \
do { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__VA_ARGS__); \
} \
} while (0)
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define LINE_AS_STRING TOSTRING(__LINE__)
# define DEBUGLOG(l, ...) \
do { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
ZSTD_DEBUG_PRINT(" \n"); \
} \
} while (0)
#else
# define RAWLOG(l, ...) {} /* disabled */
# define DEBUGLOG(l, ...) {} /* disabled */
#endif
#if defined (__cplusplus)
}
# define RAWLOG(l, ...) do { } while (0) /* disabled */
# define DEBUGLOG(l, ...) do { } while (0) /* disabled */
#endif
#endif /* DEBUG_H_12987983217 */

View File

@ -1,7 +1,6 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* Common functions of New Generation Entropy library
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -20,8 +19,8 @@
#include "error_private.h" /* ERR_*, ERROR */
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
#include "huf.h"
#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */
/*=== Version ===*/
@ -39,8 +38,9 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
const BYTE* const istart = (const BYTE*) headerBuffer;
const BYTE* const iend = istart + hbSize;
@ -51,23 +51,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
U32 bitStream;
int bitCount;
unsigned charnum = 0;
unsigned const maxSV1 = *maxSVPtr + 1;
int previous0 = 0;
if (hbSize < 4) {
/* This function only works when hbSize >= 4 */
char buffer[4];
memset(buffer, 0, sizeof(buffer));
memcpy(buffer, headerBuffer, hbSize);
if (hbSize < 8) {
/* This function only works when hbSize >= 8 */
char buffer[8] = {0};
ZSTD_memcpy(buffer, headerBuffer, hbSize);
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
buffer, sizeof(buffer));
if (FSE_isError(countSize)) return countSize;
if (countSize > hbSize) return ERROR(corruption_detected);
return countSize;
} }
assert(hbSize >= 4);
assert(hbSize >= 8);
/* init */
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
bitStream = MEM_readLE32(ip);
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@ -78,36 +78,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
threshold = 1<<nbBits;
nbBits++;
while ((remaining>1) & (charnum<=*maxSVPtr)) {
for (;;) {
if (previous0) {
unsigned n0 = charnum;
while ((bitStream & 0xFFFF) == 0xFFFF) {
n0 += 24;
if (ip < iend-5) {
ip += 2;
bitStream = MEM_readLE32(ip) >> bitCount;
/* Count the number of repeats. Each time the
* 2-bit repeat code is 0b11 there is another
* repeat.
* Avoid UB by setting the high bit to 1.
*/
int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) {
charnum += 3 * 12;
if (LIKELY(ip <= iend-7)) {
ip += 3;
} else {
bitStream >>= 16;
bitCount += 16;
} }
while ((bitStream & 3) == 3) {
n0 += 3;
bitStream >>= 2;
bitCount += 2;
bitCount -= (int)(8 * (iend - 7 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
}
n0 += bitStream & 3;
charnum += 3 * repeats;
bitStream >>= 2 * repeats;
bitCount += 2 * repeats;
/* Add the final repeat which isn't 0b11. */
assert((bitStream & 3) < 3);
charnum += bitStream & 3;
bitCount += 2;
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
while (charnum < n0) normalizedCounter[charnum++] = 0;
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
/* This is an error, but break and return an error
* at the end, because returning out of a loop makes
* it harder for the compiler to optimize.
*/
if (charnum >= maxSV1) break;
/* We don't need to set the normalized count to 0
* because we already memset the whole buffer to 0.
*/
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert((bitCount >> 3) <= 3); /* For first condition to work */
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 2;
} }
{ int const max = (2*threshold-1) - remaining;
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
}
{
int const max = (2*threshold-1) - remaining;
int count;
if ((bitStream & (threshold-1)) < (U32)max) {
@ -120,24 +142,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
}
count--; /* extra accuracy */
remaining -= count < 0 ? -count : count; /* -1 means +1 */
/* When it matters (small blocks), this is a
* predictable branch, because we don't use -1.
*/
if (count >= 0) {
remaining -= count;
} else {
assert(count == -1);
remaining += count;
}
normalizedCounter[charnum++] = (short)count;
previous0 = !count;
while (remaining < threshold) {
nbBits--;
threshold >>= 1;
}
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert(threshold > 1);
if (remaining < threshold) {
/* This branch can be folded into the
* threshold update condition because we
* know that threshold > 1.
*/
if (remaining <= 1) break;
nbBits = ZSTD_highbit32(remaining) + 1;
threshold = 1 << (nbBits - 1);
}
if (charnum >= maxSV1) break;
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
ip += bitCount>>3;
bitCount &= 7;
} else {
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
bitStream = MEM_readLE32(ip) >> bitCount;
} }
if (remaining != 1) return ERROR(corruption_detected);
/* Only possible when there are too many zeros. */
if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
if (bitCount > 32) return ERROR(corruption_detected);
*maxSVPtr = charnum-1;
@ -145,6 +186,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
return ip-istart;
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_readNCount_body_default(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif
size_t FSE_readNCount_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize, int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif
(void)bmi2;
return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
size_t FSE_readNCount(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
@ -156,6 +234,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize)
{
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
}
FORCE_INLINE_TEMPLATE size_t
HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize,
int bmi2)
{
U32 weightTotal;
const BYTE* ip = (const BYTE*) src;
@ -164,7 +253,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
if (!srcSize) return ERROR(srcSize_wrong);
iSize = ip[0];
/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
/* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
if (iSize >= 128) { /* special header */
oSize = iSize - 127;
@ -178,31 +267,31 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
huffWeight[n+1] = ip[n/2] & 15;
} } }
else { /* header compressed with FSE (normal case) */
FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
/* max (hwSize-1) values decoded, as last one is implied */
oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
if (FSE_isError(oSize)) return oSize;
}
/* collect weight stats */
memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
} }
if (weightTotal == 0) return ERROR(corruption_detected);
/* get last non-null symbol weight (implied, total must be 2^n) */
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
{ U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
*tableLogPtr = tableLog;
/* determine last weight */
{ U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1;
U32 const verif = 1 << ZSTD_highbit32(rest);
U32 const lastWeight = ZSTD_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++;
@ -215,3 +304,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
*nbSymbolsPtr = (U32)(oSize+1);
return iSize+1;
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
}
#endif
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize,
int flags)
{
#if DYNAMIC_BMI2
if (flags & HUF_flags_bmi2) {
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}
#endif
(void)flags;
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -28,9 +27,11 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(version_unsupported): return "Version not supported";
case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(corruption_detected): return "Data corruption detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
@ -39,16 +40,23 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
case PREFIX(cannotProduce_uncompressedBlock): return "This mode cannot generate an uncompressed block";
case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
case PREFIX(dictionary_wrong): return "Dictionary mismatch";
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect";
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
case PREFIX(externalSequences_invalid): return "External sequences are not valid";
case PREFIX(maxCode):
default: return notErrorCode;
}

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -14,17 +13,13 @@
#ifndef ERROR_H_MODULE
#define ERROR_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/* ****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t */
#include "zstd_errors.h" /* enum list */
#include "../zstd_errors.h" /* enum list */
#include "compiler.h"
#include "debug.h"
#include "zstd_deps.h" /* size_t */
/* ****************************************
* Compiler-specific
@ -59,8 +54,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
/* check and forward error code */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
#define CHECK_V_F(e, f) \
size_t const e = f; \
do { \
if (ERR_isError(e)) \
return e; \
} while (0)
#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0)
/*-****************************************
@ -74,8 +74,85 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
return ERR_getErrorString(ERR_getErrorCode(code));
}
#if defined (__cplusplus)
/**
* Ignore: this is an internal helper.
*
* This is a helper function to help force C99-correctness during compilation.
* Under strict compilation modes, variadic macro arguments can't be empty.
* However, variadic function arguments can be. Using a function therefore lets
* us statically check that at least one (string) argument was passed,
* independent of the compilation flags.
*/
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}
#endif
/**
* Ignore: this is an internal helper.
*
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
#define _FORCE_HAS_FORMAT_STRING(...) \
do { \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
} \
} while (0)
#define ERR_QUOTE(str) #str
/**
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information.
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
do { \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} \
} while (0)
/**
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0)
/**
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0)
#endif /* ERROR_H_MODULE */

View File

@ -1,8 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* FSE : Finite State Entropy codec
* Public Prototypes declaration
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -12,11 +11,6 @@
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef FSE_H
#define FSE_H
@ -24,8 +18,7 @@ extern "C" {
/*-*****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include "zstd_deps.h" /* size_t, ptrdiff_t */
/*-*****************************************
* FSE_PUBLIC_API : control library symbols visibility
@ -54,34 +47,6 @@ extern "C" {
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
/*-****************************************
* FSE simple functions
******************************************/
/*! FSE_compress() :
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
@return : size of compressed data (<= dstCapacity).
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
*/
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/*! FSE_decompress():
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
into already allocated destination buffer 'dst', of size 'dstCapacity'.
@return : size of regenerated data (<= maxDstSize),
or an error code, which can be tested using FSE_isError() .
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
Why ? : making this distinction requires a header.
Header management is intentionally delegated to the user layer, which can better manage special cases.
*/
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize);
/*-*****************************************
* Tool functions
******************************************/
@ -92,20 +57,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
/*-*****************************************
* FSE advanced functions
******************************************/
/*! FSE_compress2() :
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
Both parameters can be defined as '0' to mean : use default value
@return : size of compressed data
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
if FSE_isError(return), it's an error code.
*/
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/*-*****************************************
* FSE detailed API
******************************************/
@ -138,10 +89,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
/*! FSE_normalizeCount():
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
useLowProbCount is a boolean parameter which trades off compressed size for
faster header decoding. When it is set to 1, the compressed data will be slightly
smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
is a good default, since header deserialization makes a big speed difference.
Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
@return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
/*! FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@ -159,8 +116,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
/*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
/*! FSE_buildCTable():
Builds `ct`, which must be already allocated, using FSE_createCTable().
@ -229,23 +184,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
/*! FSE_readNCount_bmi2():
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
*/
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize, int bmi2);
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
/*! FSE_buildDTable():
Builds 'dt', which must be already allocated, using FSE_createDTable().
return : 0, or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
/*! FSE_decompress_usingDTable():
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
into `dst` which must be already allocated.
@return : size of regenerated data (necessarily <= `dstCapacity`),
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
/*!
Tutorial :
@ -277,24 +223,22 @@ If there is an error, the function will return an error code, which can be teste
#endif /* FSE_H */
#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
#define FSE_H_FSE_STATIC_LINKING_ONLY
/* *** Dependency *** */
#include "bitstream.h"
/* *****************************************
* Static allocation
*******************************************/
/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@ -308,33 +252,28 @@ If there is an error, the function will return an error code, which can be teste
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
*/
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` must be >= `(1<<tableLog)`.
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
* See FSE_buildCTable_wksp() for breakdown of workspace usage.
*/
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
* Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
typedef enum {
FSE_repeat_none, /**< Cannot use the previous table */
@ -517,20 +456,20 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
const U16* const stateTable = (const U16*)(statePtr->stateTable);
U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
BIT_addBits(bitC, statePtr->value, nbBitsOut);
BIT_addBits(bitC, (BitContainerType)statePtr->value, nbBitsOut);
statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
}
MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
{
BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
BIT_addBits(bitC, (BitContainerType)statePtr->value, statePtr->stateLog);
BIT_flushBits(bitC);
}
/* FSE_getMaxNbBits() :
* Approximate maximum cost of a symbol, in bits.
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
* Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
@ -645,6 +584,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#ifndef FSE_DEFAULT_MEMORY_USAGE
# define FSE_DEFAULT_MEMORY_USAGE 13
#endif
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
#endif
/*!FSE_MAX_SYMBOL_VALUE :
* Maximum symbol value authorized.
@ -678,12 +620,6 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
#endif
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
#endif /* FSE_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif

View File

@ -1,7 +1,6 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* FSE : Finite State Entropy decoder
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -17,13 +16,14 @@
/* **************************************************************
* Includes
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include "debug.h" /* assert */
#include "bitstream.h"
#include "compiler.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
#include "zstd_deps.h" /* ZSTD_memcpy */
#include "bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@ -55,19 +55,19 @@
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
/* Function templates */
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
U16* symbolNext = (U16*)workSpace;
BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
U32 const maxSV1 = maxSymbolValue + 1;
U32 const tableSize = 1 << tableLog;
U32 highThreshold = tableSize-1;
/* Sanity Checks */
if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
@ -83,13 +83,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
symbolNext[s] = 1;
} else {
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
symbolNext[s] = normalizedCounter[s];
symbolNext[s] = (U16)normalizedCounter[s];
} } }
memcpy(dt, &DTableH, sizeof(DTableH));
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
}
/* Spread symbols */
{ U32 const tableMask = tableSize-1;
if (highThreshold == tableSize - 1) {
size_t const tableMask = tableSize-1;
size_t const step = FSE_TABLESTEP(tableSize);
/* First lay down the symbols in order.
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
* misses since small blocks generally have small table logs, so nearly
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
* our buffer to handle the over-write.
*/
{ U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
U32 s;
for (s=0; s<maxSV1; ++s, sv += add) {
int i;
int const n = normalizedCounter[s];
MEM_write64(spread + pos, sv);
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
pos += (size_t)n;
} }
/* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the
* variable size inner loop, which caused lots of branch misses.
* Now we can run through all the positions without any branch misses.
* We unroll the loop twice, since that is what empirically worked best.
*/
{
size_t position = 0;
size_t s;
size_t const unroll = 2;
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
for (s = 0; s < (size_t)tableSize; s += unroll) {
size_t u;
for (u = 0; u < unroll; ++u) {
size_t const uPosition = (position + (u * step)) & tableMask;
tableDecode[uPosition].symbol = spread[s + u];
}
position = (position + (unroll * step)) & tableMask;
}
assert(position == 0);
}
} else {
U32 const tableMask = tableSize-1;
U32 const step = FSE_TABLESTEP(tableSize);
U32 s, position = 0;
for (s=0; s<maxSV1; s++) {
@ -107,62 +151,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
U32 const nextState = symbolNext[symbol]++;
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
} }
return 0;
}
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
}
#ifndef FSE_COMMONDEFS_ONLY
/*-*******************************************************
* Decompression (Byte symbols)
*********************************************************/
size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
{
void* ptr = dt;
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
void* dPtr = dt + 1;
FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
DTableH->tableLog = 0;
DTableH->fastMode = 0;
cell->newState = 0;
cell->symbol = symbolValue;
cell->nbBits = 0;
return 0;
}
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
{
void* ptr = dt;
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
void* dPtr = dt + 1;
FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSV1 = tableMask+1;
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* Build Decoding Table */
DTableH->tableLog = (U16)nbBits;
DTableH->fastMode = 1;
for (s=0; s<maxSV1; s++) {
dinfo[s].newState = 0;
dinfo[s].symbol = (BYTE)s;
dinfo[s].nbBits = (BYTE)nbBits;
}
return 0;
}
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
void* dst, size_t maxDstSize,
@ -184,6 +190,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
FSE_initDState(&state1, &bitD, dt);
FSE_initDState(&state2, &bitD, dt);
RETURN_ERROR_IF(BIT_reloadDStream(&bitD)==BIT_DStream_overflow, corruption_detected, "");
#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
/* 4 symbols per loop */
@ -223,54 +231,85 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
break;
} }
return op-ostart;
assert(op >= ostart);
return (size_t)(op-ostart);
}
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize,
const FSE_DTable* dt)
{
const void* ptr = dt;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
}
typedef struct {
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
} FSE_DecompressWksp;
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize,
unsigned maxLog, void* workSpace, size_t wkspSize,
int bmi2)
{
const BYTE* const istart = (const BYTE*)cSrc;
const BYTE* ip = istart;
short counting[FSE_MAX_SYMBOL_VALUE+1];
unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
/* correct offset to dtable depends on this property */
FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
if (FSE_isError(NCountLength)) return NCountLength;
/* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
ip += NCountLength;
cSrcSize -= NCountLength;
{ size_t const NCountLength =
FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
ip += NCountLength;
cSrcSize -= NCountLength;
}
CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
{
const void* ptr = dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
}
}
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
#endif
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
#endif
(void)bmi2;
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
#endif /* FSE_COMMONDEFS_ONLY */

View File

@ -1,8 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* huff0 huffman codec,
* part of Finite State Entropy library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -13,113 +12,33 @@
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef HUF_H_298734234
#define HUF_H_298734234
/* *** Dependencies *** */
#include <stddef.h> /* size_t */
/* *** library symbols visibility *** */
/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
* HUF symbols remain "private" (internal symbols for library only).
* Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
# define HUF_PUBLIC_API __declspec(dllexport)
#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
#else
# define HUF_PUBLIC_API
#endif
/* ========================== */
/* *** simple functions *** */
/* ========================== */
/** HUF_compress() :
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
* 'dst' buffer must be already allocated.
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
* @return : size of compressed data (<= `dstCapacity`).
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
* if HUF_isError(return), compression failed (more details using HUF_getErrorName())
*/
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/** HUF_decompress() :
* Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
* into already allocated buffer 'dst', of minimum size 'dstSize'.
* `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
* Note : in contrast with FSE, HUF_decompress can regenerate
* RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
* because it knows size to regenerate (originalSize).
* @return : size of regenerated data (== originalSize),
* or an error code, which can be tested using HUF_isError()
*/
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize);
#include "zstd_deps.h" /* size_t */
#include "mem.h" /* U32 */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
/* Error Management */
HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
/* *** Advanced function *** */
/** HUF_compress2() :
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize);
#endif /* HUF_H_298734234 */
/* ******************************************************************
* WARNING !!
* The following section contains advanced and experimental definitions
* which shall never be used in the context of a dynamic library,
* because they are not guaranteed to remain stable in the future.
* Only consider them in association with static linking.
* *****************************************************************/
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
#define HUF_H_HUF_STATIC_LINKING_ONLY
/* *** Dependencies *** */
#include "mem.h" /* U32 */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
/* *** Constants *** */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
#define HUF_SYMBOLVALUE_MAX 255
#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
# error "HUF_TABLELOG_MAX is too large !"
#endif
@ -134,12 +53,12 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* static allocation of HUF's Compression Table */
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
typedef size_t HUF_CElt; /* consider it an incomplete type */
#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
void* name##hv = &(name##hb); \
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
/* static allocation of HUF's DTable */
typedef U32 HUF_DTable;
@ -153,25 +72,49 @@ typedef U32 HUF_DTable;
/* ****************************************
* Advanced decompression functions
******************************************/
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
#endif
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
#endif
/**
* Huffman flags bitset.
* For all flags, 0 is the default value.
*/
typedef enum {
/**
* If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
* Otherwise: Ignored.
*/
HUF_flags_bmi2 = (1 << 0),
/**
* If set: Test possible table depths to find the one that produces the smallest header + encoded size.
* If unset: Use heuristic to find the table depth.
*/
HUF_flags_optimalDepth = (1 << 1),
/**
* If set: If the previous table can encode the input, always reuse the previous table.
* If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
*/
HUF_flags_preferRepeat = (1 << 2),
/**
* If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
* If unset: Always histogram the entire input.
*/
HUF_flags_suspectUncompressible = (1 << 3),
/**
* If set: Don't use assembly implementations
* If unset: Allow using assembly implementations
*/
HUF_flags_disableAsm = (1 << 4),
/**
* If set: Don't use the fast decoding loop, always use the fallback decoding loop.
* If unset: Use the fast decoding loop when possible.
*/
HUF_flags_disableFast = (1 << 5)
} HUF_flags_e;
/* ****************************************
* HUF detailed API
* ****************************************/
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
/*! HUF_compress() does the following:
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
@ -184,11 +127,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* For example, it's possible to compress several blocks using the same 'CTable',
* or to save and regenerate 'CTable' using external methods.
*/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
unsigned HUF_minTableLog(unsigned symbolCardinality);
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@ -197,22 +141,24 @@ typedef enum {
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat;
/** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
* If preferRepeat then the old table will always be used if valid. */
* If preferRepeat then the old table will always be used if valid.
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
*/
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree,
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
@ -227,15 +173,40 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize);
/*! HUF_readStats_wksp() :
* Same as HUF_readStats() but takes an external workspace which must be
* 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
int flags);
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
/** HUF_getNbBits() :
/** HUF_getNbBitsFromCTable() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
* Note 1 : is not inlined, as HUF_CElt definition is private
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
* Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
* Note 2 : is not inlined, as HUF_CElt definition is private
*/
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
typedef struct {
BYTE tableLog;
BYTE maxSymbolValue;
BYTE unused[sizeof(size_t) - 2];
} HUF_CTableHeader;
/** HUF_readCTableHeader() :
* @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
*/
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
/*
* HUF_decompress() does the following:
@ -261,81 +232,46 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
* a required workspace size greater than that specified in the following
* macro.
*/
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
/* ====================== */
/* single stream variants */
/* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
/** HUF_compress1X_repeat() :
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
* If preferRepeat then the old table will always be used if valid. */
* If preferRepeat then the old table will always be used if valid.
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
#endif
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
#endif
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */
#endif
/* BMI2 variants.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif /* HUF_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif
#endif /* HUF_H_298734234 */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,15 +11,13 @@
#ifndef MEM_H_MODULE
#define MEM_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/*-****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include <string.h> /* memcpy */
#include <stddef.h> /* size_t, ptrdiff_t */
#include "compiler.h" /* __has_builtin */
#include "debug.h" /* DEBUG_STATIC_ASSERT */
#include "zstd_deps.h" /* ZSTD_memcpy */
/*-****************************************
@ -29,102 +26,22 @@ extern "C" {
#if defined(_MSC_VER) /* Visual Studio */
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#endif
#if defined(__GNUC__)
# define MEM_STATIC static __inline __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define MEM_STATIC static inline
#elif defined(_MSC_VER)
# define MEM_STATIC static __inline
#else
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
#endif
/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
/* detects whether we are being compiled under msan */
#if defined (__has_feature)
# if __has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1
# endif
#endif
#if defined (MEMORY_SANITIZER)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stdint.h> /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
/* detects whether we are being compiled under asan */
#if defined (ZFS_ASAN_ENABLED)
# define ADDRESS_SANITIZER 1
# define ZSTD_ASAN_DONT_POISON_WORKSPACE
#endif
#if defined (ADDRESS_SANITIZER)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
/*-**************************************************************
* Basic Types
*****************************************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint8_t U8;
typedef int8_t S8;
typedef uint16_t U16;
typedef int16_t S16;
typedef uint32_t U32;
@ -137,6 +54,8 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
# error "this implementation requires char to be exactly 8-bit type"
#endif
typedef unsigned char BYTE;
typedef unsigned char U8;
typedef signed char S8;
#if USHRT_MAX != 65535
# error "this implementation requires short to be exactly 16-bit type"
#endif
@ -153,27 +72,64 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
typedef signed long long S64;
#endif
/*-**************************************************************
* Memory I/O API
*****************************************************************/
/*=== Static platform detection ===*/
MEM_STATIC unsigned MEM_32bits(void);
MEM_STATIC unsigned MEM_64bits(void);
MEM_STATIC unsigned MEM_isLittleEndian(void);
/*=== Native unaligned read/write ===*/
MEM_STATIC U16 MEM_read16(const void* memPtr);
MEM_STATIC U32 MEM_read32(const void* memPtr);
MEM_STATIC U64 MEM_read64(const void* memPtr);
MEM_STATIC size_t MEM_readST(const void* memPtr);
MEM_STATIC void MEM_write16(void* memPtr, U16 value);
MEM_STATIC void MEM_write32(void* memPtr, U32 value);
MEM_STATIC void MEM_write64(void* memPtr, U64 value);
/*=== Little endian unaligned read/write ===*/
MEM_STATIC U16 MEM_readLE16(const void* memPtr);
MEM_STATIC U32 MEM_readLE24(const void* memPtr);
MEM_STATIC U32 MEM_readLE32(const void* memPtr);
MEM_STATIC U64 MEM_readLE64(const void* memPtr);
MEM_STATIC size_t MEM_readLEST(const void* memPtr);
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
/*=== Big endian unaligned read/write ===*/
MEM_STATIC U32 MEM_readBE32(const void* memPtr);
MEM_STATIC U64 MEM_readBE64(const void* memPtr);
MEM_STATIC size_t MEM_readBEST(const void* memPtr);
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
/*=== Byteswap ===*/
MEM_STATIC U32 MEM_swap32(U32 in);
MEM_STATIC U64 MEM_swap64(U64 in);
MEM_STATIC size_t MEM_swapST(size_t in);
/*-**************************************************************
* Memory I/O
* Memory I/O Implementation
*****************************************************************/
/* MEM_FORCE_MEMORY_ACCESS :
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
* The below switch allow to select different access method for improved performance.
* Method 0 (default) : use `memcpy()`. Safe and portable.
* Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
* Method 0 : always use `memcpy()`. Safe and portable.
* Method 1 : Use compiler extension to set unaligned access.
* Method 2 : direct access. This method is portable but violate C standard.
* It can generate buggy code on targets depending on alignment.
* In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
* See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
* Prefer these methods in priority order (0 > 1 > 2)
* Default : method 1 if supported, else method 0
*/
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define MEM_FORCE_MEMORY_ACCESS 2
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# ifdef __GNUC__
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
@ -183,8 +139,24 @@ MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
MEM_STATIC unsigned MEM_isLittleEndian(void)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return 1;
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
return 0;
#elif defined(__clang__) && __LITTLE_ENDIAN__
return 1;
#elif defined(__clang__) && __BIG_ENDIAN__
return 0;
#elif defined(_MSC_VER) && (_M_X64 || _M_IX86)
return 1;
#elif defined(__DMC__) && defined(_M_IX86)
return 1;
#elif defined(__IAR_SYSTEMS_ICC__) && __LITTLE_ENDIAN__
return 1;
#else
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
return one.c[0];
#endif
}
#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
@ -202,30 +174,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
__pragma( pack(push, 1) )
typedef struct { U16 v; } unalign16;
typedef struct { U32 v; } unalign32;
typedef struct { U64 v; } unalign64;
typedef struct { size_t v; } unalignArch;
__pragma( pack(pop) )
#else
typedef struct { U16 v; } __attribute__((packed)) unalign16;
typedef struct { U32 v; } __attribute__((packed)) unalign32;
typedef struct { U64 v; } __attribute__((packed)) unalign64;
typedef struct { size_t v; } __attribute__((packed)) unalignArch;
#endif
typedef __attribute__((aligned(1))) U16 unalign16;
typedef __attribute__((aligned(1))) U32 unalign32;
typedef __attribute__((aligned(1))) U64 unalign64;
typedef __attribute__((aligned(1))) size_t unalignArch;
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; }
MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; }
MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; }
MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; }
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; }
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; }
#else
@ -234,41 +195,49 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =
MEM_STATIC U16 MEM_read16(const void* memPtr)
{
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC U32 MEM_read32(const void* memPtr)
{
U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC U64 MEM_read64(const void* memPtr)
{
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC size_t MEM_readST(const void* memPtr)
{
size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC void MEM_write16(void* memPtr, U16 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
MEM_STATIC void MEM_write32(void* memPtr, U32 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
MEM_STATIC void MEM_write64(void* memPtr, U64 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
#endif /* MEM_FORCE_MEMORY_ACCESS */
MEM_STATIC U32 MEM_swap32_fallback(U32 in)
{
return ((in << 24) & 0xff000000 ) |
((in << 8) & 0x00ff0000 ) |
((in >> 8) & 0x0000ff00 ) |
((in >> 24) & 0x000000ff );
}
MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER) /* Visual Studio */
@ -276,14 +245,25 @@ MEM_STATIC U32 MEM_swap32(U32 in)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
return __builtin_bswap32(in);
#elif defined(__ICCARM__)
return __REV(in);
#else
return ((in << 24) & 0xff000000 ) |
((in << 8) & 0x00ff0000 ) |
((in >> 8) & 0x0000ff00 ) |
((in >> 24) & 0x000000ff );
return MEM_swap32_fallback(in);
#endif
}
MEM_STATIC U64 MEM_swap64_fallback(U64 in)
{
return ((in << 56) & 0xff00000000000000ULL) |
((in << 40) & 0x00ff000000000000ULL) |
((in << 24) & 0x0000ff0000000000ULL) |
((in << 8) & 0x000000ff00000000ULL) |
((in >> 8) & 0x00000000ff000000ULL) |
((in >> 24) & 0x0000000000ff0000ULL) |
((in >> 40) & 0x000000000000ff00ULL) |
((in >> 56) & 0x00000000000000ffULL);
}
MEM_STATIC U64 MEM_swap64(U64 in)
{
#if defined(_MSC_VER) /* Visual Studio */
@ -292,14 +272,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
return __builtin_bswap64(in);
#else
return ((in << 56) & 0xff00000000000000ULL) |
((in << 40) & 0x00ff000000000000ULL) |
((in << 24) & 0x0000ff0000000000ULL) |
((in << 8) & 0x000000ff00000000ULL) |
((in >> 8) & 0x00000000ff000000ULL) |
((in >> 24) & 0x0000000000ff0000ULL) |
((in >> 40) & 0x000000000000ff00ULL) |
((in >> 56) & 0x00000000000000ffULL);
return MEM_swap64_fallback(in);
#endif
}
@ -336,7 +309,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
}
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
@ -443,9 +416,7 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
MEM_writeBE64(memPtr, (U64)val);
}
#if defined (__cplusplus)
}
#endif
/* code only tested on 32 and 64 bits systems */
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
#endif /* MEM_H_MODULE */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -11,9 +10,9 @@
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include "../common/allocations.h" /* ZSTD_customCalloc, ZSTD_customFree */
#include "zstd_deps.h" /* size_t */
#include "debug.h" /* assert */
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
#include "pool.h"
/* ====== Compiler specifics ====== */
@ -87,7 +86,7 @@ static void* POOL_thread(void* opaque) {
{ POOL_job const job = ctx->queue[ctx->queueHead];
ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
ctx->numThreadsBusy++;
ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
ctx->queueEmpty = (ctx->queueHead == ctx->queueTail);
/* Unlock the mutex, signal a pusher, and run the job */
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
@ -97,33 +96,37 @@ static void* POOL_thread(void* opaque) {
/* If the intended queue size was 0, signal after finishing job */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->numThreadsBusy--;
if (ctx->queueSize == 1) {
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
}
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
}
} /* for (;;) */
assert(0); /* Unreachable */
}
/* ZSTD_createThreadPool() : public access point */
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
return POOL_create (numThreads, 0);
}
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
}
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
ZSTD_customMem customMem) {
ZSTD_customMem customMem)
{
POOL_ctx* ctx;
/* Check parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem);
if (!ctx) { return NULL; }
/* Initialize the job queue.
* It needs one extra space since one space is wasted to differentiate
* empty and full queues.
*/
ctx->queueSize = queueSize + 1;
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queueHead = 0;
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
@ -137,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
}
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threadCapacity = 0;
ctx->customMem = customMem;
/* Check for errors */
@ -170,7 +173,7 @@ static void POOL_join(POOL_ctx* ctx) {
/* Join all of the threads */
{ size_t i;
for (i = 0; i < ctx->threadCapacity; ++i) {
ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */
} }
}
@ -180,14 +183,27 @@ void POOL_free(POOL_ctx *ctx) {
ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
ZSTD_free(ctx->queue, ctx->customMem);
ZSTD_free(ctx->threads, ctx->customMem);
ZSTD_free(ctx, ctx->customMem);
ZSTD_customFree(ctx->queue, ctx->customMem);
ZSTD_customFree(ctx->threads, ctx->customMem);
ZSTD_customFree(ctx, ctx->customMem);
}
/*! POOL_joinJobs() :
* Waits for all queued jobs to finish executing.
*/
void POOL_joinJobs(POOL_ctx* ctx) {
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
while(!ctx->queueEmpty || ctx->numThreadsBusy > 0) {
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
}
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
}
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
POOL_free (pool);
}
size_t POOL_sizeof(POOL_ctx *ctx) {
size_t POOL_sizeof(const POOL_ctx* ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
return sizeof(*ctx)
+ ctx->queueSize * sizeof(POOL_job)
@ -204,11 +220,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
return 0;
}
/* numThreads > threadCapacity */
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
if (!threadPool) return 1;
/* replace existing thread pool */
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
ZSTD_free(ctx->threads, ctx->customMem);
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t));
ZSTD_customFree(ctx->threads, ctx->customMem);
ctx->threads = threadPool;
/* Initialize additional threads */
{ size_t threadId;
@ -252,9 +268,12 @@ static int isQueueFull(POOL_ctx const* ctx) {
}
static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
static void
POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
{
POOL_job const job = {function, opaque};
POOL_job job;
job.function = function;
job.opaque = opaque;
assert(ctx != NULL);
if (ctx->shutdown) return;
@ -302,21 +321,28 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
struct POOL_ctx_s {
int dummy;
};
static POOL_ctx g_ctx;
static POOL_ctx g_poolCtx;
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
}
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
POOL_ctx*
POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem)
{
(void)numThreads;
(void)queueSize;
(void)customMem;
return &g_ctx;
return &g_poolCtx;
}
void POOL_free(POOL_ctx* ctx) {
assert(!ctx || ctx == &g_ctx);
assert(!ctx || ctx == &g_poolCtx);
(void)ctx;
}
void POOL_joinJobs(POOL_ctx* ctx){
assert(!ctx || ctx == &g_poolCtx);
(void)ctx;
}
@ -336,9 +362,9 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
return 1;
}
size_t POOL_sizeof(POOL_ctx* ctx) {
size_t POOL_sizeof(const POOL_ctx* ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
assert(ctx == &g_ctx);
assert(ctx == &g_poolCtx);
return sizeof(*ctx);
}

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,12 +11,8 @@
#ifndef POOL_H
#define POOL_H
#if defined (__cplusplus)
extern "C" {
#endif
#include <stddef.h> /* size_t */
#include "zstd_deps.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
#include "../zstd.h"
@ -39,10 +34,16 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
*/
void POOL_free(POOL_ctx* ctx);
/*! POOL_joinJobs() :
* Waits for all queued jobs to finish executing.
*/
void POOL_joinJobs(POOL_ctx* ctx);
/*! POOL_resize() :
* Expands or shrinks pool's number of threads.
* This is more efficient than releasing + creating a new context,
* since it tries to preserve and re-use existing threads.
* since it tries to preserve and reuse existing threads.
* `numThreads` must be at least 1.
* @return : 0 when resize was successful,
* !0 (typically 1) if there is an error.
@ -54,7 +55,7 @@ int POOL_resize(POOL_ctx* ctx, size_t numThreads);
* @return threadpool memory usage
* note : compatible with NULL (returns 0 in this case)
*/
size_t POOL_sizeof(POOL_ctx* ctx);
size_t POOL_sizeof(const POOL_ctx* ctx);
/*! POOL_function :
* The function type that can be added to a thread pool.
@ -71,15 +72,10 @@ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
/*! POOL_tryAdd() :
* Add the job `function(opaque)` to thread pool _if_ a worker is available.
* Add the job `function(opaque)` to thread pool _if_ a queue slot is available.
* Returns immediately even if not (does not block).
* @return : 1 if successful, 0 if not.
*/
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
#if defined (__cplusplus)
}
#endif
#endif

View File

@ -0,0 +1,171 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_PORTABILITY_MACROS_H
#define ZSTD_PORTABILITY_MACROS_H
/**
* This header file contains macro definitions to support portability.
* This header is shared between C and ASM code, so it MUST only
* contain macro definitions. It MUST not contain any C code.
*
* This header ONLY defines macros to detect platforms/feature support.
*
*/
/* compat. with non-clang compilers */
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_feature
# define __has_feature(x) 0
#endif
/* detects whether we are being compiled under msan */
#ifndef ZSTD_MEMORY_SANITIZER
# if __has_feature(memory_sanitizer)
# define ZSTD_MEMORY_SANITIZER 1
# else
# define ZSTD_MEMORY_SANITIZER 0
# endif
#endif
/* detects whether we are being compiled under asan */
#ifndef ZSTD_ADDRESS_SANITIZER
# if __has_feature(address_sanitizer)
# define ZSTD_ADDRESS_SANITIZER 1
# elif defined(__SANITIZE_ADDRESS__)
# define ZSTD_ADDRESS_SANITIZER 1
# else
# define ZSTD_ADDRESS_SANITIZER 0
# endif
#endif
/* detects whether we are being compiled under dfsan */
#ifndef ZSTD_DATAFLOW_SANITIZER
# if __has_feature(dataflow_sanitizer)
# define ZSTD_DATAFLOW_SANITIZER 1
# else
# define ZSTD_DATAFLOW_SANITIZER 0
# endif
#endif
/* Mark the internal assembly functions as hidden */
#ifdef __ELF__
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
#elif defined(__APPLE__)
# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
#else
# define ZSTD_HIDE_ASM_FUNCTION(func)
#endif
/* Compile time determination of BMI2 support */
#ifndef STATIC_BMI2
# if defined(__BMI2__)
# define STATIC_BMI2 1
# elif defined(_MSC_VER) && defined(__AVX2__)
# define STATIC_BMI2 1 /* MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 */
# endif
#endif
#ifndef STATIC_BMI2
# define STATIC_BMI2 0
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
# if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
# else
# define DYNAMIC_BMI2 0
# endif
#endif
/**
* Only enable assembly for GNU C compatible compilers,
* because other platforms may not support GAS assembly syntax.
*
* Only enable assembly for Linux / MacOS / Win32, other platforms may
* work, but they haven't been tested. This could likely be
* extended to BSD systems.
*
* Disable assembly when MSAN is enabled, because MSAN requires
* 100% of code to be instrumented to work.
*/
#if defined(__GNUC__)
# if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(_WIN32)
# if ZSTD_MEMORY_SANITIZER
# define ZSTD_ASM_SUPPORTED 0
# elif ZSTD_DATAFLOW_SANITIZER
# define ZSTD_ASM_SUPPORTED 0
# else
# define ZSTD_ASM_SUPPORTED 1
# endif
# else
# define ZSTD_ASM_SUPPORTED 0
# endif
#else
# define ZSTD_ASM_SUPPORTED 0
#endif
/**
* Determines whether we should enable assembly for x86-64
* with BMI2.
*
* Enable if all of the following conditions hold:
* - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
* - Assembly is supported
* - We are compiling for x86-64 and either:
* - DYNAMIC_BMI2 is enabled
* - BMI2 is supported at compile time
*/
#if !defined(ZSTD_DISABLE_ASM) && \
ZSTD_ASM_SUPPORTED && \
defined(__x86_64__) && \
(DYNAMIC_BMI2 || defined(__BMI2__))
# define ZSTD_ENABLE_ASM_X86_64_BMI2 1
#else
# define ZSTD_ENABLE_ASM_X86_64_BMI2 0
#endif
/*
* For x86 ELF targets, add .note.gnu.property section for Intel CET in
* assembly sources when CET is enabled.
*
* Additionally, any function that may be called indirectly must begin
* with ZSTD_CET_ENDBRANCH.
*/
#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
&& defined(__has_include)
# if __has_include(<cet.h>)
# include <cet.h>
# define ZSTD_CET_ENDBRANCH _CET_ENDBR
# endif
#endif
#ifndef ZSTD_CET_ENDBRANCH
# define ZSTD_CET_ENDBRANCH
#endif
#endif /* ZSTD_PORTABILITY_MACROS_H */

View File

@ -1,865 +0,0 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* xxHash - Fast Hash algorithm
* Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - xxHash homepage: http://www.xxhash.com
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* *************************************
* Tuning parameters
***************************************/
/*!XXH_FORCE_MEMORY_ACCESS :
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
* The below switch allow to select different access method for improved performance.
* Method 0 (default) : use `memcpy()`. Safe and portable.
* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
* Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
* It can generate buggy code on targets which do not support unaligned memory accesses.
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
* See http://stackoverflow.com/a/32095106/646947 for details.
* Prefer these methods in priority order (0 > 1 > 2)
*/
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
defined(__ICCARM__)
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif
/*!XXH_ACCEPT_NULL_INPUT_POINTER :
* If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
* When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
* By default, this option is disabled. To enable it, uncomment below define :
*/
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
/*!XXH_FORCE_NATIVE_FORMAT :
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
* Results are therefore identical for little-endian and big-endian CPU.
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
* to improve speed for Big-endian CPU.
* This option has no impact on Little_Endian CPU.
*/
#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
# define XXH_FORCE_NATIVE_FORMAT 0
#endif
/*!XXH_FORCE_ALIGN_CHECK :
* This is a minor performance trick, only useful with lots of very small keys.
* It means : check for aligned/unaligned input.
* The check costs one initial branch per hash; set to 0 when the input data
* is guaranteed to be aligned.
*/
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
# define XXH_FORCE_ALIGN_CHECK 0
# else
# define XXH_FORCE_ALIGN_CHECK 1
# endif
#endif
/* *************************************
* Includes & Memory related functions
***************************************/
/* Modify the local functions below should you wish to use some other memory routines */
/* for malloc(), free() */
#include <stdlib.h>
#include <stddef.h> /* size_t */
static void* XXH_malloc(size_t s) { return malloc(s); }
static void XXH_free (void* p) { free(p); }
/* for memcpy() */
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
#ifndef XXH_STATIC_LINKING_ONLY
# define XXH_STATIC_LINKING_ONLY
#endif
#include "xxhash.h"
/* *************************************
* Compiler Specific Options
***************************************/
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
#else
# define FORCE_INLINE_ATTR
#endif
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
#ifdef _MSC_VER
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
/* *************************************
* Basic Types
***************************************/
#ifndef MEM_MODULE
# define MEM_MODULE
# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
# else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
# endif
#endif
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
#else
/* portable and safe solution. Generally efficient.
* see : http://stackoverflow.com/a/32095106/646947
*/
static U32 XXH_read32(const void* memPtr)
{
U32 val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
static U64 XXH_read64(const void* memPtr)
{
U64 val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
/* ****************************************
* Compiler-specific Functions and Macros
******************************************/
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
#if defined(_MSC_VER)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
#if defined(__ICCARM__)
# include <intrinsics.h>
# define XXH_rotl32(x,r) __ROR(x,(32 - r))
#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif
#if defined(_MSC_VER) /* Visual Studio */
# define XXH_swap32 _byteswap_ulong
# define XXH_swap64 _byteswap_uint64
#elif GCC_VERSION >= 403
# define XXH_swap32 __builtin_bswap32
# define XXH_swap64 __builtin_bswap64
#else
static U32 XXH_swap32 (U32 x)
{
return ((x << 24) & 0xff000000 ) |
((x << 8) & 0x00ff0000 ) |
((x >> 8) & 0x0000ff00 ) |
((x >> 24) & 0x000000ff );
}
static U64 XXH_swap64 (U64 x)
{
return ((x << 56) & 0xff00000000000000ULL) |
((x << 40) & 0x00ff000000000000ULL) |
((x << 24) & 0x0000ff0000000000ULL) |
((x << 8) & 0x000000ff00000000ULL) |
((x >> 8) & 0x00000000ff000000ULL) |
((x >> 24) & 0x0000000000ff0000ULL) |
((x >> 40) & 0x000000000000ff00ULL) |
((x >> 56) & 0x00000000000000ffULL);
}
#endif
/* *************************************
* Architecture Macros
***************************************/
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
#ifndef XXH_CPU_LITTLE_ENDIAN
static const int g_one = 1;
# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
#endif
/* ***************************
* Memory reads
*****************************/
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
if (align==XXH_unaligned)
return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
else
return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
}
FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
{
return XXH_readLE32_align(ptr, endian, XXH_unaligned);
}
static U32 XXH_readBE32(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
}
FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
if (align==XXH_unaligned)
return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
else
return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
}
FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
{
return XXH_readLE64_align(ptr, endian, XXH_unaligned);
}
static U64 XXH_readBE64(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
}
/* *************************************
* Macros
***************************************/
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
/* *************************************
* Constants
***************************************/
static const U32 PRIME32_1 = 2654435761U;
static const U32 PRIME32_2 = 2246822519U;
static const U32 PRIME32_3 = 3266489917U;
static const U32 PRIME32_4 = 668265263U;
static const U32 PRIME32_5 = 374761393U;
static const U64 PRIME64_1 = 11400714785074694791ULL;
static const U64 PRIME64_2 = 14029467366897019727ULL;
static const U64 PRIME64_3 = 1609587929392839161ULL;
static const U64 PRIME64_4 = 9650029242287828579ULL;
static const U64 PRIME64_5 = 2870177450012600261ULL;
XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
/* **************************
* Utils
****************************/
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
{
memcpy(dstState, srcState, sizeof(*dstState));
}
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
{
memcpy(dstState, srcState, sizeof(*dstState));
}
/* ***************************
* Simple Hash Functions
*****************************/
static U32 XXH32_round(U32 seed, U32 input)
{
seed += input * PRIME32_2;
seed = XXH_rotl32(seed, 13);
seed *= PRIME32_1;
return seed;
}
FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
U32 h32;
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (p==NULL) {
len=0;
bEnd=p=(const BYTE*)(size_t)16;
}
#endif
if (len>=16) {
const BYTE* const limit = bEnd - 16;
U32 v1 = seed + PRIME32_1 + PRIME32_2;
U32 v2 = seed + PRIME32_2;
U32 v3 = seed + 0;
U32 v4 = seed - PRIME32_1;
do {
v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
} while (p<=limit);
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
} else {
h32 = seed + PRIME32_5;
}
h32 += (U32) len;
while (p+4<=bEnd) {
h32 += XXH_get32bits(p) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
p+=4;
}
while (p<bEnd) {
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
{
#if 0
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH32_CREATESTATE_STATIC(state);
XXH32_reset(state, seed);
XXH32_update(state, input, len);
return XXH32_digest(state);
#else
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
else
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
} }
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
else
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
static U64 XXH64_round(U64 acc, U64 input)
{
acc += input * PRIME64_2;
acc = XXH_rotl64(acc, 31);
acc *= PRIME64_1;
return acc;
}
static U64 XXH64_mergeRound(U64 acc, U64 val)
{
val = XXH64_round(0, val);
acc ^= val;
acc = acc * PRIME64_1 + PRIME64_4;
return acc;
}
FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
U64 h64;
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (p==NULL) {
len=0;
bEnd=p=(const BYTE*)(size_t)32;
}
#endif
if (len>=32) {
const BYTE* const limit = bEnd - 32;
U64 v1 = seed + PRIME64_1 + PRIME64_2;
U64 v2 = seed + PRIME64_2;
U64 v3 = seed + 0;
U64 v4 = seed - PRIME64_1;
do {
v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
} while (p<=limit);
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
h64 = XXH64_mergeRound(h64, v1);
h64 = XXH64_mergeRound(h64, v2);
h64 = XXH64_mergeRound(h64, v3);
h64 = XXH64_mergeRound(h64, v4);
} else {
h64 = seed + PRIME64_5;
}
h64 += (U64) len;
while (p+8<=bEnd) {
U64 const k1 = XXH64_round(0, XXH_get64bits(p));
h64 ^= k1;
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
p+=8;
}
if (p+4<=bEnd) {
h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
p+=4;
}
while (p<bEnd) {
h64 ^= (*p) * PRIME64_5;
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
p++;
}
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
h64 ^= h64 >> 29;
h64 *= PRIME64_3;
h64 ^= h64 >> 32;
return h64;
}
XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
#if 0
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH64_CREATESTATE_STATIC(state);
XXH64_reset(state, seed);
XXH64_update(state, input, len);
return XXH64_digest(state);
#else
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
else
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
} }
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
else
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
/* **************************************************
* Advanced Hash Functions
****************************************************/
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
{
return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
}
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
XXH_free(statePtr);
return XXH_OK;
}
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
{
return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
}
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
XXH_free(statePtr);
return XXH_OK;
}
/*** Hash feed ***/
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
{
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
state.v1 = seed + PRIME32_1 + PRIME32_2;
state.v2 = seed + PRIME32_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME32_1;
memcpy(statePtr, &state, sizeof(state));
return XXH_OK;
}
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
{
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
state.v1 = seed + PRIME64_1 + PRIME64_2;
state.v2 = seed + PRIME64_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME64_1;
memcpy(statePtr, &state, sizeof(state));
return XXH_OK;
}
FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
{
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (input==NULL) return XXH_ERROR;
#endif
state->total_len_32 += (unsigned)len;
state->large_len |= (len>=16) | (state->total_len_32>=16);
if (state->memsize + len < 16) { /* fill in tmp buffer */
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
state->memsize += (unsigned)len;
return XXH_OK;
}
if (state->memsize) { /* some data left from previous update */
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
{ const U32* p32 = state->mem32;
state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
}
p += 16-state->memsize;
state->memsize = 0;
}
if (p <= bEnd-16) {
const BYTE* const limit = bEnd - 16;
U32 v1 = state->v1;
U32 v2 = state->v2;
U32 v3 = state->v3;
U32 v4 = state->v4;
do {
v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
} while (p<=limit);
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
if (p < bEnd) {
XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
state->memsize = (unsigned)(bEnd-p);
}
return XXH_OK;
}
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
else
return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}
FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
{
const BYTE * p = (const BYTE*)state->mem32;
const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
U32 h32;
if (state->large_len) {
h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
} else {
h32 = state->v3 /* == seed */ + PRIME32_5;
}
h32 += state->total_len_32;
while (p+4<=bEnd) {
h32 += XXH_readLE32(p, endian) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4;
p+=4;
}
while (p<bEnd) {
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_digest_endian(state_in, XXH_littleEndian);
else
return XXH32_digest_endian(state_in, XXH_bigEndian);
}
/* **** XXH64 **** */
FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
{
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (input==NULL) return XXH_ERROR;
#endif
state->total_len += len;
if (state->memsize + len < 32) { /* fill in tmp buffer */
if (input != NULL) {
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
}
state->memsize += (U32)len;
return XXH_OK;
}
if (state->memsize) { /* tmp buffer is full */
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
p += 32-state->memsize;
state->memsize = 0;
}
if (p+32 <= bEnd) {
const BYTE* const limit = bEnd - 32;
U64 v1 = state->v1;
U64 v2 = state->v2;
U64 v3 = state->v3;
U64 v4 = state->v4;
do {
v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
} while (p<=limit);
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
if (p < bEnd) {
XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
state->memsize = (unsigned)(bEnd-p);
}
return XXH_OK;
}
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
else
return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
}
FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
{
const BYTE * p = (const BYTE*)state->mem64;
const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
U64 h64;
if (state->total_len >= 32) {
U64 const v1 = state->v1;
U64 const v2 = state->v2;
U64 const v3 = state->v3;
U64 const v4 = state->v4;
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
h64 = XXH64_mergeRound(h64, v1);
h64 = XXH64_mergeRound(h64, v2);
h64 = XXH64_mergeRound(h64, v3);
h64 = XXH64_mergeRound(h64, v4);
} else {
h64 = state->v3 + PRIME64_5;
}
h64 += (U64) state->total_len;
while (p+8<=bEnd) {
U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
h64 ^= k1;
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
p+=8;
}
if (p+4<=bEnd) {
h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
p+=4;
}
while (p<bEnd) {
h64 ^= (*p) * PRIME64_5;
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
p++;
}
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
h64 ^= h64 >> 29;
h64 *= PRIME64_3;
h64 ^= h64 >> 32;
return h64;
}
XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_digest_endian(state_in, XXH_littleEndian);
else
return XXH64_digest_endian(state_in, XXH_bigEndian);
}
/* **************************
* Canonical representation
****************************/
/*! Default XXH result types are basic unsigned 32 and 64 bits.
* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
* These functions allow transformation of hash result into and from its canonical format.
* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
*/
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
{
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
memcpy(dst, &hash, sizeof(*dst));
}
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
{
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
memcpy(dst, &hash, sizeof(*dst));
}
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
{
return XXH_readBE32(src);
}
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
{
return XXH_readBE64(src);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -14,8 +13,7 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdlib.h> /* malloc, calloc, free */
#include <string.h> /* memset */
#define ZSTD_DEPS_NEED_MALLOC
#include "error_private.h"
#include "zstd_internal.h"
@ -31,7 +29,7 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
/*-****************************************
* ZSTD Error Management
******************************************/
#undef ZSTD_isError /* defined within zstd_internal.h */
/*! ZSTD_isError() :
* tells if a return value is an error code
* symbol is required for external callers */
@ -48,37 +46,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
/*! ZSTD_getErrorString() :
* provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
/*=**************************************************************
* Custom allocator
****************************************************************/
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return malloc(size);
}
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
memset(ptr, 0, size);
return ptr;
}
return calloc(1, size);
}
void ZSTD_free(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
free(ptr);
}
}

View File

@ -0,0 +1,123 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file provides common libc dependencies that zstd requires.
* The purpose is to allow replacing this file with a custom implementation
* to compile zstd without libc support.
*/
/* Need:
* NULL
* INT_MAX
* UINT_MAX
* ZSTD_memcpy()
* ZSTD_memset()
* ZSTD_memmove()
*/
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
/* Even though we use qsort_r only for the dictionary builder, the macro
* _GNU_SOURCE has to be declared *before* the inclusion of any standard
* header and the script 'combine.sh' combines the whole zstd source code
* in a single file.
*/
#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) || \
defined(__CYGWIN__) || defined(__MSYS__)
#if !defined(_GNU_SOURCE) && !defined(__ANDROID__) /* NDK doesn't ship qsort_r(). */
#define _GNU_SOURCE
#endif
#endif
#include <limits.h>
#include <stddef.h>
#include <string.h>
#if defined(__GNUC__) && __GNUC__ >= 4
# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
#else
# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) memset((p),(v),(l))
#endif
#endif /* ZSTD_DEPS_COMMON */
/* Need:
* ZSTD_malloc()
* ZSTD_free()
* ZSTD_calloc()
*/
#ifdef ZSTD_DEPS_NEED_MALLOC
#ifndef ZSTD_DEPS_MALLOC
#define ZSTD_DEPS_MALLOC
#include <stdlib.h>
#define ZSTD_malloc(s) malloc(s)
#define ZSTD_calloc(n,s) calloc((n), (s))
#define ZSTD_free(p) free((p))
#endif /* ZSTD_DEPS_MALLOC */
#endif /* ZSTD_DEPS_NEED_MALLOC */
/*
* Provides 64-bit math support.
* Need:
* U64 ZSTD_div64(U64 dividend, U32 divisor)
*/
#ifdef ZSTD_DEPS_NEED_MATH64
#ifndef ZSTD_DEPS_MATH64
#define ZSTD_DEPS_MATH64
#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
#endif /* ZSTD_DEPS_MATH64 */
#endif /* ZSTD_DEPS_NEED_MATH64 */
/* Need:
* assert()
*/
#ifdef ZSTD_DEPS_NEED_ASSERT
#ifndef ZSTD_DEPS_ASSERT
#define ZSTD_DEPS_ASSERT
#include <assert.h>
#endif /* ZSTD_DEPS_ASSERT */
#endif /* ZSTD_DEPS_NEED_ASSERT */
/* Need:
* ZSTD_DEBUG_PRINT()
*/
#ifdef ZSTD_DEPS_NEED_IO
#ifndef ZSTD_DEPS_IO
#define ZSTD_DEPS_IO
#include <stdio.h>
#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#endif /* ZSTD_DEPS_IO */
#endif /* ZSTD_DEPS_NEED_IO */
/* Only requested when <stdint.h> is known to be present.
* Need:
* intptr_t
*/
#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
#include <stdint.h>
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,15 +11,6 @@
#ifndef ZSTD_CCOMMON_H_MODULE
#define ZSTD_CCOMMON_H_MODULE
/*
* Disable the aarch64 NEON SIMD intrinsics for kernel builds. Safely
* using them in the kernel context requires saving/restoring the FPU
* registers which is not currently done.
*/
#ifdef _KERNEL
#define ZSTD_NO_INTRINSICS
#endif
/* this module contains definitions which must be identical
* across compression, decompression and dictBuilder.
* It also contains a few functions useful to at least 2 of them
@ -29,10 +19,8 @@
/*-*************************************
* Dependencies
***************************************/
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#include "compiler.h"
#include "cpu.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
@ -40,19 +28,20 @@
#include "../zstd.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#ifndef XXH_STATIC_LINKING_ONLY
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#endif
#include "xxhash.h" /* XXH_reset, update, digest */
#if defined (__cplusplus)
extern "C" {
#ifndef ZSTD_NO_TRACE
# include "zstd_trace.h"
#else
# define ZSTD_TRACE 0
#endif
/* ---- static assert (debug) --- */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
#define ZSTD_isError ERR_isError /* for inlining */
#define FSE_isError ERR_isError
#define HUF_isError ERR_isError
@ -64,81 +53,7 @@ extern "C" {
#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
/**
* Ignore: this is an internal helper.
*
* This is a helper function to help force C99-correctness during compilation.
* Under strict compilation modes, variadic macro arguments can't be empty.
* However, variadic function arguments can be. Using a function therefore lets
* us statically check that at least one (string) argument was passed,
* independent of the compilation flags.
*/
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}
/**
* Ignore: this is an internal helper.
*
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}
/**
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information.
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}
/**
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);
/**
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
/*-*************************************
@ -147,8 +62,7 @@ void _force_has_format_string(const char *format, ...) {
#define ZSTD_OPT_NUM (1<<12)
#define ZSTD_REP_NUM 3 /* number of repcodes */
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
#define KB *(1 <<10)
#define MB *(1 <<20)
@ -162,28 +76,29 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
#define BIT0 1
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define ZSTD_FRAMECHECKSUMSIZE 4
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
#define MIN_LITERALS_FOR_4_STREAMS 6
#define HufLog 12
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
typedef enum { set_basic, set_rle, set_compressed, set_repeat } SymbolEncodingType_e;
#define LONGNBSEQ 0x7F00
#define MINMATCH 3
#define Litbits 8
#define LitHufLog 11
#define MaxLit ((1<<Litbits) - 1)
#define MaxML 52
#define MaxLL 35
@ -194,65 +109,92 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define LLFSELog 9
#define OffFSELog 8
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
#define MaxMLBits 16
#define MaxLLBits 16
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 6, 7, 8, 9,10,11,12,
13,14,15,16 };
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 1, 1, 1, 1, 1,
-1,-1,-1,-1 };
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
/* Each table cannot take more than #symbols * FSELog bits */
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 6, 7, 8, 9,10,11,12,
13,14,15,16
};
static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
4, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 1, 1, 1, 1, 1,
-1,-1,-1,-1
};
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 4, 5, 7, 8, 9,10,11,
12,13,14,15,16 };
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1,-1,-1,
-1,-1,-1,-1,-1 };
static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 4, 5, 7, 8, 9,10,11,
12,13,14,15,16
};
static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
1, 4, 3, 2, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1,-1,-1,
-1,-1,-1,-1,-1
};
#define ML_DEFAULTNORMLOG 6 /* for static allocation */
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1 };
static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
1, 1, 1, 1, 1, 1, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1
};
#define OF_DEFAULTNORMLOG 5 /* for static allocation */
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
/*-*******************************************
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#if defined(ZSTD_ARCH_ARM_NEON)
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
memcpy(dst, src, 8);
ZSTD_memcpy(dst, src, 8);
#endif
}
#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
/* Need to use memmove here since the literal buffer can now be located within
the dst buffer. In circumstances where the op "catches up" to where the
literal buffer is, there can be partial overlaps in this call on the final
copy if the literal is being shifted by less than 16 bytes. */
static void ZSTD_copy16(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#if defined(ZSTD_ARCH_ARM_NEON)
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#elif defined(ZSTD_ARCH_X86_SSE2)
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
#elif defined(__clang__)
ZSTD_memmove(dst, src, 16);
#else
memcpy(dst, src, 16);
/* ZSTD_memmove is not inlined properly by gcc */
BYTE copy16_buf[16];
ZSTD_memcpy(copy16_buf, src, 16);
ZSTD_memcpy(dst, copy16_buf, 16);
#endif
}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16
@ -264,13 +206,13 @@ typedef enum {
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* @param ovtype controls the overlap detection
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
* The src buffer must be before the dst buffer.
*/
MEM_STATIC FORCE_INLINE_ATTR
MEM_STATIC FORCE_INLINE_ATTR
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
@ -278,12 +220,10 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
/* Handle short offset copies. */
do {
COPY8(op, ip)
COPY8(op, ip);
} while (op < oend);
} else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
@ -293,20 +233,15 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
* one COPY16() in the first call. Then, do two calls per loop since
* at that point it is more likely to have a high trip count.
*/
#ifndef __aarch64__
do {
COPY16(op, ip);
}
while (op < oend);
#else
COPY16(op, ip);
if (op >= oend) return;
ZSTD_copy16(op, ip);
if (16 >= length) return;
op += 16;
ip += 16;
do {
COPY16(op, ip);
COPY16(op, ip);
}
while (op < oend);
#endif
}
}
@ -314,7 +249,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
{
size_t const length = MIN(dstCapacity, srcSize);
if (length > 0) {
memcpy(dst, src, length);
ZSTD_memcpy(dst, src, length);
}
return length;
}
@ -329,54 +264,16 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
* In which case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
/* Controls whether the input/output buffer is buffered or stable. */
typedef enum {
ZSTD_bm_buffered = 0, /* Buffer the input/output */
ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
} ZSTD_bufferMode_e;
/*-*******************************************
* Private declarations
*********************************************/
typedef struct seqDef_s {
U32 offset;
U16 litLength;
U16 matchLength;
} seqDef;
typedef struct {
seqDef* sequencesStart;
seqDef* sequences;
BYTE* litStart;
BYTE* lit;
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
} seqStore_t;
typedef struct {
U32 litLength;
U32 matchLength;
} ZSTD_sequenceLength;
/**
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
*/
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
{
ZSTD_sequenceLength seqLen;
seqLen.litLength = seq->litLength;
seqLen.matchLength = seq->matchLength + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthID == 1) {
seqLen.litLength += 0xFFFF;
}
if (seqStore->longLengthID == 2) {
seqLen.matchLength += 0xFFFF;
}
}
return seqLen;
}
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
@ -385,44 +282,11 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
*/
typedef struct {
size_t nbBlocks;
size_t compressedSize;
unsigned long long decompressedBound;
} ZSTD_frameSizeInfo; /* decompress & legacy */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/* custom memory allocation functions */
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
void ZSTD_free(void* ptr, ZSTD_customMem customMem);
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
# endif
}
}
/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
* Note : only works with regular variant;
@ -438,19 +302,23 @@ typedef struct {
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
/* Used by: decompress, fullbench (does not get its definition from here) */
/* Used by: decompress, fullbench */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
blockProperties_t* bpPtr);
/*! ZSTD_decodeSeqHeaders() :
* decode sequence header from src */
/* Used by: decompress, fullbench (does not get its definition from here) */
/* Used by: zstd_decompress_block, fullbench */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize);
#if defined (__cplusplus)
/**
* @returns true iff the CPU supports dynamic BMI2 dispatch.
*/
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
{
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
}
#endif
#endif /* ZSTD_CCOMMON_H_MODULE */

View File

@ -0,0 +1,156 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_TRACE_H
#define ZSTD_TRACE_H
#include <stddef.h>
/* weak symbol support
* For now, enable conservatively:
* - Only GNUC
* - Only ELF
* - Only x86-64, i386, aarch64 and risc-v.
* Also, explicitly disable on platforms known not to work so they aren't
* forgotten in the future.
*/
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
defined(__GNUC__) && defined(__ELF__) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86) || defined(__aarch64__) || defined(__riscv)) && \
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
!defined(__CYGWIN__) && !defined(_AIX)
# define ZSTD_HAVE_WEAK_SYMBOLS 1
#else
# define ZSTD_HAVE_WEAK_SYMBOLS 0
#endif
#if ZSTD_HAVE_WEAK_SYMBOLS
# define ZSTD_WEAK_ATTR __attribute__((__weak__))
#else
# define ZSTD_WEAK_ATTR
#endif
/* Only enable tracing when weak symbols are available. */
#ifndef ZSTD_TRACE
# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
#endif
#if ZSTD_TRACE
struct ZSTD_CCtx_s;
struct ZSTD_DCtx_s;
struct ZSTD_CCtx_params_s;
typedef struct {
/**
* ZSTD_VERSION_NUMBER
*
* This is guaranteed to be the first member of ZSTD_trace.
* Otherwise, this struct is not stable between versions. If
* the version number does not match your expectation, you
* should not interpret the rest of the struct.
*/
unsigned version;
/**
* Non-zero if streaming (de)compression is used.
*/
int streaming;
/**
* The dictionary ID.
*/
unsigned dictionaryID;
/**
* Is the dictionary cold?
* Only set on decompression.
*/
int dictionaryIsCold;
/**
* The dictionary size or zero if no dictionary.
*/
size_t dictionarySize;
/**
* The uncompressed size of the data.
*/
size_t uncompressedSize;
/**
* The compressed size of the data.
*/
size_t compressedSize;
/**
* The fully resolved CCtx parameters (NULL on decompression).
*/
struct ZSTD_CCtx_params_s const* params;
/**
* The ZSTD_CCtx pointer (NULL on decompression).
*/
struct ZSTD_CCtx_s const* cctx;
/**
* The ZSTD_DCtx pointer (NULL on compression).
*/
struct ZSTD_DCtx_s const* dctx;
} ZSTD_Trace;
/**
* A tracing context. It must be 0 when tracing is disabled.
* Otherwise, any non-zero value returned by a tracing begin()
* function is presented to any subsequent calls to end().
*
* Any non-zero value is treated as tracing is enabled and not
* interpreted by the library.
*
* Two possible uses are:
* * A timestamp for when the begin() function was called.
* * A unique key identifying the (de)compression, like the
* address of the [dc]ctx pointer if you need to track
* more information than just a timestamp.
*/
typedef unsigned long long ZSTD_TraceCtx;
/**
* Trace the beginning of a compression call.
* @param cctx The dctx pointer for the compression.
* It can be used as a key to map begin() to end().
* @returns Non-zero if tracing is enabled. The return value is
* passed to ZSTD_trace_compress_end().
*/
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
struct ZSTD_CCtx_s const* cctx);
/**
* Trace the end of a compression call.
* @param ctx The return value of ZSTD_trace_compress_begin().
* @param trace The zstd tracing info.
*/
ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
ZSTD_TraceCtx ctx,
ZSTD_Trace const* trace);
/**
* Trace the beginning of a decompression call.
* @param dctx The dctx pointer for the decompression.
* It can be used as a key to map begin() to end().
* @returns Non-zero if tracing is enabled. The return value is
* passed to ZSTD_trace_compress_end().
*/
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
struct ZSTD_DCtx_s const* dctx);
/**
* Trace the end of a decompression call.
* @param ctx The return value of ZSTD_trace_decompress_begin().
* @param trace The zstd tracing info.
*/
ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
ZSTD_TraceCtx ctx,
ZSTD_Trace const* trace);
#endif /* ZSTD_TRACE */
#endif /* ZSTD_TRACE_H */

View File

@ -0,0 +1,134 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_CLEVELS_H
#define ZSTD_CLEVELS_H
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
#include "../zstd.h"
/*-===== Pre-defined compression levels =====-*/
#define ZSTD_MAX_CLEVEL 22
#ifdef __GNUC__
__attribute__((__unused__))
#endif
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" - for any srcSize > 256 KB */
/* W, C, H, S, L, TL, strat */
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
{ 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
{ 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
},
};
#endif /* ZSTD_CLEVELS_H */

View File

@ -1,7 +1,6 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* FSE : Finite State Entropy encoder
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -16,8 +15,6 @@
/* **************************************************************
* Includes
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include "../common/compiler.h"
#include "../common/mem.h" /* U32, U16, etc. */
#include "../common/debug.h" /* assert, DEBUGLOG */
@ -26,6 +23,10 @@
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@ -75,41 +76,85 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
U32 const maxSV1 = maxSymbolValue+1;
U16* cumul = (U16*)workSpace; /* size = maxSV1 */
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
U32 highThreshold = tableSize-1;
assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* CTable header */
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
tableU16[-2] = (U16) tableLog;
tableU16[-1] = (U16) maxSymbolValue;
assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table :
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
* https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
#endif
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
for (u=1; u <= maxSymbolValue+1; u++) {
for (u=1; u <= maxSV1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else {
cumul[u] = cumul[u-1] + normalizedCounter[u-1];
assert(normalizedCounter[u-1] >= 0);
cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
assert(cumul[u] >= cumul[u-1]); /* no overflow */
} }
cumul[maxSymbolValue+1] = tableSize+1;
cumul[maxSV1] = (U16)(tableSize+1);
}
/* Spread symbols */
{ U32 position = 0;
if (highThreshold == tableSize - 1) {
/* Case for no low prob count symbols. Lay down 8 bytes at a time
* to reduce branch misses since we are operating on a small block
*/
BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
{ U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
U32 s;
for (s=0; s<maxSV1; ++s, sv += add) {
int i;
int const n = normalizedCounter[s];
MEM_write64(spread + pos, sv);
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
assert(n>=0);
pos += (size_t)n;
}
}
/* Spread symbols across the table. Lack of lowprob symbols means that
* we don't need variable sized inner loop, so we can unroll the loop and
* reduce branch misses.
*/
{ size_t position = 0;
size_t s;
size_t const unroll = 2; /* Experimentally determined optimal unroll */
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
for (s = 0; s < (size_t)tableSize; s += unroll) {
size_t u;
for (u = 0; u < unroll; ++u) {
size_t const uPosition = (position + (u * step)) & tableMask;
tableSymbol[uPosition] = spread[s + u];
}
position = (position + (unroll * step)) & tableMask;
}
assert(position == 0); /* Must have initialized all positions */
}
} else {
U32 position = 0;
U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
for (symbol=0; symbol<maxSV1; symbol++) {
int nbOccurrences;
int const freq = normalizedCounter[symbol];
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@ -118,7 +163,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */
} }
assert(position==0); /* Must have initialized all positions */
}
@ -142,16 +186,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
case -1:
case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
symbolTT[s].deltaFindState = total - 1;
assert(total <= INT_MAX);
symbolTT[s].deltaFindState = (int)(total - 1);
total ++;
break;
default :
{
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
assert(normalizedCounter[s] > 1);
{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = total - normalizedCounter[s];
total += normalizedCounter[s];
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
total += (unsigned)normalizedCounter[s];
} } } }
#if 0 /* debug : symbol costs */
@ -162,31 +207,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
}
}
} }
#endif
return 0;
}
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
#ifndef FSE_COMMONDEFS_ONLY
/*-**************************************************************
* FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ 4 /* bitCount initialized at 4 */
+ 2 /* first two symbols may use one additional bit each */) / 8)
+ 1 /* round up to whole nb bytes */
+ 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
@ -215,7 +255,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
/* Init */
remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize;
nbBits = tableLog+1;
nbBits = (int)tableLog+1;
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) {
@ -234,7 +274,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
}
while (symbol >= start+3) {
start+=3;
bitStream += 3 << bitCount;
bitStream += 3U << bitCount;
bitCount += 2;
}
bitStream += (symbol-start) << bitCount;
@ -254,7 +294,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
count++; /* +1 for extra accuracy */
if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
bitStream += count << bitCount;
bitStream += (U32)count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
previousIs0 = (count==1);
@ -282,7 +322,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
return (out-ostart);
assert(out >= ostart);
return (size_t)(out-ostart);
}
@ -303,21 +344,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
* FSE Compression Code
****************************************************************/
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
size_t size __attribute__ ((unused));
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)malloc(size);
}
void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
@ -325,7 +356,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */
@ -342,11 +373,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
}
/* Secondary normalization method.
To be used when primary method fails. */
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
{
short const NOT_YET_ASSIGNED = -2;
U32 s;
@ -363,7 +393,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
continue;
}
if (count[s] <= lowThreshold) {
norm[s] = -1;
norm[s] = lowProbCount;
distributed++;
total -= count[s];
continue;
@ -415,7 +445,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
{ U64 const vStepLog = 62 - tableLog;
U64 const mid = (1ULL << (vStepLog-1)) - 1;
U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
U64 tmpTotal = mid;
for (s=0; s<=maxSymbolValue; s++) {
if (norm[s]==NOT_YET_ASSIGNED) {
@ -432,10 +462,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
return 0;
}
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t total,
unsigned maxSymbolValue)
unsigned maxSymbolValue, unsigned useLowProbCount)
{
/* Sanity checks */
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
@ -444,8 +473,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
short const lowProbCount = useLowProbCount ? -1 : 1;
U64 const scale = 62 - tableLog;
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */
U64 const vStep = 1ULL<<(scale-20);
int stillToDistribute = 1<<tableLog;
unsigned s;
@ -457,7 +487,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
if (count[s] == total) return 0; /* rle special case */
if (count[s] == 0) { normalizedCounter[s]=0; continue; }
if (count[s] <= lowThreshold) {
normalizedCounter[s] = -1;
normalizedCounter[s] = lowProbCount;
stillToDistribute--;
} else {
short proba = (short)((count[s]*step) >> scale);
@ -471,7 +501,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
} }
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
/* corner case, need another normalization method */
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
if (FSE_isError(errorCode)) return errorCode;
}
else normalizedCounter[largest] += (short)stillToDistribute;
@ -494,40 +524,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
return tableLog;
}
/* fake FSE_CTable, for raw (uncompressed) input */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSymbolValue = tableMask;
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* header */
tableU16[-2] = (U16) nbBits;
tableU16[-1] = (U16) maxSymbolValue;
/* Build table */
for (s=0; s<tableSize; s++)
tableU16[s] = (U16)(tableSize + s);
/* Build Symbol Transformation Table */
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
for (s=0; s<=maxSymbolValue; s++) {
symbolTT[s].deltaNbBits = deltaNbBits;
symbolTT[s].deltaFindState = s-1;
} }
return 0;
}
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
@ -626,74 +622,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
*/
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
FSE_CTable* CTable = (FSE_CTable*)workSpace;
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
void* scratchBuffer = (void*)(CTable + CTableSize);
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
/* init conditions */
if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
if (srcSize <= 1) return 0; /* Not compressible */
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
}
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
/* Write table description header */
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
op += nc_err;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
/* check compressibility */
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
return op-ostart;
}
typedef struct {
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
} fseWkspMax_t;
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
fseWkspMax_t scratchBuffer;
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}
#endif /* FSE_COMMONDEFS_ONLY */

View File

@ -1,8 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -27,6 +26,16 @@ unsigned HIST_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
* Histogram functions
****************************************************************/
void HIST_add(unsigned* count, const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
while (ip<end) {
count[*ip++]++;
}
}
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
@ -35,7 +44,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned largestCount=0;
memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) {
@ -61,9 +70,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
* this design makes better use of OoO cpus,
* and is noticeably faster when some values are heavily repeated.
* But it needs some additional workspace for intermediate tables.
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
* `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
* @return : largest histogram frequency,
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
* or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
static size_t HIST_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
@ -72,22 +81,21 @@ static size_t HIST_count_parallel_wksp(
{
const BYTE* ip = (const BYTE*)source;
const BYTE* const iend = ip+sourceSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
unsigned max=0;
U32* const Counting1 = workSpace;
U32* const Counting2 = Counting1 + 256;
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
assert(*maxSymbolValuePtr <= 255);
if (!sourceSize) {
memset(count, 0, maxSymbolValue + 1);
ZSTD_memset(count, 0, countSize);
*maxSymbolValuePtr = 0;
return 0;
}
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32(ip); ip += 4;
@ -119,21 +127,18 @@ static size_t HIST_count_parallel_wksp(
/* finish last symbols */
while (ip<iend) Counting1[*ip++]++;
if (check) { /* verify stats will fit into destination table */
U32 s; for (s=255; s>maxSymbolValue; s--) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
{ U32 s;
if (maxSymbolValue > 255) maxSymbolValue = 255;
for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
for (s=0; s<256; s++) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s] > max) max = Counting1[s];
} }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
{ unsigned maxSymbolValue = 255;
while (!Counting1[maxSymbolValue]) maxSymbolValue--;
if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
*maxSymbolValuePtr = maxSymbolValue;
ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
}
return (size_t)max;
}
@ -153,14 +158,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}
/* HIST_count_wksp() :
* Same as HIST_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
@ -176,9 +173,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
}
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
}
#endif

View File

@ -1,8 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -15,7 +14,7 @@
****************************************************************** */
/* --- dependencies --- */
#include <stddef.h> /* size_t */
#include "../common/zstd_deps.h" /* size_t */
/* --- simple histogram functions --- */
@ -74,3 +73,10 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);
/*! HIST_add() :
* Lowest level: just add nb of occurrences of characters from @src into @count.
* @count is not reset. @count array is presumed large enough (i.e. 1 KB).
@ This function does not need any additional stack memory.
*/
void HIST_add(unsigned* count, const void* src, size_t srcSize);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -14,11 +13,36 @@
***************************************/
#include "zstd_compress_literals.h"
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showHexa(const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*)src;
size_t u;
for (u=0; u<srcSize; u++) {
RAWLOG(5, " %02X", ip[u]); (void)ip;
}
RAWLOG(5, " \n");
return srcSize;
}
#endif
/* **************************************************************
* Literals compression - special cases
****************************************************************/
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
switch(flSize)
@ -36,17 +60,31 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
assert(0);
}
memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
ZSTD_memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
return srcSize + flSize;
}
static int allBytesIdentical(const void* src, size_t srcSize)
{
assert(srcSize >= 1);
assert(src != NULL);
{ const BYTE b = ((const BYTE*)src)[0];
size_t p;
for (p=1; p<srcSize; p++) {
if (((const BYTE*)src)[p] != b) return 0;
}
return 1;
}
}
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
assert(dstCapacity >= 4); (void)dstCapacity;
assert(allBytesIdentical(src, srcSize));
switch(flSize)
{
@ -64,68 +102,103 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
}
ostart[flSize] = *(const BYTE*)src;
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
return flSize+1;
}
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2)
/* ZSTD_minLiteralsToCompress() :
* returns minimal amount of literals
* for literal compression to even be attempted.
* Minimum is made tighter as compression strategy increases.
*/
static size_t
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
{
assert((int)strategy >= 0);
assert((int)strategy <= 9);
/* btultra2 : min 8 bytes;
* then 2x larger for each successive compression strategy
* max threshold 64 bytes */
{ int const shift = MIN(9-(int)strategy, 3);
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
return mintc;
}
}
size_t ZSTD_compressLiterals (
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy,
int disableLiteralCompression,
int suspectUncompressible,
int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed;
SymbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
disableLiteralCompression, (U32)srcSize);
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
disableLiteralCompression, (U32)srcSize, dstCapacity);
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
/* if too small, don't even attempt compression (speed opt) */
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
int const flags = 0
| (bmi2 ? HUF_flags_bmi2 : 0)
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ?
HUF_compress1X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
HUF_compress4X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
src, srcSize,
HUF_SYMBOLVALUE_MAX, LitHufLog,
entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable,
&repeat, flags);
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table");
DEBUGLOG(5, "reusing statistics from previous huffman block");
hType = set_repeat;
}
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
{ size_t const minGain = ZSTD_minGain(srcSize, strategy);
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
} }
if (cLitSize==1) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
/* A return value of 1 signals that the alphabet consists of a single symbol.
* However, in some rare circumstances, it could be the compressed size (a single byte).
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
* (it's also necessary to not generate statistics).
* Therefore, in such a case, actively check that all bytes are identical. */
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
} }
if (hType == set_compressed) {
/* using a newly constructed table */
@ -136,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -17,14 +16,24 @@
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* ZSTD_compressRleLiteralsBlock() :
* Conditions :
* - All bytes in @src are identical
* - dstCapacity >= 4 */
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
/* ZSTD_compressLiterals():
* @entropyWorkspace: must be aligned on 4-bytes boundaries
* @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
* @suspectUncompressible: sampling checks, to potentially skip huffman coding
*/
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2);
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
int suspectUncompressible,
int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -51,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
return maxSymbolValue;
}
/**
* Returns true if we should use ncount=-1 else we should
* use ncount=1 for low probability symbols instead.
*/
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{
/* Heuristic: This should cover most blocks <= 16K and
* start to fade out after 16K to about 32K depending on
* compressibility.
*/
return nbSeq >= 2048;
}
/**
* Returns the cost in bytes of encoding the normalized count header.
* Returns an error if any of the helper functions return an error.
@ -61,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
BYTE wksp[FSE_NCOUNTBOUND];
S16 norm[MaxSeq + 1];
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
}
@ -73,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
{
unsigned cost = 0;
unsigned s;
assert(total > 0);
for (s = 0; s <= max; ++s) {
unsigned norm = (unsigned)((256 * count[s]) / total);
if (count[s] != 0 && norm == 0)
@ -139,20 +153,20 @@ size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
return cost >> 8;
}
symbolEncodingType_e
SymbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_DefaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols,
/* Prefer set_basic over set_rle when there are 2 or fewer symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/
@ -220,9 +234,14 @@ ZSTD_selectEncodingType(
return set_compressed;
}
typedef struct {
S16 norm[MaxSeq + 1];
U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
} ZSTD_BuildCTableWksp;
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@ -240,13 +259,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
*op = codeTable[0];
return 1;
case set_repeat:
memcpy(nextCTable, prevCTable, prevCTableSize);
ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
if (count[codeTable[nbSeq-1]] > 1) {
@ -254,10 +273,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
nbSeq_1--;
}
assert(nbSeq_1 > 1);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
(void)entropyWorkspaceSize;
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
assert(oend >= op);
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
return NCountSize;
}
}
@ -271,7 +293,7 @@ ZSTD_encodeSequences_body(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
BIT_CStream_t blockStream;
FSE_CState_t stateMatchLength;
@ -291,19 +313,19 @@ ZSTD_encodeSequences_body(
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
}
BIT_flushBits(&blockStream);
@ -317,8 +339,8 @@ ZSTD_encodeSequences_body(
U32 const mlBits = ML_bits[mlCode];
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
(unsigned)sequences[n].litLength,
(unsigned)sequences[n].matchLength + MINMATCH,
(unsigned)sequences[n].offset);
(unsigned)sequences[n].mlBase + MINMATCH,
(unsigned)sequences[n].offBase);
/* 32b*/ /* 64b*/
/* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
@ -329,18 +351,18 @@ ZSTD_encodeSequences_body(
BIT_flushBits(&blockStream); /* (7)*/
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
if (longOffsets) {
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
}
BIT_flushBits(&blockStream); /* (7)*/
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
@ -365,7 +387,7 @@ ZSTD_encodeSequences_default(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
@ -377,13 +399,13 @@ ZSTD_encodeSequences_default(
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_encodeSequences_bmi2(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
@ -399,7 +421,7 @@ size_t ZSTD_encodeSequences(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,26 +11,27 @@
#ifndef ZSTD_COMPRESS_SEQUENCES_H
#define ZSTD_COMPRESS_SEQUENCES_H
#include "zstd_compress_internal.h" /* SeqDef */
#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
#include "../common/zstd_internal.h" /* SymbolEncodingType_e, ZSTD_strategy */
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
} ZSTD_DefaultPolicy_e;
symbolEncodingType_e
SymbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_DefaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy);
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@ -43,7 +43,7 @@ size_t ZSTD_encodeSequences(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -16,288 +15,10 @@
#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
#include "hist.h" /* HIST_countFast_wksp */
#include "zstd_compress_internal.h"
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
/*-*************************************
* Superblock entropy buffer structs
***************************************/
/** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
typedef struct {
symbolEncodingType_e hType;
BYTE hufDesBuffer[500]; /* TODO give name to this value */
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
/** ZSTD_fseCTablesMetadata_t :
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
BYTE fseTablesBuffer[500]; /* TODO give name to this value */
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
} ZSTD_fseCTablesMetadata_t;
typedef struct {
ZSTD_hufCTablesMetadata_t hufMetadata;
ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildSuperBlockEntropy_literal() :
* Builds entropy for the super-block literals.
* Stores literals block type (raw, rle, compressed, repeat) and
* huffman description table to hufMetadata.
* @return : size of huffman description table or error code */
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_hufCTablesMetadata_t* hufMetadata,
const int disableLiteralsCompression,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
BYTE* const nodeWksp = countWkspStart + countWkspSize;
const size_t nodeWkspSize = wkspEnd-nodeWksp;
unsigned maxSymbolValue = 255;
unsigned huffLog = HUF_TABLELOG_DEFAULT;
HUF_repeat repeat = prevHuf->repeatMode;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralsCompression) {
DEBUGLOG(5, "set_basic - disabled");
hufMetadata->hType = set_basic;
return 0;
}
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) {
DEBUGLOG(5, "set_basic - too small");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Scan input and build symbol stats */
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
if (largest == srcSize) {
DEBUGLOG(5, "set_rle");
hufMetadata->hType = set_rle;
return 0;
}
if (largest <= (srcSize >> 7)+4) {
DEBUGLOG(5, "set_basic - no gain");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Validate the previous Huffman table */
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
repeat = HUF_repeat_none;
}
/* Build Huffman Tree */
memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
maxSymbolValue, huffLog,
nodeWksp, nodeWkspSize);
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
huffLog = (U32)maxBits;
{ /* Build and write the CTable */
size_t const newCSize = HUF_estimateCompressedSize(
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
size_t const hSize = HUF_writeCTable(
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
/* Check against repeating the previous CTable */
if (repeat != HUF_repeat_none) {
size_t const oldCSize = HUF_estimateCompressedSize(
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
DEBUGLOG(5, "set_repeat - smaller");
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_repeat;
return 0;
}
}
if (newCSize + hSize >= srcSize) {
DEBUGLOG(5, "set_basic - no gains");
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_basic;
return 0;
}
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
hufMetadata->hType = set_compressed;
nextHuf->repeatMode = HUF_repeat_check;
return hSize;
}
}
}
/** ZSTD_buildSuperBlockEntropy_sequences() :
* Builds entropy for the super-block sequences.
* Stores symbol compression modes and fse table to fseMetadata.
* @return : size of fse tables or error code */
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
const ZSTD_fseCTables_t* prevEntropy,
ZSTD_fseCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_fseCTablesMetadata_t* fseMetadata,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
BYTE* const cTableWksp = countWkspStart + countWkspSize;
const size_t cTableWkspSize = wkspEnd-cTableWksp;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
const BYTE* const llCodeTable = seqStorePtr->llCode;
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* const ostart = fseMetadata->fseTablesBuffer;
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
BYTE* op = ostart;
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
memset(workspace, 0, wkspSize);
fseMetadata->lastCountSize = 0;
/* convert length/distances into codes */
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ U32 LLtype;
unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
LLFSELog, prevEntropy->litlengthCTable,
LL_defaultNorm, LL_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
if (LLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->llType = (symbolEncodingType_e) LLtype;
} }
/* build CTable for Offsets */
{ U32 Offtype;
unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
countWksp, max, mostFrequent, nbSeq,
OffFSELog, prevEntropy->offcodeCTable,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
if (Offtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
} }
/* build CTable for MatchLengths */
{ U32 MLtype;
unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
MLFSELog, prevEntropy->matchlengthCTable,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
if (MLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
} }
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
return op-ostart;
}
/** ZSTD_buildSuperBlockEntropy() :
* Builds entropy for the super-block.
* @return : 0 on success or error code */
static size_t
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize)
{
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
entropyMetadata->hufMetadata.hufDesSize =
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
&prevEntropy->huf, &nextEntropy->huf,
&entropyMetadata->hufMetadata,
ZSTD_disableLiteralsCompression(cctxParams),
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
entropyMetadata->fseMetadata.fseTablesSize =
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
&prevEntropy->fse, &nextEntropy->fse,
cctxParams,
&entropyMetadata->fseMetadata,
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
return 0;
}
/** ZSTD_compressSubBlock_literal() :
* Compresses literals section for a sub-block.
* When we have to write the Huffman table we will sometimes choose a header
@ -305,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
* before we know the table size + compressed size, so we have a bound on the
* table size. If we guessed incorrectly, we fall back to uncompressed literals.
*
* We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
* in writing the header, otherwise it is set to 0.
*
* hufMetadata->hType has literals block type info.
@ -315,13 +36,14 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
* @return : compressed size of literals section of a sub-block
* Or 0 if it unable to compress.
* Or 0 if unable to compress.
* Or error code */
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
const BYTE* literals, size_t litSize,
void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
static size_t
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
const BYTE* literals, size_t litSize,
void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
{
size_t const header = writeEntropy ? 200 : 0;
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@ -329,11 +51,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart + lhSize;
U32 const singleStream = lhSize == 3;
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
SymbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0;
(void)bmi2; /* TODO bmi2... */
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
*entropyWritten = 0;
@ -349,15 +69,15 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
if (writeEntropy && hufMetadata->hType == set_compressed) {
memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
op += hufMetadata->hufDesSize;
cLitSize += hufMetadata->hufDesSize;
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
}
/* TODO bmi2 */
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
: HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
op += cSize;
cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) {
@ -382,7 +102,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
@ -402,25 +122,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
}
*entropyWritten = 1;
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
return op-ostart;
return (size_t)(op-ostart);
}
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
const seqDef* const sstart = sequences;
const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart;
static size_t
ZSTD_seqDecompressedSize(SeqStore_t const* seqStore,
const SeqDef* sequences, size_t nbSeqs,
size_t litSize, int lastSubBlock)
{
size_t matchLengthSum = 0;
size_t litLengthSum __attribute__ ((unused)) = 0;
while (send-sp > 0) {
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
size_t litLengthSum = 0;
size_t n;
for (n=0; n<nbSeqs; n++) {
const ZSTD_SequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
litLengthSum += seqLen.litLength;
matchLengthSum += seqLen.matchLength;
sp++;
}
assert(litLengthSum <= litSize);
if (!lastSequence) {
DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
(unsigned)nbSeqs, (const void*)sequences,
(unsigned)litLengthSum, (unsigned)matchLengthSum);
if (!lastSubBlock)
assert(litLengthSum == litSize);
}
else
assert(litLengthSum <= litSize);
(void)litLengthSum;
return matchLengthSum + litSize;
}
@ -434,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
* @return : compressed size of sequences section of a sub-block
* Or 0 if it is unable to compress
* Or error code. */
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const seqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
static size_t
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const SeqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
BYTE* const ostart = (BYTE*)dst;
@ -454,14 +180,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall, "");
if (nbSeq < 0x7F)
if (nbSeq < 128)
*op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ)
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) {
return op - ostart;
return (size_t)(op - ostart);
}
/* seqHead : flags for FSE encoding type */
@ -475,7 +201,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
const U32 MLtype = fseMetadata->mlType;
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
op += fseMetadata->fseTablesSize;
} else {
const U32 repeat = set_repeat;
@ -483,7 +209,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
}
{ size_t const bitstreamSize = ZSTD_encodeSequences(
op, oend - op,
op, (size_t)(oend - op),
fseTables->matchlengthCTable, mlCode,
fseTables->offcodeCTable, ofCode,
fseTables->litlengthCTable, llCode,
@ -527,7 +253,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
#endif
*entropyWritten = 1;
return op - ostart;
return (size_t)(op - ostart);
}
/** ZSTD_compressSubBlock() :
@ -536,7 +262,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
* Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
const seqDef* sequences, size_t nbSeq,
const SeqDef* sequences, size_t nbSeq,
const BYTE* literals, size_t litSize,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
@ -553,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
&entropyMetadata->hufMetadata, literals, litSize,
op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
op, (size_t)(oend-op),
bmi2, writeLitEntropy, litEntropyWritten);
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
if (cLitSize == 0) return 0;
op += cLitSize;
@ -563,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
sequences, nbSeq,
llCode, mlCode, ofCode,
cctxParams,
op, oend-op,
op, (size_t)(oend-op),
bmi2, writeSeqEntropy, seqEntropyWritten);
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
if (cSeqSize == 0) return 0;
op += cSeqSize;
}
/* Write block header */
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
{ size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(ostart, cBlockHeader24);
}
return op-ostart;
return (size_t)(op-ostart);
}
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@ -600,10 +327,10 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
return 0;
}
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
static size_t ZSTD_estimateSubBlockSize_symbolType(SymbolEncodingType_e type,
const BYTE* codeTable, unsigned maxCode,
size_t nbSeq, const FSE_CTable* fseCTable,
const U32* additionalBits,
const U8* additionalBits,
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
void* workspace, size_t wkspSize)
{
@ -644,8 +371,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
void* workspace, size_t wkspSize,
int writeEntropy)
{
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
size_t cSeqSizeEstimate = 0;
if (nbSeq == 0) return sequencesSectionHeaderSize;
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
nbSeq, fseTables->offcodeCTable, NULL,
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
@ -662,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
typedef struct {
size_t estLitSize;
size_t estBlockSize;
} EstimatedBlockSize;
static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable,
const BYTE* llCodeTable,
const BYTE* mlCodeTable,
@ -670,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize,
int writeLitEntropy, int writeSeqEntropy) {
size_t cSizeEstimate = 0;
cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
&entropy->huf, &entropyMetadata->hufMetadata,
workspace, wkspSize, writeLitEntropy);
cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
int writeLitEntropy, int writeSeqEntropy)
{
EstimatedBlockSize ebs;
ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
&entropy->huf, &entropyMetadata->hufMetadata,
workspace, wkspSize, writeLitEntropy);
ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
workspace, wkspSize, writeSeqEntropy);
return cSizeEstimate + ZSTD_blockHeaderSize;
ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
return ebs;
}
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@ -692,14 +426,57 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
return 0;
}
static size_t countLiterals(SeqStore_t const* seqStore, const SeqDef* sp, size_t seqCount)
{
size_t n, total = 0;
assert(sp != NULL);
for (n=0; n<seqCount; n++) {
total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
}
DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
return total;
}
#define BYTESCALE 256
static size_t sizeBlockSequences(const SeqDef* sp, size_t nbSeqs,
size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
int firstSubBlock)
{
size_t n, budget = 0, inSize=0;
/* entropy headers */
size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
assert(firstSubBlock==0 || firstSubBlock==1);
budget += headerSize;
/* first sequence => at least one sequence*/
budget += sp[0].litLength * avgLitCost + avgSeqCost;
if (budget > targetBudget) return 1;
inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
/* loop over sequences */
for (n=1; n<nbSeqs; n++) {
size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
budget += currentCost;
inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
/* stop when sub-block budget is reached */
if ( (budget > targetBudget)
/* though continue to expand until the sub-block is deemed compressible */
&& (budget < inSize * BYTESCALE) )
break;
}
return n;
}
/** ZSTD_compressSubBlock_multi() :
* Breaks super-block into multiple sub-blocks and compresses them.
* Entropy will be written to the first block.
* The following blocks will use repeat mode to compress.
* All sub-blocks are compressed blocks (no raw or rle blocks).
* @return : compressed size of the super block (which is multiple ZSTD blocks)
* Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
* Entropy will be written into the first block.
* The following blocks use repeat_mode to compress.
* Sub-blocks are all compressed, except the last one when beneficial.
* @return : compressed size of the super block (which features multiple ZSTD blocks)
* or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const SeqStore_t* seqStorePtr,
const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
@ -709,12 +486,14 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const int bmi2, U32 lastBlock,
void* workspace, size_t wkspSize)
{
const seqDef* const sstart = seqStorePtr->sequencesStart;
const seqDef* const send = seqStorePtr->sequences;
const seqDef* sp = sstart;
const SeqDef* const sstart = seqStorePtr->sequencesStart;
const SeqDef* const send = seqStorePtr->sequences;
const SeqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
size_t const nbSeqs = (size_t)(send - sstart);
const BYTE* const lstart = seqStorePtr->litStart;
const BYTE* const lend = seqStorePtr->lit;
const BYTE* lp = lstart;
size_t const nbLiterals = (size_t)(lend - lstart);
BYTE const* ip = (BYTE const*)src;
BYTE const* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*)dst;
@ -723,120 +502,179 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const BYTE* llCodePtr = seqStorePtr->llCode;
const BYTE* mlCodePtr = seqStorePtr->mlCode;
const BYTE* ofCodePtr = seqStorePtr->ofCode;
size_t targetCBlockSize = cctxParams->targetCBlockSize;
size_t litSize, seqCount;
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
int writeSeqEntropy = 1;
int lastSequence = 0;
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
(unsigned)(lend-lp), (unsigned)(send-sstart));
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
(unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
litSize = 0;
seqCount = 0;
do {
size_t cBlockSizeEstimate = 0;
if (sstart == send) {
lastSequence = 1;
} else {
const seqDef* const sequence = sp + seqCount;
lastSequence = sequence == send - 1;
litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
seqCount++;
/* let's start by a general estimation for the full block */
if (nbSeqs > 0) {
EstimatedBlockSize const ebs =
ZSTD_estimateSubBlockSize(lp, nbLiterals,
ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
&nextCBlock->entropy, entropyMetadata,
workspace, wkspSize,
writeLitEntropy, writeSeqEntropy);
/* quick estimation */
size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
size_t n, avgBlockBudget, blockBudgetSupp=0;
avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
(unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
(unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
/* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
* this will result in the production of a single uncompressed block covering @srcSize.*/
if (ebs.estBlockSize > srcSize) return 0;
/* compress and write sub-blocks */
assert(nbSubBlocks>0);
for (n=0; n < nbSubBlocks-1; n++) {
/* determine nb of sequences for current sub-block + nbLiterals from next sequence */
size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
/* if reached last sequence : break to last sub-block (simplification) */
assert(seqCount <= (size_t)(send-sp));
if (sp + seqCount == send) break;
assert(seqCount > 0);
/* compress sub-block */
{ int litEntropyWritten = 0;
int seqEntropyWritten = 0;
size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
const size_t decompressedSize =
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, (size_t)(oend-op),
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
0);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
/* check compressibility, update state components */
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
(unsigned)decompressedSize, (unsigned)cSize);
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
sp += seqCount;
blockBudgetSupp = 0;
} }
/* otherwise : do not compress yet, coalesce current sub-block with following one */
}
if (lastSequence) {
assert(lp <= lend);
assert(litSize <= (size_t)(lend - lp));
litSize = (size_t)(lend - lp);
}
/* I think there is an optimization opportunity here.
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
* since it recalculates estimate from scratch.
* For example, it would recount literal distribution and symbol codes everytime.
*/
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
&nextCBlock->entropy, entropyMetadata,
workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
int litEntropyWritten = 0;
int seqEntropyWritten = 0;
const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, oend-op,
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
lastBlock && lastSequence);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Committed the sub-block");
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
sp += seqCount;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
litSize = 0;
seqCount = 0;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
} /* if (nbSeqs > 0) */
/* write last block */
DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
{ int litEntropyWritten = 0;
int seqEntropyWritten = 0;
size_t litSize = (size_t)(lend - lp);
size_t seqCount = (size_t)(send - sp);
const size_t decompressedSize =
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, (size_t)(oend-op),
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
lastBlock);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
/* update pointers, the nb of literals borrowed from next sequence must be preserved */
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
(unsigned)decompressedSize, (unsigned)cSize);
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
sp += seqCount;
}
} while (!lastSequence);
}
if (writeLitEntropy) {
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
DEBUGLOG(5, "Literal entropy tables were never written");
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
}
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
/* If we haven't written our entropy tables, then we've violated our contract and
* must emit an uncompressed block.
*/
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
return 0;
}
if (ip < iend) {
size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
/* some data left : last part of the block sent uncompressed */
size_t const rSize = (size_t)((iend - ip));
size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
assert(cSize != 0);
op += cSize;
/* We have to regenerate the repcodes because we've skipped some sequences */
if (sp < send) {
seqDef const* seq;
repcodes_t rep;
memcpy(&rep, prevCBlock->rep, sizeof(rep));
const SeqDef* seq;
Repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
}
memcpy(nextCBlock->rep, &rep, sizeof(rep));
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}
}
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
return op-ostart;
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
(unsigned)(op-ostart));
return (size_t)(op-ostart);
}
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
void const* src, size_t srcSize,
unsigned lastBlock) {
const void* src, size_t srcSize,
unsigned lastBlock)
{
ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
&zc->blockState.prevCBlock->entropy,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
&entropyMetadata,
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */), "");
return ZSTD_compressSubBlock_multi(&zc->seqStore,
zc->blockState.prevCBlock,
@ -846,5 +684,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
dst, dstCapacity,
src, srcSize,
zc->bmi2, lastBlock,
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */);
}

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -15,11 +14,10 @@
/*-*************************************
* Dependencies
***************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h"
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/portability_macros.h"
#include "../common/compiler.h" /* ZS2_isPower2 */
/*-*************************************
* Constants
@ -36,15 +34,30 @@ extern "C" {
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
#endif
/* Set our tables and aligneds to align by 64 bytes */
#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
/*-*************************************
* Structures
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers,
ZSTD_cwksp_alloc_aligned
ZSTD_cwksp_alloc_aligned_init_once,
ZSTD_cwksp_alloc_aligned,
ZSTD_cwksp_alloc_buffers
} ZSTD_cwksp_alloc_phase_e;
/**
* Used to describe whether the workspace is statically allocated (and will not
* necessarily ever be freed), or if it's dynamically allocated and we can
* expect a well-formed caller to free this.
*/
typedef enum {
ZSTD_cwksp_dynamic_alloc,
ZSTD_cwksp_static_alloc
} ZSTD_cwksp_static_alloc_e;
/**
* Zstd fits all its internal datastructures into a single continuous buffer,
* so that it only needs to perform a single OS allocation (or so that a buffer
@ -85,15 +98,15 @@ typedef enum {
*
* Workspace Layout:
*
* [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
* [ ... workspace ... ]
* [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
*
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
* so that literally everything fits in a single buffer. Note: if present,
* this must be the first object in the workspace, since ZSTD_free{CCtx,
* this must be the first object in the workspace, since ZSTD_customFree{CCtx,
* CDict}() rely on a pointer comparison to see whether one or two frees are
* required.
*
@ -108,10 +121,20 @@ typedef enum {
* - Tables: these are any of several different datastructures (hash tables,
* chain tables, binary trees) that all respect a common format: they are
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams.
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used.
* - Init once: these buffers require to be initialized at least once before
* use. They should be used when we want to skip memory initialization
* while not triggering memory checkers (like Valgrind) when reading from
* from this memory without writing to it first.
* These buffers should be used carefully as they might contain data
* from previous compressions.
* Buffers are aligned to 64 bytes.
*
* - Aligned: these buffers don't require any initialization before they're
* used. The user of the buffer should make sure they write into a buffer
* location before reading from it.
* Buffers are aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@ -123,9 +146,9 @@ typedef enum {
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
* 2. Buffers
* 3. Aligned
* 4. Tables
* 2. Init once / Tables
* 3. Aligned / Tables
* 4. Buffers / Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
@ -137,10 +160,12 @@ typedef struct {
void* tableEnd;
void* tableValidEnd;
void* allocStart;
void* initOnceStart;
int allocFailed;
BYTE allocFailed;
int workspaceOversizedDuration;
ZSTD_cwksp_alloc_phase_e phase;
ZSTD_cwksp_static_alloc_e isStatic;
} ZSTD_cwksp;
/*-*************************************
@ -148,6 +173,7 @@ typedef struct {
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
@ -157,14 +183,29 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
assert(ws->workspace <= ws->initOnceStart);
#if ZSTD_MEMORY_SANITIZER
{
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
(void)offset;
#if defined(ZSTD_MSAN_PRINT)
if(offset!=-1) {
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
}
#endif
assert(offset==-1);
};
#endif
}
/**
* Align must be a power of 2.
*/
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t align) {
size_t const mask = align - 1;
assert((align & mask) == 0);
assert(ZSTD_isPower2(align));
return (size + mask) & ~mask;
}
@ -177,64 +218,81 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
* Since tables aren't currently redzoned, you don't need to call through this
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned64_alloc_size().
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
if (size == 0)
return 0;
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#else
return size;
#endif
}
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
assert(phase >= ws->phase);
if (phase > ws->phase) {
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
ws->tableValidEnd = ws->objectEnd;
}
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
/* If unaligned allocations down from a too-large top have left us
* unaligned, we need to realign our alloc ptr. Technically, this
* can consume space that is unaccounted for in the neededSpace
* calculation. However, I believe this can only happen when the
* workspace is too large, and specifically when it is too large
* by a larger margin than the space that will be consumed. */
/* TODO: cleaner, compiler warning friendly way to do this??? */
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
if (ws->allocStart < ws->tableValidEnd) {
ws->tableValidEnd = ws->allocStart;
}
}
ws->phase = phase;
}
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size, size_t alignment) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, alignment));
}
/**
* Returns whether this object/buffer/etc was allocated in this workspace.
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given "aligned".
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
MEM_STATIC size_t ZSTD_cwksp_aligned64_alloc_size(size_t size) {
return ZSTD_cwksp_aligned_alloc_size(size, ZSTD_CWKSP_ALIGNMENT_BYTES);
}
/**
* Returns the amount of additional space the cwksp must allocate
* for internal purposes (currently only alignment).
*/
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
* bytes to align the beginning of tables section and end of buffers;
*/
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
return slackSpace;
}
/**
* Return the number of additional bytes required to align a pointer to the given number of bytes.
* alignBytes must be a power of two.
*/
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert(ZSTD_isPower2(alignBytes));
assert(bytes < alignBytes);
return bytes;
}
/**
* Returns the initial value for allocStart which is used to determine the position from
* which we can allocate from the end of the workspace.
*/
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws)
{
char* endPtr = (char*)ws->workspaceEnd;
assert(ZSTD_isPower2(ZSTD_CWKSP_ALIGNMENT_BYTES));
endPtr = endPtr - ((size_t)endPtr % ZSTD_CWKSP_ALIGNMENT_BYTES);
return (void*)endPtr;
}
/**
* Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
* which counts from the end of the wksp (as opposed to the object/table segment).
*
* Returns a pointer to the beginning of that space.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
void* alloc;
void* bottom = ws->tableEnd;
ZSTD_cwksp_internal_advance_phase(ws, phase);
alloc = (BYTE *)ws->allocStart - bytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
MEM_STATIC void*
ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
{
void* const alloc = (BYTE*)ws->allocStart - bytes;
void* const bottom = ws->tableEnd;
DEBUGLOG(5, "cwksp: reserving [0x%p]:%zd bytes; %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
@ -243,16 +301,88 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ws->allocFailed = 1;
return NULL;
}
/* the area is reserved from the end of wksp.
* If it overlaps with tableValidEnd, it voids guarantees on values' range */
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
return alloc;
}
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/**
* Moves the cwksp to the next phase, and does any necessary allocations.
* cwksp initialization must necessarily go through each phase in order.
* Returns a 0 on success, or zstd error
*/
MEM_STATIC size_t
ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase)
{
assert(phase >= ws->phase);
if (phase > ws->phase) {
/* Going from allocating objects to allocating initOnce / tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
phase >= ZSTD_cwksp_alloc_aligned_init_once) {
ws->tableValidEnd = ws->objectEnd;
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
void *const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
void *const objectEnd = (BYTE *) alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!");
ws->objectEnd = objectEnd;
ws->tableEnd = objectEnd; /* table area starts being empty */
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
}
}
}
ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws);
}
return 0;
}
/**
* Returns whether this object/buffer/etc was allocated in this workspace.
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
{
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
}
/**
* Internal function. Do not use directly.
*/
MEM_STATIC void*
ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase)
{
void* alloc;
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
return NULL;
}
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
if (alloc) {
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
/* We need to keep the redzone poisoned while unpoisoning the bytes that
* are actually allocated. */
__asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE);
}
}
#endif
return alloc;
@ -261,33 +391,79 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
/**
* Reserves and returns unaligned memory.
*/
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
{
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
}
/**
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
* This memory has been initialized at least once in the past.
* This doesn't mean it has been initialized this time, and it might contain data from previous
* operations.
* The main usage is for algorithms that might need read access into uninitialized memory.
* The algorithm must maintain safety under these conditions and must make sure it doesn't
* leak any of the past data (directly or in side channels).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
assert((bytes & (sizeof(U32)-1)) == 0);
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
{
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
if(ptr && ptr < ws->initOnceStart) {
/* We assume the memory following the current allocation is either:
* 1. Not usable as initOnce memory (end of workspace)
* 2. Another initOnce buffer that has been allocated before (and so was previously memset)
* 3. An ASAN redzone, in which case we don't want to write on it
* For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
* Note that we assume here that MSAN and ASAN cannot run in the same time. */
ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
ws->initOnceStart = ptr;
}
#if ZSTD_MEMORY_SANITIZER
assert(__msan_test_shadow(ptr, bytes) == -1);
#endif
return ptr;
}
/**
* Aligned on sizeof(unsigned). These buffers have the special property that
* their values remain constrained, allowing us to re-use them without
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned64(ZSTD_cwksp* ws, size_t bytes)
{
void* const ptr = ZSTD_cwksp_reserve_internal(ws,
ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
return ptr;
}
/**
* Aligned on 64 bytes. These buffers have the special property that
* their values remain constrained, allowing us to reuse them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
void* alloc = ws->tableEnd;
void* end = (BYTE *)alloc + bytes;
void* top = ws->allocStart;
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
{
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
void* alloc;
void* end;
void* top;
/* We can only start allocating tables after we are done reserving space for objects at the
* start of the workspace */
if(ws->phase < phase) {
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
}
}
alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes;
top = ws->allocStart;
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
assert((bytes & (sizeof(U32)-1)) == 0);
ZSTD_cwksp_internal_advance_phase(ws, phase);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(end <= top);
if (end > top) {
@ -297,35 +473,41 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
}
ws->tableEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
__asan_unpoison_memory_region(alloc, bytes);
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
}
#endif
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
return alloc;
}
/**
* Aligned on sizeof(void*).
* Note : should happen only once, at workspace first initialization
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
{
size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
void* alloc = ws->objectEnd;
void* end = (BYTE*)alloc + roundedBytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5,
DEBUGLOG(4,
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
assert((bytes & (sizeof(void*)-1)) == 0);
assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
assert(bytes % ZSTD_ALIGNOF(void*) == 0);
ZSTD_cwksp_assert_internal_consistency(ws);
/* we must be in the first phase, no advance is possible */
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
DEBUGLOG(4, "cwksp: object alloc failed!");
DEBUGLOG(3, "cwksp: object alloc failed!");
ws->allocFailed = 1;
return NULL;
}
@ -333,27 +515,52 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
ws->tableEnd = end;
ws->tableValidEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
alloc = (BYTE*)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
}
#endif
return alloc;
}
/**
* with alignment control
* Note : should happen only once, at workspace first initialization
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object_aligned(ZSTD_cwksp* ws, size_t byteSize, size_t alignment)
{
size_t const mask = alignment - 1;
size_t const surplus = (alignment > sizeof(void*)) ? alignment - sizeof(void*) : 0;
void* const start = ZSTD_cwksp_reserve_object(ws, byteSize + surplus);
if (start == NULL) return NULL;
if (surplus == 0) return start;
assert(ZSTD_isPower2(alignment));
return (void*)(((size_t)start + surplus) & ~mask);
}
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
{
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. */
* space every time we mark it dirty.
* Since tableValidEnd space and initOnce space may overlap we don't poison
* the initOnce portion as it break its promise. This means that this poisoning
* check isn't always applied fully. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
__msan_poison(ws->objectEnd, size);
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
__msan_poison(ws->objectEnd, size);
} else {
assert(ws->initOnceStart >= ws->objectEnd);
__msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
}
}
#endif
@ -381,7 +588,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
}
ZSTD_cwksp_mark_tables_clean(ws);
}
@ -390,11 +597,16 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
* Invalidates table allocations.
* All other allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws)
{
DEBUGLOG(4, "cwksp: clearing tables!");
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* We don't do this when the workspace is statically allocated, because
* when that is the case, we have no capability to hook into the end of the
* workspace's lifecycle to unpoison the memory.
*/
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
@ -411,77 +623,96 @@ MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context reuse logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
* the workspace (or at least the non-static, non-table parts of it)
* every time we start a new compression. */
* the workspace except for the areas in which we expect memory reuse
* without initialization (objects, valid tables area and init once
* memory). */
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
}
}
#endif
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* We don't do this when the workspace is statically allocated, because
* when that is the case, we have no capability to hook into the end of the
* workspace's lifecycle to unpoison the memory.
*/
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
#endif
ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd;
ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
ws->phase = ZSTD_cwksp_alloc_buffers;
if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
}
/**
* The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed
* buffer, if present, must be separately freed).
*/
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
ws->workspace = start;
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws);
ws->workspaceOversizedDuration = 0;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
void* workspace = ZSTD_malloc(size, customMem);
void* workspace = ZSTD_customMalloc(size, customMem);
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
ZSTD_cwksp_init(ws, workspace, size);
ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
return 0;
}
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_free(ptr, customMem);
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
if (ptr != NULL && customMem.customFree != NULL) {
__msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws));
}
#endif
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_customFree(ptr, customMem);
}
/**
* Moves the management of a workspace from one cwksp to another. The src cwksp
* is left in an invalid state (src must be re-init()'ed before its used again).
* is left in an invalid state (src must be re-init()'ed before it's used again).
*/
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
*dst = *src;
memset(src, 0, sizeof(ZSTD_cwksp));
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
@ -492,6 +723,18 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Functions Checking Free Space
***************************************/
/* ZSTD_alignmentSpaceWithinBounds() :
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used.
*/
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
/* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
* the alignment bytes difference between estimation and actual usage */
return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
ZSTD_cwksp_used(ws) <= estimatedSpace;
}
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
}
@ -519,8 +762,4 @@ MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
}
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CWKSP_H */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,8 +11,49 @@
#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashLarge = ms->hashTable;
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
U32* const hashSmall = ms->chainTable;
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
/* Always insert every fastHashFillStep position into the hash tables.
* Insert the other positions into the large hash table if their entry
* is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
if (i == 0) {
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
}
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
}
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
}
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -32,27 +72,263 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
* is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
U32 const current = (U32)(ip - base);
U32 const curr = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
if (i == 0)
hashSmall[smHash] = current + i;
hashSmall[smHash] = curr + i;
if (i == 0 || hashLarge[lgHash] == 0)
hashLarge[lgHash] = current + i;
hashLarge[lgHash] = curr + i;
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
} }
}
void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
{
if (tfp == ZSTD_tfp_forCDict) {
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
} else {
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
}
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls /* template */)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32* const hashLong = ms->hashTable;
const U32 hBitsL = cParams->hashLog;
U32* const hashSmall = ms->chainTable;
const U32 hBitsS = cParams->chainLog;
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
/* presumes that, if there is a dictionary, it must be using Attach mode */
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t mLength;
U32 offset;
U32 curr;
/* how many positions to search before increasing step size */
const size_t kStepIncr = 1 << kSearchStrength;
/* the position at which to increment the step size if no match is found */
const BYTE* nextStep;
size_t step; /* the current step size */
size_t hl0; /* the long hash at ip */
size_t hl1; /* the long hash at ip1 */
U32 idxl0; /* the long match index for ip */
U32 idxl1; /* the long match index for ip1 */
const BYTE* matchl0; /* the long match for ip */
const BYTE* matchs0; /* the short match for ip */
const BYTE* matchl1; /* the long match for ip1 */
const BYTE* matchs0_safe; /* matchs0 or safe address */
const BYTE* ip = istart; /* the current position */
const BYTE* ip1; /* the next position */
/* Array of ~random data, should have low probability of matching data
* we load from here instead of from tables, if matchl0/matchl1 are
* invalid indices. Used to avoid unpredictable branches. */
const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
/* init */
ip += ((ip - prefixLowest) == 0);
{
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
}
/* Outer Loop: one iteration per match found and stored */
while (1) {
step = 1;
nextStep = ip + kStepIncr;
ip1 = ip + step;
if (ip1 > ilimit) {
goto _cleanup;
}
hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
idxl0 = hashLong[hl0];
matchl0 = base + idxl0;
/* Inner Loop: one iteration per search / position */
do {
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
const U32 idxs0 = hashSmall[hs0];
curr = (U32)(ip-base);
matchs0 = base + idxs0;
hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
/* check noDict repcode */
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored;
}
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
/* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch.
* However expression below complies into conditional move. Since
* match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
* if there is a match, all branches become predictable. */
{ const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
/* check prefix long match */
if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
offset = (U32)(ip-matchl0);
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
goto _match_found;
} }
idxl1 = hashLong[hl1];
matchl1 = base + idxl1;
/* Same optimization as matchl0 above */
matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]);
/* check prefix short match */
if(MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) {
goto _search_next_long;
}
if (ip1 >= nextStep) {
PREFETCH_L1(ip1 + 64);
PREFETCH_L1(ip1 + 128);
step++;
nextStep += kStepIncr;
}
ip = ip1;
ip1 += step;
hl0 = hl1;
idxl0 = idxl1;
matchl0 = matchl1;
#if defined(__aarch64__)
PREFETCH_L1(ip+256);
#endif
} while (ip1 <= ilimit);
_cleanup:
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */
return (size_t)(iend - anchor);
_search_next_long:
/* short match found: let's check for a longer one */
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
offset = (U32)(ip - matchs0);
/* check long match at +1 position */
if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) {
size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8;
if (l1len > mLength) {
/* use the long match instead */
ip = ip1;
mLength = l1len;
offset = (U32)(ip-matchl1);
matchs0 = matchl1;
}
}
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */
/* fall-through */
_match_found: /* requires ip, offset, mLength */
offset_2 = offset_1;
offset_1 = offset;
if (step < 4) {
/* It is unsafe to write this value back to the hashtable when ip1 is
* greater than or equal to the new ip we will have after we're done
* processing this match. Rather than perform that test directly
* (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
* more predictable test. The minmatch even if we take a short match is
* 4 bytes, so as long as step, the distance between ip and ip1
* (initially) is less than 4, we know ip1 < new ip. */
hashLong[hl1] = (U32)(ip1 - base);
}
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored:
/* match found */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = curr+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
}
}
}
}
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
U32 const mls /* template */)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32* const hashLong = ms->hashTable;
@ -70,57 +346,39 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams =
dictMode == ZSTD_dictMatchState ?
&dms->cParams : NULL;
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
dms->hashTable : NULL;
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
dms->chainTable : NULL;
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
dms->window.base : NULL;
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
dictBase + dictStartIndex : NULL;
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
dictCParams->hashLog : hBitsL;
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
dictCParams->chainLog : hBitsS;
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
const U32* const dictHashLong = dms->hashTable;
const U32* const dictHashSmall = dms->chainTable;
const U32 dictStartIndex = dms->window.dictLimit;
const BYTE* const dictBase = dms->window.base;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes);
PREFETCH_AREA(dictHashSmall, chainTableBytes);
}
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
if (dictMode == ZSTD_dictMatchState) {
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
}
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@ -128,69 +386,60 @@ size_t ZSTD_compressBlock_doubleFast_generic(
U32 offset;
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const current = (U32)(ip-base);
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
U32 const curr = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2];
U32 matchIndexS = hashSmall[h];
const BYTE* matchLong = base + matchIndexL;
const BYTE* match = base + matchIndexS;
const U32 repIndex = current + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex) ?
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
/* check dictMatchState repcode */
if (dictMode == ZSTD_dictMatchState
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
/* check repcode */
if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored;
}
/* check noDict repcode */
if ( dictMode == ZSTD_noDict
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
if (matchIndexL > prefixLowestIndex) {
if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
/* check prefix long match */
if (MEM_read64(matchLong) == MEM_read64(ip)) {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
}
} else if (dictMode == ZSTD_dictMatchState) {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
} else if (dictTagsMatchL) {
/* check dictMatchState long match */
U32 const dictMatchIndexL = dictHashLong[dictHL];
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
assert(dictMatchL < dictEnd);
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
} }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
/* short match candidate */
if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
} else if (dictMode == ZSTD_dictMatchState) {
} else if (dictTagsMatchS) {
/* check dictMatchState short match */
U32 const dictMatchIndexS = dictHashSmall[dictHS];
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
match = dictBase + dictMatchIndexS;
matchIndexS = dictMatchIndexS + dictIndexDelta;
@ -205,39 +454,38 @@ size_t ZSTD_compressBlock_doubleFast_generic(
continue;
_search_next_long:
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = current + 1;
hashLong[hl3] = curr + 1;
/* check prefix long +1 match */
if (matchIndexL3 > prefixLowestIndex) {
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
}
} else if (dictMode == ZSTD_dictMatchState) {
if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
} else if (dictTagsMatchL3) {
/* check dict long +1 match */
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
assert(dictMatchL3 < dictEnd);
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
ip++;
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
} } }
/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
if (matchIndexS < prefixLowestIndex) {
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
offset = (U32)(current - matchIndexS);
offset = (U32)(curr - matchIndexS);
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
} else {
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@ -245,13 +493,11 @@ _search_next_long:
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
/* fall-through */
_match_found:
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored:
/* match found */
@ -261,7 +507,7 @@ _match_stored:
if (ip <= ilimit) {
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
{ U32 const indexToInsert = curr+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@ -269,56 +515,58 @@ _match_stored:
}
/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
&& repIndex2 < prefixLowestIndex ?
dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
anchor = ip;
continue;
}
break;
} }
if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
continue;
}
break;
}
}
} /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1;
rep[1] = offset_2;
/* Return the last literals size */
return (size_t)(iend - anchor);
}
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
}
ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
const U32 mls = ms->cParams.minMatch;
@ -326,19 +574,19 @@ size_t ZSTD_compressBlock_doubleFast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
}
}
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
const U32 mls = ms->cParams.minMatch;
@ -346,19 +594,21 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */)
{
@ -388,7 +638,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@ -402,31 +652,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
const BYTE* matchLong = matchLongBase + matchLongIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
const U32 curr = (U32)(ip-base);
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
size_t mLength;
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
& (offset_1 < current+1 - dictStartIndex)) /* note: we are searching at current+1 */
if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
U32 offset;
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
offset = current - matchLongIndex;
offset = curr - matchLongIndex;
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -434,24 +684,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
const BYTE* match3 = match3Base + matchIndex3;
U32 offset;
hashLong[h3] = current + 1;
hashLong[h3] = curr + 1;
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
ip++;
offset = current+1 - matchIndex3;
offset = curr+1 - matchIndex3;
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
} else {
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
offset = current - matchIndex;
offset = curr - matchIndex;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
@ -465,7 +715,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
if (ip <= ilimit) {
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
{ U32 const indexToInsert = curr+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@ -477,13 +727,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
& (offset_2 < current2 - dictStartIndex))
if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
& (offset_2 <= current2 - dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@ -501,9 +751,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
return (size_t)(iend - anchor);
}
ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;
@ -511,12 +765,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,28 +11,32 @@
#ifndef ZSTD_DOUBLE_FAST_H
#define ZSTD_DOUBLE_FAST_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#endif
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
#else
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
#endif /* ZSTD_DOUBLE_FAST_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,27 +11,20 @@
#ifndef ZSTD_FAST_H
#define ZSTD_FAST_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_FAST_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,57 +11,183 @@
#ifndef ZSTD_LAZY_H
#define ZSTD_LAZY_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h"
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
/**
* Dedicated Dictionary Search Structure bucket log. In the
* ZSTD_dedicatedDictSearch mode, the hashTable has
* 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
* one.
*/
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
#endif
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
size_t ZSTD_compressBlock_greedy_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_GREEDY NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_LAZY NULL
#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
#else
#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
#endif
#endif /* ZSTD_LAZY_H */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,37 +11,157 @@
#include "zstd_ldm.h"
#include "../common/debug.h"
#include "../common/xxhash.h"
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
#include "zstd_ldm_geartab.h"
#define LDM_BUCKET_SIZE_LOG 3
#define LDM_BUCKET_SIZE_LOG 4
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10
typedef struct {
U64 rolling;
U64 stopMask;
} ldmRollingHashState_t;
/** ZSTD_ldm_gear_init():
*
* Initializes the rolling hash state such that it will honor the
* settings in params. */
static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
{
unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
unsigned hashRateLog = params->hashRateLog;
state->rolling = ~(U32)0;
/* The choice of the splitting criterion is subject to two conditions:
* 1. it has to trigger on average every 2^(hashRateLog) bytes;
* 2. ideally, it has to depend on a window of minMatchLength bytes.
*
* In the gear hash algorithm, bit n depends on the last n bytes;
* so in order to obtain a good quality splitting criterion it is
* preferable to use bits with high weight.
*
* To match condition 1 we use a mask with hashRateLog bits set
* and, because of the previous remark, we make sure these bits
* have the highest possible weight while still respecting
* condition 2.
*/
if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
} else {
/* In this degenerate case we simply honor the hash rate. */
state->stopMask = ((U64)1 << hashRateLog) - 1;
}
}
/** ZSTD_ldm_gear_reset()
* Feeds [data, data + minMatchLength) into the hash without registering any
* splits. This effectively resets the hash state. This is used when skipping
* over data, either at the beginning of a block, or skipping sections.
*/
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
BYTE const* data, size_t minMatchLength)
{
U64 hash = state->rolling;
size_t n = 0;
#define GEAR_ITER_ONCE() do { \
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
n += 1; \
} while (0)
while (n + 3 < minMatchLength) {
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
}
while (n < minMatchLength) {
GEAR_ITER_ONCE();
}
#undef GEAR_ITER_ONCE
}
/** ZSTD_ldm_gear_feed():
*
* Registers in the splits array all the split points found in the first
* size bytes following the data pointer. This function terminates when
* either all the data has been processed or LDM_BATCH_SIZE splits are
* present in the splits array.
*
* Precondition: The splits array must not be full.
* Returns: The number of bytes processed. */
static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
BYTE const* data, size_t size,
size_t* splits, unsigned* numSplits)
{
size_t n;
U64 hash, mask;
hash = state->rolling;
mask = state->stopMask;
n = 0;
#define GEAR_ITER_ONCE() do { \
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
n += 1; \
if (UNLIKELY((hash & mask) == 0)) { \
splits[*numSplits] = n; \
*numSplits += 1; \
if (*numSplits == LDM_BATCH_SIZE) \
goto done; \
} \
} while (0)
while (n + 3 < size) {
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
}
while (n < size) {
GEAR_ITER_ONCE();
}
#undef GEAR_ITER_ONCE
done:
state->rolling = hash;
return n;
}
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams)
const ZSTD_compressionParameters* cParams)
{
params->windowLog = cParams->windowLog;
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (cParams->strategy >= ZSTD_btopt) {
/* Get out of the way of the optimal parser */
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
params->minMatchLength = minMatch;
if (params->hashRateLog == 0) {
if (params->hashLog > 0) {
/* if params->hashLog is set, derive hashRateLog from it */
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
if (params->windowLog > params->hashLog) {
params->hashRateLog = params->windowLog - params->hashLog;
}
} else {
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
/* mapping from [fast, rate7] to [btultra2, rate4] */
params->hashRateLog = 7 - (cParams->strategy/3);
}
}
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX);
}
if (params->hashRateLog == 0) {
params->hashRateLog = params->windowLog < params->hashLog
? 0
: params->windowLog - params->hashLog;
if (params->minMatchLength == 0) {
params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (cParams->strategy >= ZSTD_btultra)
params->minMatchLength /= 2;
}
if (params->bucketSizeLog==0) {
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX);
}
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
@ -54,95 +173,34 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0;
return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
}
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}
/** ZSTD_ldm_getSmallHash() :
* numBits should be <= 32
* If numBits==0, returns 0.
* @return : the most significant numBits of value. */
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
{
assert(numBits <= 32);
return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
}
/** ZSTD_ldm_getChecksum() :
* numBitsToDiscard should be <= 32
* @return : the next most significant 32 bits after numBitsToDiscard */
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
{
assert(numBitsToDiscard <= 32);
return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
}
/** ZSTD_ldm_getTag() ;
* Given the hash, returns the most significant numTagBits bits
* after (32 + hbits) bits.
*
* If there are not enough bits remaining, return the last
* numTagBits bits. */
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
{
assert(numTagBits < 32 && hbits <= 32);
if (32 - hbits < numTagBits) {
return hash & (((U32)1 << numTagBits) - 1);
} else {
return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
}
return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
}
/** ZSTD_ldm_getBucket() :
* Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
const ldmState_t* ldmState, size_t hash, U32 const bucketSizeLog)
{
return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
return ldmState->hashTable + (hash << bucketSizeLog);
}
/** ZSTD_ldm_insertEntry() :
* Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
size_t const hash, const ldmEntry_t entry,
ldmParams_t const ldmParams)
U32 const bucketSizeLog)
{
BYTE* const bucketOffsets = ldmState->bucketOffsets;
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
bucketOffsets[hash]++;
bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
}
BYTE* const pOffset = ldmState->bucketOffsets + hash;
unsigned const offset = *pOffset;
*(ZSTD_ldm_getBucket(ldmState, hash, bucketSizeLog) + offset) = entry;
*pOffset = (BYTE)((offset + 1) & ((1u << bucketSizeLog) - 1));
/** ZSTD_ldm_makeEntryAndInsertByTag() :
*
* Gets the small hash, checksum, and tag from the rollingHash.
*
* If the tag matches (1 << ldmParams.hashRateLog)-1, then
* creates an ldmEntry from the offset, and inserts it into the hash table.
*
* hBits is the length of the small hash, which is the most significant hBits
* of rollingHash. The checksum is the next 32 most significant bits, followed
* by ldmParams.hashRateLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
U64 const rollingHash,
U32 const hBits,
U32 const offset,
ldmParams_t const ldmParams)
{
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
if (tag == tagMask) {
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
ldmEntry_t entry;
entry.offset = offset;
entry.checksum = checksum;
ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
}
}
/** ZSTD_ldm_countBackwardsMatch() :
@ -151,10 +209,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
* We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
const BYTE* pIn, const BYTE* pAnchor,
const BYTE* pMatch, const BYTE* pBase)
const BYTE* pMatch, const BYTE* pMatchBase)
{
size_t matchLength = 0;
while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
pIn--;
pMatch--;
matchLength++;
@ -162,6 +220,27 @@ static size_t ZSTD_ldm_countBackwardsMatch(
return matchLength;
}
/** ZSTD_ldm_countBackwardsMatch_2segments() :
* Returns the number of bytes that match backwards from pMatch,
* even with the backwards match spanning 2 different segments.
*
* On reaching `pMatchBase`, start counting from mEnd */
static size_t ZSTD_ldm_countBackwardsMatch_2segments(
const BYTE* pIn, const BYTE* pAnchor,
const BYTE* pMatch, const BYTE* pMatchBase,
const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
{
size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
/* If backwards match is entirely in the extDict or prefix, immediately return */
return matchLength;
}
DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
DEBUGLOG(7, "final backwards match length = %zu", matchLength);
return matchLength;
}
/** ZSTD_ldm_fillFastTables() :
*
* Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
@ -169,7 +248,7 @@ static size_t ZSTD_ldm_countBackwardsMatch(
*
* The tables for the other strategies are filled within their
* block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
static size_t ZSTD_ldm_fillFastTables(ZSTD_MatchState_t* ms,
void const* end)
{
const BYTE* const iend = (const BYTE*)end;
@ -177,11 +256,15 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
switch(ms->cParams.strategy)
{
case ZSTD_fast:
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break;
case ZSTD_dfast:
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
#else
assert(0); /* shouldn't be called: cparams should've been adjusted. */
#endif
break;
case ZSTD_greedy:
@ -199,43 +282,43 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
return 0;
}
/** ZSTD_ldm_fillLdmHashTable() :
*
* Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
* lastHash is the rolling hash that corresponds to lastHashed.
*
* Returns the rolling hash corresponding to position iend-1. */
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
U64 lastHash, const BYTE* lastHashed,
const BYTE* iend, const BYTE* base,
U32 hBits, ldmParams_t const ldmParams)
{
U64 rollingHash = lastHash;
const BYTE* cur = lastHashed + 1;
while (cur < iend) {
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
cur[ldmParams.minMatchLength-1],
state->hashPower);
ZSTD_ldm_makeEntryAndInsertByTag(state,
rollingHash, hBits,
(U32)(cur - base), ldmParams);
++cur;
}
return rollingHash;
}
void ZSTD_ldm_fillHashTable(
ldmState_t* state, const BYTE* ip,
ldmState_t* ldmState, const BYTE* ip,
const BYTE* iend, ldmParams_t const* params)
{
U32 const minMatchLength = params->minMatchLength;
U32 const bucketSizeLog = params->bucketSizeLog;
U32 const hBits = params->hashLog - bucketSizeLog;
BYTE const* const base = ldmState->window.base;
BYTE const* const istart = ip;
ldmRollingHashState_t hashState;
size_t* const splits = ldmState->splitIndices;
unsigned numSplits;
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
if ((size_t)(iend - ip) >= params->minMatchLength) {
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
ZSTD_ldm_fillLdmHashTable(
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
params->hashLog - params->bucketSizeLog,
*params);
ZSTD_ldm_gear_init(&hashState, params);
while (ip < iend) {
size_t hashed;
unsigned n;
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, (size_t)(iend - ip), splits, &numSplits);
for (n = 0; n < numSplits; n++) {
if (ip + splits[n] >= istart + minMatchLength) {
BYTE const* const split = ip + splits[n] - minMatchLength;
U64 const xxhash = XXH64(split, minMatchLength, 0);
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
ldmEntry_t entry;
entry.offset = (U32)(split - base);
entry.checksum = (U32)(xxhash >> 32);
ZSTD_ldm_insertEntry(ldmState, hash, entry, params->bucketSizeLog);
}
}
ip += hashed;
}
}
@ -245,27 +328,26 @@ void ZSTD_ldm_fillHashTable(
* Sets cctx->nextToUpdate to a position corresponding closer to anchor
* if it is far way
* (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
static void ZSTD_ldm_limitTableUpdate(ZSTD_MatchState_t* ms, const BYTE* anchor)
{
U32 const current = (U32)(anchor - ms->window.base);
if (current > ms->nextToUpdate + 1024) {
U32 const curr = (U32)(anchor - ms->window.base);
if (curr > ms->nextToUpdate + 1024) {
ms->nextToUpdate =
current - MIN(512, current - ms->nextToUpdate - 1024);
curr - MIN(512, curr - ms->nextToUpdate - 1024);
}
}
static size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, RawSeqStore_t* rawSeqStore,
ldmParams_t const* params, void const* src, size_t srcSize)
{
/* LDM parameters */
int const extDict = ZSTD_window_hasExtDict(ldmState->window);
U32 const minMatchLength = params->minMatchLength;
U64 const hashPower = ldmState->hashPower;
U32 const entsPerBucket = 1U << params->bucketSizeLog;
U32 const hBits = params->hashLog - params->bucketSizeLog;
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
U32 const hashRateLog = params->hashRateLog;
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
/* Prefix and extDict parameters */
U32 const dictLimit = ldmState->window.dictLimit;
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@ -277,45 +359,69 @@ static size_t ZSTD_ldm_generateSequences_internal(
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
BYTE const* const iend = istart + srcSize;
BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
BYTE const* const ilimit = iend - HASH_READ_SIZE;
/* Input positions */
BYTE const* anchor = istart;
BYTE const* ip = istart;
/* Rolling hash */
BYTE const* lastHashed = NULL;
U64 rollingHash = 0;
/* Rolling hash state */
ldmRollingHashState_t hashState;
/* Arrays for staged-processing */
size_t* const splits = ldmState->splitIndices;
ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
unsigned numSplits;
while (ip <= ilimit) {
size_t mLength;
U32 const current = (U32)(ip - base);
size_t forwardMatchLength = 0, backwardMatchLength = 0;
ldmEntry_t* bestEntry = NULL;
if (ip != istart) {
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
lastHashed[minMatchLength],
hashPower);
} else {
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
}
lastHashed = ip;
if (srcSize < minMatchLength)
return iend - anchor;
/* Do not insert and do not look for a match */
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
ip++;
continue;
/* Initialize the rolling hash state with the first minMatchLength bytes */
ZSTD_ldm_gear_init(&hashState, params);
ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
ip += minMatchLength;
while (ip < ilimit) {
size_t hashed;
unsigned n;
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
splits, &numSplits);
for (n = 0; n < numSplits; n++) {
BYTE const* const split = ip + splits[n] - minMatchLength;
U64 const xxhash = XXH64(split, minMatchLength, 0);
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
candidates[n].split = split;
candidates[n].hash = hash;
candidates[n].checksum = (U32)(xxhash >> 32);
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, params->bucketSizeLog);
PREFETCH_L1(candidates[n].bucket);
}
/* Get the best entry and compute the match lengths */
{
ldmEntry_t* const bucket =
ZSTD_ldm_getBucket(ldmState,
ZSTD_ldm_getSmallHash(rollingHash, hBits),
*params);
ldmEntry_t* cur;
size_t bestMatchLength = 0;
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
for (n = 0; n < numSplits; n++) {
size_t forwardMatchLength = 0, backwardMatchLength = 0,
bestMatchLength = 0, mLength;
U32 offset;
BYTE const* const split = candidates[n].split;
U32 const checksum = candidates[n].checksum;
U32 const hash = candidates[n].hash;
ldmEntry_t* const bucket = candidates[n].bucket;
ldmEntry_t const* cur;
ldmEntry_t const* bestEntry = NULL;
ldmEntry_t newEntry;
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
newEntry.offset = (U32)(split - base);
newEntry.checksum = checksum;
/* If a split point would generate a sequence overlapping with
* the previous one, we merely register it in the hash table and
* move on */
if (split < anchor) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
continue;
}
for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
size_t curForwardMatchLength, curBackwardMatchLength,
curTotalMatchLength;
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
@ -329,30 +435,23 @@ static size_t ZSTD_ldm_generateSequences_internal(
cur->offset < dictLimit ? dictEnd : iend;
BYTE const* const lowMatchPtr =
cur->offset < dictLimit ? dictStart : lowPrefixPtr;
curForwardMatchLength = ZSTD_count_2segments(
ip, pMatch, iend,
matchEnd, lowPrefixPtr);
curForwardMatchLength =
ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
if (curForwardMatchLength < minMatchLength) {
continue;
}
curBackwardMatchLength =
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
lowMatchPtr);
curTotalMatchLength = curForwardMatchLength +
curBackwardMatchLength;
curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
} else { /* !extDict */
BYTE const* const pMatch = base + cur->offset;
curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
curForwardMatchLength = ZSTD_count(split, pMatch, iend);
if (curForwardMatchLength < minMatchLength) {
continue;
}
curBackwardMatchLength =
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
lowPrefixPtr);
curTotalMatchLength = curForwardMatchLength +
curBackwardMatchLength;
ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
}
curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
if (curTotalMatchLength > bestMatchLength) {
bestMatchLength = curTotalMatchLength;
@ -361,57 +460,54 @@ static size_t ZSTD_ldm_generateSequences_internal(
bestEntry = cur;
}
}
}
/* No match found -- continue searching */
if (bestEntry == NULL) {
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
hBits, current,
*params);
ip++;
continue;
}
/* No match found -- insert an entry into the hash table
* and process the next candidate match */
if (bestEntry == NULL) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
continue;
}
/* Match found */
mLength = forwardMatchLength + backwardMatchLength;
ip -= backwardMatchLength;
/* Match found */
offset = (U32)(split - base) - bestEntry->offset;
mLength = forwardMatchLength + backwardMatchLength;
{
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
{
/* Store the sequence:
* ip = current - backwardMatchLength
* The match is at (bestEntry->offset - backwardMatchLength)
/* Out of sequence storage */
if (rawSeqStore->size == rawSeqStore->capacity)
return ERROR(dstSize_tooSmall);
seq->litLength = (U32)(split - backwardMatchLength - anchor);
seq->matchLength = (U32)mLength;
seq->offset = offset;
rawSeqStore->size++;
}
/* Insert the current entry into the hash table --- it must be
* done after the previous block to avoid clobbering bestEntry */
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
anchor = split + forwardMatchLength;
/* If we find a match that ends after the data that we've hashed
* then we have a repeating, overlapping, pattern. E.g. all zeros.
* If one repetition of the pattern matches our `stopMask` then all
* repetitions will. We don't need to insert them all into out table,
* only the first one. So skip over overlapping matches.
* This is a major speed boost (20x) for compressing a single byte
* repeated, when that byte ends up in the table.
*/
U32 const matchIndex = bestEntry->offset;
U32 const offset = current - matchIndex;
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
/* Out of sequence storage */
if (rawSeqStore->size == rawSeqStore->capacity)
return ERROR(dstSize_tooSmall);
seq->litLength = (U32)(ip - anchor);
seq->matchLength = (U32)mLength;
seq->offset = offset;
rawSeqStore->size++;
if (anchor > ip + hashed) {
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
/* Continue the outer loop at anchor (ip + hashed == anchor). */
ip = anchor - hashed;
break;
}
}
/* Insert the current entry into the hash table */
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
(U32)(lastHashed - base),
*params);
assert(ip + backwardMatchLength == lastHashed);
/* Fill the hash table from lastHashed+1 to ip+mLength*/
/* Heuristic: don't need to fill the entire table at end of block */
if (ip + mLength <= ilimit) {
rollingHash = ZSTD_ldm_fillLdmHashTable(
ldmState, rollingHash, lastHashed,
ip + mLength, base, hBits, *params);
lastHashed = ip + mLength - 1;
}
ip += mLength;
anchor = ip;
ip += hashed;
}
return iend - anchor;
}
@ -428,7 +524,7 @@ static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
}
size_t ZSTD_ldm_generateSequences(
ldmState_t* ldmState, rawSeqStore_t* sequences,
ldmState_t* ldmState, RawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize)
{
U32 const maxDist = 1U << params->windowLog;
@ -460,7 +556,7 @@ size_t ZSTD_ldm_generateSequences(
assert(chunkStart < iend);
/* 1. Perform overflow correction if necessary. */
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@ -474,7 +570,7 @@ size_t ZSTD_ldm_generateSequences(
* the window through early invalidation.
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
* offset against maxDist directly.
*
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
* that any offset used is valid at the END of the sequence, since it may
@ -504,7 +600,9 @@ size_t ZSTD_ldm_generateSequences(
return 0;
}
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
void
ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
{
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
if (srcSize <= seq->litLength) {
@ -539,7 +637,7 @@ void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 cons
* Returns the current sequence to handle, or if the rest of the block should
* be literals, it returns a sequence with offset == 0.
*/
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
static rawSeq maybeSplitSequence(RawSeqStore_t* rawSeqStore,
U32 const remaining, U32 const minMatch)
{
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
@ -563,14 +661,32 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
return sequence;
}
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) {
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
if (currPos >= currSeq.litLength + currSeq.matchLength) {
currPos -= currSeq.litLength + currSeq.matchLength;
rawSeqStore->pos++;
} else {
rawSeqStore->posInSequence = currPos;
break;
}
}
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
rawSeqStore->posInSequence = 0;
}
}
size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_ParamSwitch_e useRowMatchFinder,
void const* src, size_t srcSize)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
unsigned const minMatch = cParams->minMatch;
ZSTD_blockCompressor const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
ZSTD_BlockCompressor_f const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
BYTE const* const iend = istart + srcSize;
@ -578,14 +694,22 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
BYTE const* ip = istart;
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
/* If using opt parser, use LDMs only as candidates rather than always accepting them */
if (cParams->strategy >= ZSTD_btopt) {
size_t lastLLSize;
ms->ldmSeqStore = rawSeqStore;
lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
return lastLLSize;
}
assert(rawSeqStore->pos <= rawSeqStore->size);
assert(rawSeqStore->size <= rawSeqStore->capacity);
/* Loop through each sequence and apply the block compressor to the lits */
/* Loop through each sequence and apply the block compressor to the literals */
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
/* maybeSplitSequence updates rawSeqStore->pos */
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
(U32)(iend - ip), minMatch);
int i;
/* End signal */
if (sequence.offset == 0)
break;
@ -598,6 +722,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* Run the block compressor */
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{
int i;
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
ip += sequence.litLength;
@ -607,8 +732,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[0] = sequence.offset;
/* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
sequence.offset + ZSTD_REP_MOVE,
sequence.matchLength - MINMATCH);
OFFSET_TO_OFFBASE(sequence.offset),
sequence.matchLength);
ip += sequence.matchLength;
}
}

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,10 +11,6 @@
#ifndef ZSTD_LDM_H
#define ZSTD_LDM_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h" /* ldmParams_t, U32 */
#include "../zstd.h" /* ZSTD_CCtx, size_t */
@ -44,7 +39,7 @@ void ZSTD_ldm_fillHashTable(
* sequences.
*/
size_t ZSTD_ldm_generateSequences(
ldmState_t* ldms, rawSeqStore_t* sequences,
ldmState_t* ldms, RawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize);
/**
@ -65,8 +60,9 @@ size_t ZSTD_ldm_generateSequences(
* two. We handle that case correctly, and update `rawSeqStore` appropriately.
* NOTE: This function does not return any errors.
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_ParamSwitch_e useRowMatchFinder,
void const* src, size_t srcSize);
/**
@ -74,11 +70,17 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
*
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
* Avoids emitting matches less than `minMatch` bytes.
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
void ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize,
U32 const minMatch);
/* ZSTD_ldm_skipRawSeqStoreBytes():
* Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
* Not to be used in conjunction with ZSTD_ldm_skipSequences().
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes);
/** ZSTD_ldm_getTableSize() :
* Estimate the space needed for long distance matching tables or 0 if LDM is
@ -104,8 +106,4 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_FAST_H */

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_LDM_GEARTAB_H
#define ZSTD_LDM_GEARTAB_H
#include "../common/compiler.h" /* UNUSED_ATTR */
#include "../common/mem.h" /* U64 */
static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140,
0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e,
0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b,
0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec,
0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab,
0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c,
0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a,
0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756,
0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
0x2b4da14f2613d8f4
};
#endif /* ZSTD_LDM_GEARTAB_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -12,46 +11,62 @@
#ifndef ZSTD_OPT_H
#define ZSTD_OPT_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h"
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
/* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend);
#endif
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
#else
#define ZSTD_COMPRESSBLOCK_BTOPT NULL
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
#endif
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */
size_t ZSTD_compressBlock_btultra2(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
#else
#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
#endif
#endif /* ZSTD_OPT_H */

View File

@ -0,0 +1,238 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "../common/compiler.h" /* ZSTD_ALIGNOF */
#include "../common/mem.h" /* S64 */
#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
#include "hist.h" /* HIST_add */
#include "zstd_preSplit.h"
#define BLOCKSIZE_MIN 3500
#define THRESHOLD_PENALTY_RATE 16
#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)
#define THRESHOLD_PENALTY 3
#define HASHLENGTH 2
#define HASHLOG_MAX 10
#define HASHTABLESIZE (1 << HASHLOG_MAX)
#define HASHMASK (HASHTABLESIZE - 1)
#define KNUTH 0x9e3779b9
/* for hashLog > 8, hash 2 bytes.
* for hashLog == 8, just take the byte, no hashing.
* The speed of this method relies on compile-time constant propagation */
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
{
assert(hashLog >= 8);
if (hashLog == 8) return (U32)((const BYTE*)p)[0];
assert(hashLog <= HASHLOG_MAX);
return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
}
typedef struct {
unsigned events[HASHTABLESIZE];
size_t nbEvents;
} Fingerprint;
typedef struct {
Fingerprint pastEvents;
Fingerprint newEvents;
} FPStats;
static void initStats(FPStats* fpstats)
{
ZSTD_memset(fpstats, 0, sizeof(FPStats));
}
FORCE_INLINE_TEMPLATE void
addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
const char* p = (const char*)src;
size_t limit = srcSize - HASHLENGTH + 1;
size_t n;
assert(srcSize >= HASHLENGTH);
for (n = 0; n < limit; n+=samplingRate) {
fp->events[hash2(p+n, hashLog)]++;
}
fp->nbEvents += limit/samplingRate;
}
FORCE_INLINE_TEMPLATE void
recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
fp->nbEvents = 0;
addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
}
typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
{ \
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
}
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
{
U64 distance = 0;
size_t n;
assert(hashLog <= HASHLOG_MAX);
for (n = 0; n < ((size_t)1 << hashLog); n++) {
distance +=
abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
}
return distance;
}
/* Compare newEvents with pastEvents
* return 1 when considered "too different"
*/
static int compareFingerprints(const Fingerprint* ref,
const Fingerprint* newfp,
int penalty,
unsigned hashLog)
{
assert(ref->nbEvents > 0);
assert(newfp->nbEvents > 0);
{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
U64 deviation = fpDistance(ref, newfp, hashLog);
U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
return deviation >= threshold;
}
}
static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
acc->events[n] += newfp->events[n];
}
acc->nbEvents += newfp->nbEvents;
}
static void flushEvents(FPStats* fpstats)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
fpstats->pastEvents.events[n] = fpstats->newEvents.events[n];
}
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents;
ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));
}
static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
assert(acc->events[n] >= slice->events[n]);
acc->events[n] -= slice->events[n];
}
acc->nbEvents -= slice->nbEvents;
}
#define CHUNKSIZE (8 << 10)
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
static const RecordEvents_f records_fs[] = {
FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
};
static const unsigned hashParams[] = { 8, 9, 10, 10 };
const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
FPStats* const fpstats = (FPStats*)workspace;
const char* p = (const char*)blockStart;
int penalty = THRESHOLD_PENALTY;
size_t pos = 0;
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
initStats(fpstats);
record_f(&fpstats->pastEvents, p, CHUNKSIZE);
for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
return pos;
} else {
mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
if (penalty > 0) penalty--;
}
}
assert(pos == blockSize);
return blockSize;
(void)flushEvents; (void)removeEvents;
}
/* ZSTD_splitBlock_fromBorders(): very fast strategy :
* compare fingerprint from beginning and end of the block,
* derive from their difference if it's preferable to split in the middle,
* repeat the process a second time, for finer grained decision.
* 3 times did not brought improvements, so I stopped at 2.
* Benefits are good enough for a cheap heuristic.
* More accurate splitting saves more, but speed impact is also more perceptible.
* For better accuracy, use more elaborate variant *_byChunks.
*/
static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
void* workspace, size_t wkspSize)
{
#define SEGMENT_SIZE 512
FPStats* const fpstats = (FPStats*)workspace;
Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
initStats(fpstats);
HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
return blockSize;
HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
middleEvents->nbEvents = SEGMENT_SIZE;
{ U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
return 64 KB;
return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
}
}
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
assert(0<=level && level<=4);
if (level == 0)
return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
/* level >= 1*/
return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
}

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_PRESPLIT_H
#define ZSTD_PRESPLIT_H
#include <stddef.h> /* size_t */
#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
/* ZSTD_splitBlock():
* @level must be a value between 0 and 4.
* higher levels spend more energy to detect block boundaries.
* @workspace must be aligned for size_t.
* @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
* note:
* For the time being, this function only accepts full 128 KB blocks.
* Therefore, @blockSize must be == 128 KB.
* While this could be extended to smaller sizes in the future,
* it is not yet clear if this would be useful. TBD.
*/
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize);
#endif /* ZSTD_PRESPLIT_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,602 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "../common/portability_macros.h"
#if defined(__ELF__) && defined(__GNUC__)
/* Stack marking
* ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
*/
.section .note.GNU-stack,"",%progbits
#if defined(__aarch64__)
/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
* See: https://github.com/facebook/zstd/issues/3841
* See: https://gcc.godbolt.org/z/sqr5T4ffK
* See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
* See: https://reviews.llvm.org/D62609
*/
.pushsection .note.gnu.property, "a"
.p2align 3
.long 4 /* size of the name - "GNU\0" */
.long 0x10 /* size of descriptor */
.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */
.asciz "GNU"
.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
.long 4 /* pr_datasz - 4 bytes */
.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
.p2align 3 /* pr_padding - bring everything to 8 byte alignment */
.popsection
#endif
#endif
#if ZSTD_ENABLE_ASM_X86_64_BMI2
/* Calling convention:
*
* %rdi (or %rcx on Windows) contains the first argument: HUF_DecompressAsmArgs*.
* %rbp isn't maintained (no frame pointer).
* %rsp contains the stack pointer that grows down.
* No red-zone is assumed, only addresses >= %rsp are used.
* All register contents are preserved.
*/
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
.global HUF_decompress4X1_usingDTable_internal_fast_asm_loop
.global HUF_decompress4X2_usingDTable_internal_fast_asm_loop
.global _HUF_decompress4X1_usingDTable_internal_fast_asm_loop
.global _HUF_decompress4X2_usingDTable_internal_fast_asm_loop
.text
/* Sets up register mappings for clarity.
* op[], bits[], dtable & ip[0] each get their own register.
* ip[1,2,3] & olimit alias var[].
* %rax is a scratch register.
*/
#define op0 rsi
#define op1 rbx
#define op2 rcx
#define op3 rdi
#define ip0 r8
#define ip1 r9
#define ip2 r10
#define ip3 r11
#define bits0 rbp
#define bits1 rdx
#define bits2 r12
#define bits3 r13
#define dtable r14
#define olimit r15
/* var[] aliases ip[1,2,3] & olimit
* ip[1,2,3] are saved every iteration.
* olimit is only used in compute_olimit.
*/
#define var0 r15
#define var1 r9
#define var2 r10
#define var3 r11
/* 32-bit var registers */
#define vard0 r15d
#define vard1 r9d
#define vard2 r10d
#define vard3 r11d
/* Calls X(N) for each stream 0, 1, 2, 3. */
#define FOR_EACH_STREAM(X) \
X(0); \
X(1); \
X(2); \
X(3)
/* Calls X(N, idx) for each stream 0, 1, 2, 3. */
#define FOR_EACH_STREAM_WITH_INDEX(X, idx) \
X(0, idx); \
X(1, idx); \
X(2, idx); \
X(3, idx)
/* Define both _HUF_* & HUF_* symbols because MacOS
* C symbols are prefixed with '_' & Linux symbols aren't.
*/
_HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
ZSTD_CET_ENDBRANCH
/* Save all registers - even if they are callee saved for simplicity. */
push %rax
push %rbx
push %rcx
push %rdx
push %rbp
push %rsi
push %rdi
push %r8
push %r9
push %r10
push %r11
push %r12
push %r13
push %r14
push %r15
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2
movq 24(%rax), %ip3
movq 32(%rax), %op0
movq 40(%rax), %op1
movq 48(%rax), %op2
movq 56(%rax), %op3
movq 64(%rax), %bits0
movq 72(%rax), %bits1
movq 80(%rax), %bits2
movq 88(%rax), %bits3
movq 96(%rax), %dtable
push %rax /* argument */
push 104(%rax) /* ilowest */
push 112(%rax) /* oend */
push %olimit /* olimit space */
subq $24, %rsp
.L_4X1_compute_olimit:
/* Computes how many iterations we can do safely
* %r15, %rax may be clobbered
* rbx, rdx must be saved
* op3 & ip0 mustn't be clobbered
*/
movq %rbx, 0(%rsp)
movq %rdx, 8(%rsp)
movq 32(%rsp), %rax /* rax = oend */
subq %op3, %rax /* rax = oend - op3 */
/* r15 = (oend - op3) / 5 */
movabsq $-3689348814741910323, %rdx
mulq %rdx
movq %rdx, %r15
shrq $2, %r15
movq %ip0, %rax /* rax = ip0 */
movq 40(%rsp), %rdx /* rdx = ilowest */
subq %rdx, %rax /* rax = ip0 - ilowest */
movq %rax, %rbx /* rbx = ip0 - ilowest */
/* rdx = (ip0 - ilowest) / 7 */
movabsq $2635249153387078803, %rdx
mulq %rdx
subq %rdx, %rbx
shrq %rbx
addq %rbx, %rdx
shrq $2, %rdx
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
/* r15 = r15 * 5 */
leaq (%r15, %r15, 4), %r15
/* olimit = op3 + r15 */
addq %op3, %olimit
movq 8(%rsp), %rdx
movq 0(%rsp), %rbx
/* If (op3 + 20 > olimit) */
movq %op3, %rax /* rax = op3 */
cmpq %rax, %olimit /* op3 == olimit */
je .L_4X1_exit
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
jb .L_4X1_exit
/* If (ip2 < ip1) go to exit */
cmpq %ip1, %ip2
jb .L_4X1_exit
/* If (ip3 < ip2) go to exit */
cmpq %ip2, %ip3
jb .L_4X1_exit
/* Reads top 11 bits from bits[n]
* Loads dt[bits[n]] into var[n]
*/
#define GET_NEXT_DELT(n) \
movq $53, %var##n; \
shrxq %var##n, %bits##n, %var##n; \
movzwl (%dtable,%var##n,2),%vard##n
/* var[n] must contain the DTable entry computed with GET_NEXT_DELT
* Moves var[n] to %rax
* bits[n] <<= var[n] & 63
* op[n][idx] = %rax >> 8
* %ah is a way to access bits [8, 16) of %rax
*/
#define DECODE_FROM_DELT(n, idx) \
movq %var##n, %rax; \
shlxq %var##n, %bits##n, %bits##n; \
movb %ah, idx(%op##n)
/* Assumes GET_NEXT_DELT has been called.
* Calls DECODE_FROM_DELT then GET_NEXT_DELT
*/
#define DECODE_AND_GET_NEXT(n, idx) \
DECODE_FROM_DELT(n, idx); \
GET_NEXT_DELT(n) \
/* // ctz & nbBytes is stored in bits[n]
* // nbBits is stored in %rax
* ctz = CTZ[bits[n]]
* nbBits = ctz & 7
* nbBytes = ctz >> 3
* op[n] += 5
* ip[n] -= nbBytes
* // Note: x86-64 is little-endian ==> no bswap
* bits[n] = MEM_readST(ip[n]) | 1
* bits[n] <<= nbBits
*/
#define RELOAD_BITS(n) \
bsfq %bits##n, %bits##n; \
movq %bits##n, %rax; \
andq $7, %rax; \
shrq $3, %bits##n; \
leaq 5(%op##n), %op##n; \
subq %bits##n, %ip##n; \
movq (%ip##n), %bits##n; \
orq $1, %bits##n; \
shlx %rax, %bits##n, %bits##n
/* Store clobbered variables on the stack */
movq %olimit, 24(%rsp)
movq %ip1, 0(%rsp)
movq %ip2, 8(%rsp)
movq %ip3, 16(%rsp)
/* Call GET_NEXT_DELT for each stream */
FOR_EACH_STREAM(GET_NEXT_DELT)
.p2align 6
.L_4X1_loop_body:
/* Decode 5 symbols in each of the 4 streams (20 total)
* Must have called GET_NEXT_DELT for each stream
*/
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0)
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1)
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2)
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3)
FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4)
/* Load ip[1,2,3] from stack (var[] aliases them)
* ip[] is needed for RELOAD_BITS
* Each will be stored back to the stack after RELOAD
*/
movq 0(%rsp), %ip1
movq 8(%rsp), %ip2
movq 16(%rsp), %ip3
/* Reload each stream & fetch the next table entry
* to prepare for the next iteration
*/
RELOAD_BITS(0)
GET_NEXT_DELT(0)
RELOAD_BITS(1)
movq %ip1, 0(%rsp)
GET_NEXT_DELT(1)
RELOAD_BITS(2)
movq %ip2, 8(%rsp)
GET_NEXT_DELT(2)
RELOAD_BITS(3)
movq %ip3, 16(%rsp)
GET_NEXT_DELT(3)
/* If op3 < olimit: continue the loop */
cmp %op3, 24(%rsp)
ja .L_4X1_loop_body
/* Reload ip[1,2,3] from stack */
movq 0(%rsp), %ip1
movq 8(%rsp), %ip2
movq 16(%rsp), %ip3
/* Re-compute olimit */
jmp .L_4X1_compute_olimit
#undef GET_NEXT_DELT
#undef DECODE_FROM_DELT
#undef DECODE
#undef RELOAD_BITS
.L_4X1_exit:
addq $24, %rsp
/* Restore stack (oend & olimit) */
pop %rax /* olimit */
pop %rax /* oend */
pop %rax /* ilowest */
pop %rax /* arg */
/* Save ip / op / bits */
movq %ip0, 0(%rax)
movq %ip1, 8(%rax)
movq %ip2, 16(%rax)
movq %ip3, 24(%rax)
movq %op0, 32(%rax)
movq %op1, 40(%rax)
movq %op2, 48(%rax)
movq %op3, 56(%rax)
movq %bits0, 64(%rax)
movq %bits1, 72(%rax)
movq %bits2, 80(%rax)
movq %bits3, 88(%rax)
/* Restore registers */
pop %r15
pop %r14
pop %r13
pop %r12
pop %r11
pop %r10
pop %r9
pop %r8
pop %rdi
pop %rsi
pop %rbp
pop %rdx
pop %rcx
pop %rbx
pop %rax
ret
_HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
ZSTD_CET_ENDBRANCH
/* Save all registers - even if they are callee saved for simplicity. */
push %rax
push %rbx
push %rcx
push %rdx
push %rbp
push %rsi
push %rdi
push %r8
push %r9
push %r10
push %r11
push %r12
push %r13
push %r14
push %r15
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2
movq 24(%rax), %ip3
movq 32(%rax), %op0
movq 40(%rax), %op1
movq 48(%rax), %op2
movq 56(%rax), %op3
movq 64(%rax), %bits0
movq 72(%rax), %bits1
movq 80(%rax), %bits2
movq 88(%rax), %bits3
movq 96(%rax), %dtable
push %rax /* argument */
push %rax /* olimit */
push 104(%rax) /* ilowest */
movq 112(%rax), %rax
push %rax /* oend3 */
movq %op3, %rax
push %rax /* oend2 */
movq %op2, %rax
push %rax /* oend1 */
movq %op1, %rax
push %rax /* oend0 */
/* Scratch space */
subq $8, %rsp
.L_4X2_compute_olimit:
/* Computes how many iterations we can do safely
* %r15, %rax may be clobbered
* rdx must be saved
* op[1,2,3,4] & ip0 mustn't be clobbered
*/
movq %rdx, 0(%rsp)
/* We can consume up to 7 input bytes each iteration. */
movq %ip0, %rax /* rax = ip0 */
movq 40(%rsp), %rdx /* rdx = ilowest */
subq %rdx, %rax /* rax = ip0 - ilowest */
movq %rax, %r15 /* r15 = ip0 - ilowest */
/* rdx = rax / 7 */
movabsq $2635249153387078803, %rdx
mulq %rdx
subq %rdx, %r15
shrq %r15
addq %r15, %rdx
shrq $2, %rdx
/* r15 = (ip0 - ilowest) / 7 */
movq %rdx, %r15
/* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
movq 8(%rsp), %rax /* rax = oend0 */
subq %op0, %rax /* rax = oend0 - op0 */
movq 16(%rsp), %rdx /* rdx = oend1 */
subq %op1, %rdx /* rdx = oend1 - op1 */
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movq 24(%rsp), %rax /* rax = oend2 */
subq %op2, %rax /* rax = oend2 - op2 */
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movq 32(%rsp), %rax /* rax = oend3 */
subq %op3, %rax /* rax = oend3 - op3 */
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movabsq $-3689348814741910323, %rax
mulq %rdx
shrq $3, %rdx /* rdx = rdx / 10 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
/* olimit = op3 + 5 * r15 */
movq %r15, %rax
leaq (%op3, %rax, 4), %olimit
addq %rax, %olimit
movq 0(%rsp), %rdx
/* If (op3 + 10 > olimit) */
movq %op3, %rax /* rax = op3 */
cmpq %rax, %olimit /* op3 == olimit */
je .L_4X2_exit
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
jb .L_4X2_exit
/* If (ip2 < ip1) go to exit */
cmpq %ip1, %ip2
jb .L_4X2_exit
/* If (ip3 < ip2) go to exit */
cmpq %ip2, %ip3
jb .L_4X2_exit
#define DECODE(n, idx) \
movq %bits##n, %rax; \
shrq $53, %rax; \
movzwl 0(%dtable,%rax,4),%r8d; \
movzbl 2(%dtable,%rax,4),%r15d; \
movzbl 3(%dtable,%rax,4),%eax; \
movw %r8w, (%op##n); \
shlxq %r15, %bits##n, %bits##n; \
addq %rax, %op##n
#define RELOAD_BITS(n) \
bsfq %bits##n, %bits##n; \
movq %bits##n, %rax; \
shrq $3, %bits##n; \
andq $7, %rax; \
subq %bits##n, %ip##n; \
movq (%ip##n), %bits##n; \
orq $1, %bits##n; \
shlxq %rax, %bits##n, %bits##n
movq %olimit, 48(%rsp)
.p2align 6
.L_4X2_loop_body:
/* We clobber r8, so store it on the stack */
movq %r8, 0(%rsp)
/* Decode 5 symbols from each of the 4 streams (20 symbols total). */
FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)
/* Reload r8 */
movq 0(%rsp), %r8
FOR_EACH_STREAM(RELOAD_BITS)
cmp %op3, 48(%rsp)
ja .L_4X2_loop_body
jmp .L_4X2_compute_olimit
#undef DECODE
#undef RELOAD_BITS
.L_4X2_exit:
addq $8, %rsp
/* Restore stack (oend & olimit) */
pop %rax /* oend0 */
pop %rax /* oend1 */
pop %rax /* oend2 */
pop %rax /* oend3 */
pop %rax /* ilowest */
pop %rax /* olimit */
pop %rax /* arg */
/* Save ip / op / bits */
movq %ip0, 0(%rax)
movq %ip1, 8(%rax)
movq %ip2, 16(%rax)
movq %ip3, 24(%rax)
movq %op0, 32(%rax)
movq %op1, 40(%rax)
movq %op2, 48(%rax)
movq %op3, 56(%rax)
movq %bits0, 64(%rax)
movq %bits1, 72(%rax)
movq %bits2, 80(%rax)
movq %bits3, 88(%rax)
/* Restore registers */
pop %r15
pop %r14
pop %r13
pop %r12
pop %r11
pop %r10
pop %r9
pop %r8
pop %rdi
pop %rsi
pop %rbp
pop %rdx
pop %rcx
pop %rbx
pop %rax
ret
#endif

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -15,12 +14,12 @@
/*-*******************************************************
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "zstd_decompress_internal.h"
#include "zstd_ddict.h"
@ -128,14 +127,14 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
ddict->dictContent = dict;
if (!dict) dictSize = 0;
} else {
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
ddict->dictBuffer = internalBuffer;
ddict->dictContent = internalBuffer;
if (!internalBuffer) return ERROR(memory_allocation);
memcpy(internalBuffer, dict, dictSize);
ZSTD_memcpy(internalBuffer, dict, dictSize);
}
ddict->dictSize = dictSize;
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
@ -148,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
ZSTD_dictContentType_e dictContentType,
ZSTD_customMem customMem)
{
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
if (ddict == NULL) return NULL;
ddict->cMem = customMem;
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
@ -199,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
if (sBufferSize < neededSpace) return NULL;
if (dictLoadMethod == ZSTD_dlm_byCopy) {
memcpy(ddict+1, dict, dictSize); /* local copy */
ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
dict = ddict+1;
}
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
@ -214,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = ddict->cMem;
ZSTD_free(ddict->dictBuffer, cMem);
ZSTD_free(ddict, cMem);
ZSTD_customFree(ddict->dictBuffer, cMem);
ZSTD_customFree(ddict, cMem);
return 0;
}
}
@ -241,5 +240,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0;
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
return ddict->dictID;
}

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -16,7 +15,7 @@
/*-*******************************************************
* Dependencies
*********************************************************/
#include <stddef.h> /* size_t */
#include "../common/zstd_deps.h" /* size_t */
#include "../zstd.h" /* ZSTD_DDict, and several public functions */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -16,7 +15,7 @@
/*-*******************************************************
* Dependencies
*********************************************************/
#include <stddef.h> /* size_t */
#include "../common/zstd_deps.h" /* size_t */
#include "../zstd.h" /* DCtx, and some public functions */
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
@ -34,6 +33,12 @@
*/
/* Streaming state is used to inform allocation of the literal buffer */
typedef enum {
not_streaming = 0,
is_streaming = 1
} streaming_operation;
/* ZSTD_decompressBlock_internal() :
* decompress block, starting at `src`,
* into destination buffer `dst`.
@ -42,19 +47,27 @@
*/
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const int frame);
const void* src, size_t srcSize, const streaming_operation streaming);
/* ZSTD_buildFSETable() :
* generate FSE decoding table for one symbol (ll, ml or off)
* this function must be called with valid parameters only
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
* in which case it cannot fail.
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
* defined in zstd_decompress_internal.h.
* Internal use only.
*/
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
unsigned tableLog);
const U32* baseValue, const U8* nbAdditionalBits,
unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2);
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
#endif /* ZSTD_DEC_BLOCK_H */

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -21,33 +20,33 @@
* Dependencies
*********************************************************/
#include "../common/mem.h" /* BYTE, U16, U32 */
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
/*-*******************************************************
* Constants
*********************************************************/
static const U32 LL_base[MaxLL+1] = {
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 18, 20, 22, 24, 28, 32, 40,
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
0x2000, 0x4000, 0x8000, 0x10000 };
static const U32 OF_base[MaxOff+1] = {
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
static const U32 OF_bits[MaxOff+1] = {
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31 };
static const U32 ML_base[MaxML+1] = {
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26,
@ -74,12 +73,17 @@ static const U32 ML_base[MaxML+1] = {
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
typedef struct {
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
} ZSTD_entropyDTables_t;
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@ -96,10 +100,28 @@ typedef enum {
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
} ZSTD_dictUses_e;
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
typedef struct {
const ZSTD_DDict** ddictPtrTable;
size_t ddictPtrTableSize;
size_t ddictPtrCount;
} ZSTD_DDictHashSet;
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
#endif
#define ZSTD_LBMIN 64
#define ZSTD_LBMAX (128 << 10)
/* extra buffer, compensates when dst is not large enough to store litBuffer */
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
typedef enum {
ZSTD_obm_buffered = 0, /* Buffer the output */
ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
} ZSTD_outBufferMode_e;
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
} ZSTD_litLocation_e;
struct ZSTD_DCtx_s
{
@ -114,7 +136,8 @@ struct ZSTD_DCtx_s
const void* virtualStart; /* virtual start of previous segment if it was just before current one */
const void* dictEnd; /* end of previous segment */
size_t expected;
ZSTD_frameHeader fParams;
ZSTD_FrameHeader fParams;
U64 processedCSize;
U64 decodedSize;
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
ZSTD_dStage stage;
@ -123,12 +146,17 @@ struct ZSTD_DCtx_s
XXH64_state_t xxhState;
size_t headerSize;
ZSTD_format_e format;
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
const BYTE* litPtr;
ZSTD_customMem customMem;
size_t litSize;
size_t rleSize;
size_t staticSize;
int isFrameDecompression;
#if DYNAMIC_BMI2
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif
/* dictionary */
ZSTD_DDict* ddictLocal;
@ -136,6 +164,10 @@ struct ZSTD_DCtx_s
U32 dictID;
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
ZSTD_dictUses_e dictUses;
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
int disableHufAsm;
int maxBlockSizeParam;
/* streaming */
ZSTD_dStreamStage streamStage;
@ -148,16 +180,21 @@ struct ZSTD_DCtx_s
size_t outStart;
size_t outEnd;
size_t lhSize;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
void* legacyContext;
U32 previousLegacyVersion;
U32 legacyVersion;
#endif
U32 hostageByte;
int noForwardProgress;
ZSTD_outBufferMode_e outBufferMode;
ZSTD_bufferMode_e outBufferMode;
ZSTD_outBuffer expectedOutBuffer;
/* workspace */
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
BYTE* litBuffer;
const BYTE* litBufferEnd;
ZSTD_litLocation_e litBufferLocation;
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
size_t oversizedDuration;
@ -166,8 +203,21 @@ struct ZSTD_DCtx_s
void const* dictContentBeginForFuzzing;
void const* dictContentEndForFuzzing;
#endif
/* Tracing */
#if ZSTD_TRACE
ZSTD_TraceCtx traceCtx;
#endif
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
#if DYNAMIC_BMI2
return dctx->bmi2;
#else
(void)dctx;
return 0;
#endif
}
/*-*******************************************************
* Shared internal functions
@ -184,7 +234,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
* If yes, do nothing (continue on current segment).
* If not, classify previous segment as "external dictionary", and start a new segment.
* This function cannot fail. */
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -16,24 +15,32 @@
extern "C" {
#endif
/*===== dependency =====*/
#include <stddef.h> /* size_t */
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
#ifndef ZSTDERRORLIB_VISIBILITY
# if defined(__GNUC__) && (__GNUC__ >= 4)
# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
#ifndef ZSTDERRORLIB_VISIBLE
/* Backwards compatibility with old macro name */
# ifdef ZSTDERRORLIB_VISIBILITY
# define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default")))
# else
# define ZSTDERRORLIB_VISIBILITY
# define ZSTDERRORLIB_VISIBLE
# endif
#endif
#ifndef ZSTDERRORLIB_HIDDEN
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
# else
# define ZSTDERRORLIB_HIDDEN
# endif
#endif
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
#endif
/*-*********************************************
@ -59,14 +66,18 @@ typedef enum {
ZSTD_error_frameParameter_windowTooLarge = 16,
ZSTD_error_corruption_detected = 20,
ZSTD_error_checksum_wrong = 22,
ZSTD_error_literals_headerWrong = 24,
ZSTD_error_dictionary_corrupted = 30,
ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34,
ZSTD_error_parameter_unsupported = 40,
ZSTD_error_parameter_combination_unsupported = 41,
ZSTD_error_parameter_outOfBound = 42,
ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_maxSymbolValue_tooSmall = 48,
ZSTD_error_cannotProduce_uncompressedBlock = 49,
ZSTD_error_stabilityCondition_notRespected = 50,
ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62,
ZSTD_error_memory_allocation = 64,
@ -74,17 +85,18 @@ typedef enum {
ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72,
ZSTD_error_dstBuffer_null = 74,
ZSTD_error_noForwardProgress_destFull = 80,
ZSTD_error_noForwardProgress_inputEmpty = 82,
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
ZSTD_error_sequenceProducer_failed = 106,
ZSTD_error_externalSequences_invalid = 107,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
/*! ZSTD_getErrorCode() :
convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
which can be used to compare with enum list published above */
ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */

View File

@ -49,7 +49,7 @@
#define ZSTD_STATIC_LINKING_ONLY
#include "lib/zstd.h"
#include "lib/common/zstd_errors.h"
#include "lib/zstd_errors.h"
static uint_t zstd_earlyabort_pass = 1;
static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;

View File

@ -1,52 +1,65 @@
// SPDX-License-Identifier: BSD-3-Clause
/*
* BSD 3-Clause Clear License
/**
* \file zstd.c
* Single-file Zstandard library.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* Generate using:
* \code
* python combine.py -r ../../lib -x legacy/zstd_legacy.h -o zstd.c zstd-in.c
* \endcode
*/
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) 2019-2020, Michael Niewöhner
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*
* Settings to bake for the single library file.
*
* Note: It's important that none of these affects 'zstd.h' (only the
* implementation files we're amalgamating).
*
* Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also
* defined in mem.h (breaking C99 compatibility).
*
* Note: the undefs for xxHash allow Zstd's implementation to coincide with
* standalone xxHash usage (with global defines).
*
* Note: if you enable ZSTD_LEGACY_SUPPORT the combine.py script will need
* re-running without the "-x legacy/zstd_legacy.h" option (it excludes the
* legacy support at the source level).
*
* Note: multithreading is enabled for all platforms apart from Emscripten.
*/
#define DEBUGLEVEL 0
#define MEM_MODULE
#undef XXH_NAMESPACE
#define XXH_NAMESPACE ZSTD_
#undef XXH_PRIVATE_API
#define XXH_PRIVATE_API
#undef XXH_INLINE_ALL
#define XXH_INLINE_ALL
#define ZSTD_LEGACY_SUPPORT 0
#ifndef __EMSCRIPTEN__
#define ZSTD_MULTITHREAD
#endif
#define ZSTD_TRACE 0
/* TODO: Can't amalgamate ASM function */
#define ZSTD_DISABLE_ASM 1
#define MEM_MODULE
#define XXH_NAMESPACE ZSTD_
#define XXH_PRIVATE_API
#define XXH_INLINE_ALL
#define ZSTD_LEGACY_SUPPORT 0
#define ZSTD_LIB_DICTBUILDER 0
#define ZSTD_LIB_DEPRECATED 0
#define ZSTD_NOBENCH
/* Include zstd_deps.h first with all the options we need enabled. */
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "common/zstd_deps.h"
#include "common/debug.c"
#include "common/entropy_common.c"
#include "common/error_private.c"
#include "common/fse_decompress.c"
#include "common/threading.c"
#include "common/pool.c"
#include "common/zstd_common.c"
@ -56,14 +69,23 @@
#include "compress/zstd_compress_literals.c"
#include "compress/zstd_compress_sequences.c"
#include "compress/zstd_compress_superblock.c"
#include "compress/zstd_preSplit.c"
#include "compress/zstd_compress.c"
#include "compress/zstd_double_fast.c"
#include "compress/zstd_fast.c"
#include "compress/zstd_lazy.c"
#include "compress/zstd_ldm.c"
#include "compress/zstd_opt.c"
#ifdef ZSTD_MULTITHREAD
#include "compress/zstdmt_compress.c"
#endif
#include "decompress/huf_decompress.c"
#include "decompress/zstd_ddict.c"
#include "decompress/zstd_decompress.c"
#include "decompress/zstd_decompress_block.c"
#include "dictBuilder/cover.c"
#include "dictBuilder/divsufsort.c"
#include "dictBuilder/fastcover.c"
#include "dictBuilder/zdict.c"