mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-03-11 04:46:18 +03:00
This commit only replaces the bundled source and does not include any ZFS integration changes. Because the build depends on integration adjustments, it will fail until the accompanying integration commit is applied. Upstream release: https://github.com/facebook/zstd/releases/tag/v1.5.7 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Moch <mail@alexmoch.com> Closes #18089
316 lines
12 KiB
C
316 lines
12 KiB
C
/* ******************************************************************
|
|
* FSE : Finite State Entropy decoder
|
|
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
*
|
|
* You can contact the author at :
|
|
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
|
|
*
|
|
* This source code is licensed under both the BSD-style license (found in the
|
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
* in the COPYING file in the root directory of this source tree).
|
|
* You may select, at your option, one of the above-listed licenses.
|
|
****************************************************************** */
|
|
|
|
|
|
/* **************************************************************
|
|
* Includes
|
|
****************************************************************/
|
|
#include "debug.h" /* assert */
|
|
#include "bitstream.h"
|
|
#include "compiler.h"
|
|
#define FSE_STATIC_LINKING_ONLY
|
|
#include "fse.h"
|
|
#include "error_private.h"
|
|
#include "zstd_deps.h" /* ZSTD_memcpy */
|
|
#include "bits.h" /* ZSTD_highbit32 */
|
|
|
|
|
|
/* **************************************************************
|
|
* Error Management
|
|
****************************************************************/
|
|
/* FSE_isError is an alias that expands to ERR_isError. */
#define FSE_isError ERR_isError
/* Forwards to DEBUG_STATIC_ASSERT; use only *after* variable declarations. */
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
|
|
|
|
|
|
/* **************************************************************
|
|
* Templates
|
|
****************************************************************/
|
|
/*
  This file is designed to be included in order to generate
  type-specific functions (template emulation in C).
  The objective is to write these functions only once, for improved maintenance.
*/
|
|
|
|
/* safety checks :
 * both template parameters must be defined before reaching this point,
 * otherwise compilation stops with an explicit message. */
#if !defined(FSE_FUNCTION_EXTENSION)
#  error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#if !defined(FSE_FUNCTION_TYPE)
#  error "FSE_FUNCTION_TYPE must be defined"
#endif
|
|
|
|
/* Function names :
 * FSE_CAT pastes its two arguments together as written.
 * FSE_FUNCTION_NAME and FSE_TYPE_NAME add one level of indirection, so that
 * macro arguments are expanded before being pasted. */
#define FSE_CAT(lhs,rhs) lhs##rhs
#define FSE_FUNCTION_NAME(lhs,rhs) FSE_CAT(lhs,rhs)
#define FSE_TYPE_NAME(lhs,rhs) FSE_CAT(lhs,rhs)
|
|
|
|
/* FSE_buildDTable_internal() :
 * Fills the decoding table stored after the header of `dt`, using the
 * normalized counts of symbols 0..maxSymbolValue.
 *
 * dt                : destination table; its first cell receives the FSE_DTableHeader,
 *                     decoding cells start at dt+1.
 * normalizedCounter : one count per symbol; -1 marks a low-probability symbol.
 * maxSymbolValue    : highest symbol value present in normalizedCounter.
 * tableLog          : log2 of the number of decoding cells; must be in 1..FSE_MAX_TABLELOG.
 * workSpace         : scratch memory, used as a U16 array of (maxSymbolValue+1) entries
 *                     followed by a byte buffer in which symbols are laid down.
 * wkspSize          : size of workSpace in bytes; must be at least
 *                     FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue).
 *
 * @return : 0 on success,
 *           or an error code (tableLog_tooLarge, maxSymbolValue_tooLarge, GENERIC).
 */
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
    U16* const symbolNext = (U16*)workSpace;
    BYTE* spread;
    U32 maxSV1;
    U32 tableSize;
    U32 highThreshold;

    /* Sanity Checks.
     * tableLog is validated first, because every later computation shifts by it :
     * shifting by an out-of-range amount is undefined behavior.
     * tableLog==0 is rejected as well, since (tableLog-1) is used as a shift count below. */
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
    if (tableLog == 0) return ERROR(GENERIC);
    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);

    /* Parameters are now known to be in range */
    maxSV1 = maxSymbolValue + 1;
    tableSize = (U32)1 << tableLog;
    highThreshold = tableSize-1;
    spread = (BYTE*)(symbolNext + maxSymbolValue + 1);

    /* Init, lay down lowprob symbols */
    {   FSE_DTableHeader DTableH;
        S16 const largeLimit = (S16)(1 << (tableLog-1));
        U32 s;
        DTableH.tableLog = (U16)tableLog;
        DTableH.fastMode = 1;
        for (s=0; s<maxSV1; s++) {
            short const count = normalizedCounter[s];
            if (count == -1) {
                /* low-probability symbol : gets one cell, taken from the top of the table */
                tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
                symbolNext[s] = 1;
            } else {
                /* a symbol occupying at least half of the table disables fast mode */
                if (count >= largeLimit) DTableH.fastMode = 0;
                symbolNext[s] = (U16)count;
            }
        }
        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
    }

    /* Spread symbols */
    if (highThreshold == tableSize - 1) {
        /* no low-probability symbol was laid down : two-stage spread */
        size_t const tableMask = tableSize-1;
        size_t const step = FSE_TABLESTEP(tableSize);
        /* First lay down the symbols in order.
         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
         * misses since small blocks generally have small table logs, so nearly
         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
         * our buffer to handle the over-write.
         */
        {   U64 const add = 0x0101010101010101ull;
            size_t pos = 0;
            U64 sv = 0;   /* current symbol value replicated in each of the 8 bytes */
            U32 s;
            for (s=0; s<maxSV1; ++s, sv += add) {
                int i;
                int const n = normalizedCounter[s];
                MEM_write64(spread + pos, sv);
                for (i = 8; i < n; i += 8) {
                    MEM_write64(spread + pos + i, sv);
                }
                pos += (size_t)n;
            }
        }
        /* Now we spread those positions across the table.
         * The benefit of doing it in two stages is that we avoid the
         * variable size inner loop, which caused lots of branch misses.
         * Now we can run through all the positions without any branch misses.
         * We unroll the loop twice, since that is what empirically worked best.
         */
        {   size_t position = 0;
            size_t s;
            size_t const unroll = 2;
            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
            for (s = 0; s < (size_t)tableSize; s += unroll) {
                size_t u;
                for (u = 0; u < unroll; ++u) {
                    size_t const uPosition = (position + (u * step)) & tableMask;
                    tableDecode[uPosition].symbol = spread[s + u];
                }
                position = (position + (unroll * step)) & tableMask;
            }
            assert(position == 0);
        }
    } else {
        /* some cells at the top of the table are reserved for lowprob symbols */
        U32 const tableMask = tableSize-1;
        U32 const step = FSE_TABLESTEP(tableSize);
        U32 s, position = 0;
        for (s=0; s<maxSV1; s++) {
            int i;
            for (i=0; i<normalizedCounter[s]; i++) {
                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
                position = (position + step) & tableMask;
                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
            }
        }
        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
    }

    /* Build Decoding table */
    {   U32 u;
        for (u=0; u<tableSize; u++) {
            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
            U32 const nextState = symbolNext[symbol]++;
            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
        }
    }

    return 0;
}
|
|
|
|
/* FSE_buildDTable_wksp() :
 * Externally visible entry point; forwards every argument unchanged to
 * FSE_buildDTable_internal() and returns its result. */
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
    size_t const status = FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
    return status;
}
|
|
|
|
|
|
#ifndef FSE_COMMONDEFS_ONLY
|
|
|
|
/*-*******************************************************
|
|
* Decompression (Byte symbols)
|
|
*********************************************************/
|
|
|
|
/* FSE_decompress_usingDTable_generic() :
 * Decodes the bitstream held in `cSrc` into `dst`, using decoding table `dt`.
 * Two decoder states (state1, state2) read alternately from the same bitstream.
 *
 * dst, maxDstSize : output buffer and its capacity in bytes.
 * cSrc, cSrcSize  : compressed bitstream and its size in bytes.
 * dt              : decoding table.
 * fast            : when non-zero, symbols are decoded with FSE_decodeSymbolFast(),
 *                   otherwise with FSE_decodeSymbol().
 *
 * @return : number of bytes written into `dst`,
 *           or an error code (result of BIT_initDStream(), corruption_detected,
 *           dstSize_tooSmall).
 *
 * Remaining capacity is always evaluated as (omax - op), which stays within the
 * output buffer, rather than by comparing against `omax-3` / `omax-2`, which
 * would form a pointer before the start of `dst` when maxDstSize is small.
 */
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
          void* dst, size_t maxDstSize,
    const void* cSrc, size_t cSrcSize,
    const FSE_DTable* dt, const unsigned fast)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const omax = op + maxDstSize;

    BIT_DStream_t bitD;
    FSE_DState_t state1;
    FSE_DState_t state2;

    /* Init */
    CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));

    FSE_initDState(&state1, &bitD, dt);
    FSE_initDState(&state2, &bitD, dt);

    RETURN_ERROR_IF(BIT_reloadDStream(&bitD)==BIT_DStream_overflow, corruption_detected, "");

#define FSE_GETSYMBOL(statePtr) (fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD))

    /* 4 symbols per loop : requires more than 3 bytes of remaining capacity */
    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & ((size_t)(omax-op) > 3) ; op+=4) {
        op[0] = FSE_GETSYMBOL(&state1);

        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            BIT_reloadDStream(&bitD);

        op[1] = FSE_GETSYMBOL(&state2);

        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }

        op[2] = FSE_GETSYMBOL(&state1);

        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            BIT_reloadDStream(&bitD);

        op[3] = FSE_GETSYMBOL(&state2);
    }

    /* tail */
    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
    while (1) {
        /* each half-iteration may write up to 2 symbols */
        if ((size_t)(omax-op) < 2) return ERROR(dstSize_tooSmall);
        *op++ = FSE_GETSYMBOL(&state1);
        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
            *op++ = FSE_GETSYMBOL(&state2);
            break;
        }

        if ((size_t)(omax-op) < 2) return ERROR(dstSize_tooSmall);
        *op++ = FSE_GETSYMBOL(&state2);
        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
            *op++ = FSE_GETSYMBOL(&state1);
            break;
        }
    }

    assert(op >= ostart);
    return (size_t)(op-ostart);
}
|
|
|
|
typedef struct {
|
|
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
|
|
} FSE_DecompressWksp;
|
|
|
|
|
|
/* FSE_decompress_wksp_body() :
 * Reads the normalized counts at the start of `cSrc`, builds a decoding table
 * inside `workSpace`, then decodes the remaining input into `dst`.
 *
 * workSpace layout, in order : an FSE_DecompressWksp, the decoding table,
 * then the scratch area handed to FSE_buildDTable_internal().
 *
 * maxLog : largest table log accepted; a larger value read from the input
 *          yields tableLog_tooLarge.
 * bmi2   : forwarded to FSE_readNCount_bmi2().
 *
 * @return : result of FSE_decompress_usingDTable_generic(),
 *           or an error code from any earlier step.
 */
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
        void* dst, size_t dstCapacity,
        const void* cSrc, size_t cSrcSize,
        unsigned maxLog, void* workSpace, size_t wkspSize,
        int bmi2)
{
    const BYTE* const srcStart = (const BYTE*)cSrc;
    const BYTE* payload = srcStart;
    unsigned tableLog;
    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
    size_t const dtableOffset = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
    FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtableOffset;

    FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
    /* correct offset to dtable depends on this property */
    FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);

    /* the workspace must at least hold the counts array */
    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);

    /* normal FSE decoding mode */
    {   size_t const headerSize =
            FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, srcStart, cSrcSize, bmi2);
        if (FSE_isError(headerSize)) return headerSize;
        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
        assert(headerSize <= cSrcSize);
        /* skip the header : what remains is the bitstream to decode */
        payload += headerSize;
        cSrcSize -= headerSize;
    }

    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
    assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
    /* what follows the counts array and the decoding table is scratch space for table construction */
    {   size_t const consumed = sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
        workSpace = (BYTE*)workSpace + consumed;
        wkspSize -= consumed;
    }

    CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );

    {   const void* const headerPtr = dtable;
        const FSE_DTableHeader* const header = (const FSE_DTableHeader*)headerPtr;
        U32 const fastMode = header->fastMode;

        /* select fast mode (static) */
        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, payload, cSrcSize, dtable, 1);
        return FSE_decompress_usingDTable_generic(dst, dstCapacity, payload, cSrcSize, dtable, 0);
    }
}
|
|
|
|
/* Avoids the FORCE_INLINE of the _body() function.
 * Calls FSE_decompress_wksp_body() with bmi2 set to 0. */
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
    size_t const result = FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
    return result;
}
|
|
|
|
#if DYNAMIC_BMI2
|
|
/* Variant carrying BMI2_TARGET_ATTRIBUTE :
 * calls FSE_decompress_wksp_body() with bmi2 set to 1. */
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
    size_t const result = FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
    return result;
}
|
|
#endif
|
|
|
|
/* FSE_decompress_wksp_bmi2() :
 * Dispatches to the bmi2 variant when DYNAMIC_BMI2 is enabled and `bmi2` is
 * non-zero, and to the default variant otherwise.
 * @return : result of the selected variant. */
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2)
        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
#endif
    (void)bmi2;   /* not read when DYNAMIC_BMI2 is disabled */
    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
|
|
|
|
#endif /* FSE_COMMONDEFS_ONLY */
|