From 346646016d4c5d3567d26bf6ccd617b318fabb0d Mon Sep 17 00:00:00 2001 From: Berlin_Peng Date: Fri, 15 Aug 2025 11:48:37 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=94=B9=E6=96=87=E4=BB=B6=E4=BD=8D?= =?UTF-8?q?=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bpsf/ksal-bpsf-zstd.patch | 567 +++++++++++++++--- bpsf_zstd.patch | 631 -------------------- ksal-bpsf-zstd.patch | 1016 -------------------------------- ksal_bpsf.spec | 23 - libksal_bpsf_zstd_so_create.sh | 107 ---- 5 files changed, 476 insertions(+), 1868 deletions(-) delete mode 100644 bpsf_zstd.patch delete mode 100644 ksal-bpsf-zstd.patch delete mode 100644 ksal_bpsf.spec delete mode 100644 libksal_bpsf_zstd_so_create.sh diff --git a/bpsf/ksal-bpsf-zstd.patch b/bpsf/ksal-bpsf-zstd.patch index 6ac93c2b..7d72a18d 100644 --- a/bpsf/ksal-bpsf-zstd.patch +++ b/bpsf/ksal-bpsf-zstd.patch @@ -1,15 +1,22 @@ diff --git a/lib/bpsf.c b/lib/bpsf.c new file mode 100644 -index 00000000..e93523fb +index 00000000..d3f11e79 --- /dev/null +++ b/lib/bpsf.c -@@ -0,0 +1,199 @@ +@@ -0,0 +1,345 @@ ++/* ++* 版权所有 (c) 华为技术有限公司 2025 ++*/ ++ +#include "bpsf.h" +#include "zstd.h" +#include "compress/zstd_compress.h" +#include "common/zstd_internal.h" +#include "decompress/zstd_decompress_internal.h" + ++#include "decompress/zstd_decompress_block.h" ++#include "mem.h" ++ +ZSTD_CCtx *BPSF_getCCtx(void) { + return ZSTD_createCCtx(); +} @@ -35,17 +42,17 @@ index 00000000..e93523fb + return ZSTD_CCtxParams_init_internal(cctxParams, params, compressionLevel); +} + -+void BPSF_compressBegin(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ++void BPSF_compressBegin(ZSTD_CCtx *cctx, const uint8_t* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict *cdict, const ZSTD_CCtx_params *params, + U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, params, pledgedSrcSize, zbuff); +} + -+void BPSF_getSeqStore(ZSTD_CCtx *zc, const void *src, size_t srcSize) { ++void BPSF_getSeqStore(ZSTD_CCtx *zc, const uint8_t* src, size_t srcSize) { + ZSTD_buildSeqStore(zc, src, srcSize); +} + -+U32 BPSF_update_window(ZSTD_window_t *window, void const *src, size_t srcSize, int forceNonContiguous) { ++U32 BPSF_update_window(ZSTD_window_t *window, const uint8_t* src, size_t srcSize, int forceNonContiguous) { + return ZSTD_window_update(window, src, srcSize, forceNonContiguous); +} + @@ -57,24 +64,19 @@ index 00000000..e93523fb + return HUF_load_table(src, dtable); +} + -+ZSTD_symbolEncodingTypeStats_t BPSF_buildSeqsStats( -+ const seqStore_t *seqStorePtr, size_t nbSeq, -+ const ZSTD_fseCTables_t *prevEntropy, ZSTD_fseCTables_t *nextEntropy, -+ BYTE *dst, const BYTE *const dstEnd, -+ ZSTD_strategy strategy, unsigned *countWorkspace, -+ void *entropyWorkspace, size_t entropyWkspSize) { ++ZSTD_symbolEncodingTypeStats_t BPSF_buildSeqsStats(const seqStore_t *seqStorePtr, size_t nbSeq, const ZSTD_fseCTables_t *prevEntropy, ++ ZSTD_fseCTables_t *nextEntropy, uint8_t *dst, const uint8_t* dstEnd, ZSTD_strategy strategy, ++ unsigned *countWorkspace, uint8_t *entropyWorkspace, size_t entropyWkspSize) { + return ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, prevEntropy, nextEntropy, dst, dstEnd, + strategy, countWorkspace, entropyWorkspace, entropyWkspSize); +} + + + -+size_t BPSF_encodeSeqs( -+ void *dst, size_t dstCapacity, -+ FSE_CTable const *CTable_MatchLength, BYTE const *mlCodeTable, -+ FSE_CTable const *CTable_OffsetBits, BYTE const *ofCodeTable, -+ FSE_CTable const *CTable_LitLength, BYTE const *llCodeTable, -+ seqDef const *sequences, size_t nbSeq, int longOffsets, int bmi2) { ++size_t BPSF_encodeSeqs(uint8_t *dst, size_t dstCapacity, const FSE_CTable* CTable_MatchLength, ++ const uint8_t* mlCodeTable, const FSE_CTable* CTable_OffsetBits, const uint8_t* ofCodeTable, ++ const FSE_CTable* CTable_LitLength, const uint8_t* llCodeTable, const seqDef* sequences, ++ size_t nbSeq, int longOffsets, int bmi2) { + return ZSTD_encodeSequences(dst, dstCapacity, CTable_MatchLength, mlCodeTable, CTable_OffsetBits, + ofCodeTable, CTable_LitLength, llCodeTable, sequences, + nbSeq, longOffsets, bmi2); @@ -85,7 +87,8 @@ index 00000000..e93523fb + const void* src, size_t srcSize, + const U32* baseValue, const U8* nbAdditionalBits, + const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, -+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize, int bmi2); ++ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize, ++ int bmi2); + +extern const ZSTD_seqSymbol LL_defaultDTable[(1<entropy.LLTable, &dctx->LLTptr, -+ LLtype, MaxLL, LLFSELog, -+ p_src, p_src_end-p_src, -+ LL_base, LL_bits, -+ LL_defaultDTable, dctx->fseEntropy, -+ dctx->ddictIsCold, n_seq, -+ dctx->workspace, sizeof(dctx->workspace), -+ 0); ++ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, ++ LLtype, MaxLL, LLFSELog, ++ p_src, p_src_end-p_src, ++ LL_base, LL_bits, ++ LL_defaultDTable, dctx->fseEntropy, ++ dctx->ddictIsCold, n_seq, ++ dctx->workspace, sizeof(dctx->workspace), ++ 0); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + p_src += llhSize; + } @@ -168,10 +170,8 @@ index 00000000..e93523fb +extern void ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt); +extern seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq); + -+size_t BPSF_decodeSeqs ( -+ ZSTD_DCtx* dctx, const void* seqStart, size_t seqSize, int nbSeq, -+ uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of -+) { ++size_t BPSF_decodeSeqs (ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, ++ int nbSeq, uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of) { + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + @@ -203,13 +203,161 @@ index 00000000..e93523fb + + return 0; +} -\ No newline at end of file ++ ++size_t BPSF_decodeSeqs_and_reconstruct(ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, ++ int nbSeq, uint8_t *p_dst, size_t max_dst_len, ++ size_t dict_size, size_t *reconstructed_size) { ++ const BYTE *p_dict = dctx->litPtr; ++ const BYTE *p_dict_end = dctx->litPtr + dict_size; ++ ++ const BYTE *p_lit = dctx->litPtr + dict_size; ++ const BYTE *p_lit_end = dctx->litPtr + dctx->litSize; ++ ++ BYTE *p_dst_start = p_dst; ++ BYTE *p_dst_limit = p_dst + max_dst_len; ++ ++ uint8_t backup [16]; ++ MEM_COPY16B(backup, p_dst_limit); ++ ++ S32 ll_state, of_state, ml_state; ++ U64 data; ++ ++ const BYTE* p_src = (const BYTE*)seqStart; ++ size_t src_len = seqSize; ++ ++ S32 prev_of[] = {1, 4, 8}; ++ ++ p_src += (src_len - 8); ++ ++ #define FSE_READMOVE0(t,b,n) { if(n) { t=b+(data>>(64-n)); data<<=n; } else {t=b;} } ++ #define FSE_READMOVE1(t,b,n) { t=b+(data>>(64-n)); data<<=n; } ++ ++ if (nbSeq) { ++ dctx->fseEntropy = 1; ++ ++ U8 ll_m_bits = ((const ZSTD_seqSymbol_header*)dctx->LLTptr)->tableLog; ++ U8 of_m_bits = ((const ZSTD_seqSymbol_header*)dctx->OFTptr)->tableLog; ++ U8 ml_m_bits = ((const ZSTD_seqSymbol_header*)dctx->MLTptr)->tableLog; ++ ++ const ZSTD_seqSymbol* ll_table = (dctx->LLTptr + 1); ++ const ZSTD_seqSymbol* of_table = (dctx->OFTptr + 1); ++ const ZSTD_seqSymbol* ml_table = (dctx->MLTptr + 1); ++ ++ data = (1 | (*(U64*)p_src)); ++ data <<= (8 - highbit_u9(p_src[7])); ++ ++ FSE_READMOVE0(ll_state, 0, ll_m_bits); ++ FSE_READMOVE0(of_state, 0, of_m_bits); ++ FSE_READMOVE0(ml_state, 0, ml_m_bits); ++ ++ for (int i_seq = 0; i_seq < nbSeq; ++i_seq) { ++ ZSTD_seqSymbol ll_item = ll_table[ll_state]; ++ ZSTD_seqSymbol of_item = of_table[of_state]; ++ ZSTD_seqSymbol ml_item = ml_table[ml_state]; ++ S32 of, ml, ll; ++ ++ { ++ int8_t c = trailbit_u64(data); ++ p_src -= (c>>3); ++ data = (1 | (*(U64*)p_src)); ++ data <<= (c&7); ++ } ++ ++ if (of_item.nbAdditionalBits > 1) { ++ FSE_READMOVE1(of, of_item.baseValue, of_item.nbAdditionalBits); ++ prev_of[2] = prev_of[1]; ++ prev_of[1] = prev_of[0]; ++ prev_of[0] = of; ++ } else { ++ U8 ll0 = (ll_item.baseValue == 0); ++ if (of_item.nbAdditionalBits == 0) { ++ of = prev_of[ll0]; ++ prev_of[1] = prev_of[!ll0]; ++ prev_of[0] = of; ++ } else { ++ FSE_READMOVE1(of, (of_item.baseValue+ll0), 1); ++ size_t temp = (of==3) ? prev_of[0] -1 : prev_of[of]; ++ temp -= !temp; ++ if (of != 1) prev_of[2] = prev_of[1]; ++ prev_of[1] = prev_of[0]; ++ prev_of[0] = of = temp; ++ } ++ } ++ ++ FSE_READMOVE0(ml, ml_item.baseValue, ml_item.nbAdditionalBits); ++ FSE_READMOVE0(ll, ll_item.baseValue, ll_item.nbAdditionalBits); ++ ++ if (UNLIKELY(of_item.nbAdditionalBits + ml_item.nbAdditionalBits + ll_item.nbAdditionalBits > 30)) { ++ int8_t c = trailbit_u64(data); ++ p_src -= (c>>3); ++ data = (1 | (*(U64*)p_src)); ++ data <<= (c&7); ++ } ++ ++ FSE_READMOVE0(ll_state, ll_item.nextState, ll_item.nbBits); ++ FSE_READMOVE0(ml_state, ml_item.nextState, ml_item.nbBits); ++ FSE_READMOVE0(of_state, of_item.nextState, of_item.nbBits); ++ ++ MEM_COPY16B(p_dst, p_lit); ++ ++ if (UNLIKELY(ll > 16)) { ++ MEM_COPY(p_dst + 16, p_lit + 16, ll - 16); ++ } ++ p_dst += ll; ++ p_lit += ll; ++ ++ ++ if (of > p_dst - p_dst_start) { ++ const U8 *dict_end = p_dict + dict_size; ++ const U8 *dict_match = p_dict_end - (of - (p_dst - p_dst_start)); ++ if (dict_match + ml <= dict_end) { ++ ZSTD_wildcopy(p_dst, dict_match, ml, ZSTD_overlap_src_before_dst); ++ } else { ++ size_t copy_from_dict = dict_end - dict_match; ++ ZSTD_wildcopy(p_dst, dict_match, copy_from_dict, ZSTD_overlap_src_before_dst); ++ ZSTD_wildcopy(p_dst + copy_from_dict, p_dst_start, ml - copy_from_dict, ZSTD_overlap_src_before_dst); ++ } ++ } else { ++ const U8 *p_match = p_dst - of; ++ if (LIKELY(of >= 16)) { ++ MEM_COPY(p_dst, p_match, ml); ++ } else if (UNLIKELY(of == 4)) { ++ MEM_SET_4B(p_dst, *(uint32_t*)p_match, ml); ++ } else if (UNLIKELY(of == 2)) { ++ MEM_SET_2B(p_dst, *(uint16_t*)p_match, ml); ++ } else if (UNLIKELY(of == 1)) { ++ MEM_SET_1B(p_dst, *p_match, ml); ++ } else { ++ U8 *op = p_dst; ++ ZSTD_overlapCopy8(&op, &p_match, of); ++ if (ml > 8) { ++ ZSTD_wildcopy(op, p_match, (ptrdiff_t)ml - 8, ZSTD_overlap_src_before_dst); ++ } ++ } ++ } ++ p_dst += ml; ++ } ++ } ++ ++ { ++ size_t n_last_lit = p_lit_end - p_lit; ++ MEM_COPY(p_dst, p_lit, n_last_lit); ++ p_dst += n_last_lit; ++ } ++ ++ MEM_COPY16B(p_dst_limit, backup); ++ *reconstructed_size = p_dst - p_dst_start; ++ return 0; ++} diff --git a/lib/bpsf.h b/lib/bpsf.h new file mode 100644 -index 00000000..274342e1 +index 00000000..b2c2e84f --- /dev/null +++ b/lib/bpsf.h -@@ -0,0 +1,60 @@ +@@ -0,0 +1,58 @@ ++/* ++* 版权所有 (c) 华为技术有限公司 2025 ++*/ +#ifndef BPSF_BPSF_H +#define BPSF_BPSF_H + @@ -227,74 +375,45 @@ index 00000000..274342e1 + +void BPSF_init_CCtxParams(ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params, int compressionLevel); + -+void BPSF_compressBegin(ZSTD_CCtx* cctx, -+ const void* dict, size_t dictSize, -+ ZSTD_dictContentType_e dictContentType, -+ ZSTD_dictTableLoadMethod_e dtlm, -+ const ZSTD_CDict* cdict, -+ const ZSTD_CCtx_params* params, U64 pledgedSrcSize, -+ ZSTD_buffered_policy_e zbuff); ++void BPSF_compressBegin(ZSTD_CCtx* cctx, const uint8_t* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ++ ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, const ZSTD_CCtx_params* params, ++ U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff); + -+void BPSF_getSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize); ++void BPSF_getSeqStore(ZSTD_CCtx* zc, const uint8_t* src, size_t srcSize); + -+U32 BPSF_update_window(ZSTD_window_t* window, void const* src, size_t srcSize, int forceNonContiguous); ++U32 BPSF_update_window(ZSTD_window_t* window, const uint8_t* src, size_t srcSize, int forceNonContiguous); + +// Huffman -+size_t BPSF_build_HUFTable(BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, HUF_CElt* CTable); ++size_t BPSF_build_HUFTable(uint8_t* dst, size_t dst_capacity, const uint8_t* src, size_t srcSize, HUF_CElt* CTable); + -+size_t BPSF_loadHUFTable(const BYTE* src, HUF_DTable* dtable); ++size_t BPSF_loadHUFTable(const uint8_t* src, HUF_DTable* dtable); + +// FSE encode -+ZSTD_symbolEncodingTypeStats_t BPSF_buildSeqsStats( -+ const seqStore_t *seqStorePtr, size_t nbSeq, -+ const ZSTD_fseCTables_t *prevEntropy, ZSTD_fseCTables_t *nextEntropy, -+ BYTE *dst, const BYTE *const dstEnd, -+ ZSTD_strategy strategy, unsigned *countWorkspace, -+ void *entropyWorkspace, size_t entropyWkspSize -+); -+ -+size_t BPSF_encodeSeqs( -+ void* dst, size_t dstCapacity, -+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, -+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, -+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, -+ seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2 ++ZSTD_symbolEncodingTypeStats_t BPSF_buildSeqsStats(const seqStore_t *seqStorePtr, size_t nbSeq, const ZSTD_fseCTables_t *prevEntropy, ++ ZSTD_fseCTables_t *nextEntropy, uint8_t *dst, const uint8_t* dstEnd, ZSTD_strategy strategy, ++ unsigned *countWorkspace, uint8_t *entropyWorkspace, size_t entropyWkspSize); ++ ++size_t BPSF_encodeSeqs(uint8_t* dst, size_t dstCapacity, ++ const FSE_CTable* CTable_MatchLength, const uint8_t* mlCodeTable, ++ const FSE_CTable* CTable_OffsetBits, const uint8_t* ofCodeTable, ++ const FSE_CTable* CTable_LitLength, const uint8_t* llCodeTable, ++ const seqDef* sequences, size_t nbSeq, int longOffsets, int bmi2 +); + +// FSE decode -+size_t BPSF_decodeSeqTable(ZSTD_DCtx* dctx, size_t n_seq, const BYTE* p_src); ++size_t BPSF_decodeSeqTable(ZSTD_DCtx* dctx, size_t n_seq, const uint8_t* p_src); + -+size_t BPSF_decodeSeqs( -+ ZSTD_DCtx* dctx, const void* seqStart, size_t seqSize, int nbSeq, -+ uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of -+); ++size_t BPSF_decodeSeqs(ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, ++ int nbSeq, uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of); ++ ++void ZSTD_setLiteralDict(ZSTD_DCtx* dctx, const uint8_t* litPtr, size_t litSize); ++ ++size_t BPSF_decodeSeqs_and_reconstruct(ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, ++ int nbSeq, uint8_t *p_dst, size_t max_dst_len, ++ size_t dict_size, size_t *reconstructed_size); + +#endif // BPSF_BPSF_H \ No newline at end of file -diff --git a/lib/common/error_private.h b/lib/common/error_private.h -index 0156010c..69bcdb82 100644 ---- a/lib/common/error_private.h -+++ b/lib/common/error_private.h -@@ -13,10 +13,6 @@ - #ifndef ERROR_H_MODULE - #define ERROR_H_MODULE - --#if defined (__cplusplus) --extern "C" { --#endif -- - - /* **************************************** - * Dependencies -@@ -161,8 +157,4 @@ void _force_has_format_string(const char *format, ...) { - } \ - } while(0) - --#if defined (__cplusplus) --} --#endif -- - #endif /* ERROR_H_MODULE */ diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index ea000723..2b7d4c21 100644 --- a/lib/compress/huf_compress.c @@ -520,19 +639,172 @@ index 00000000..2ec00ce2 +#endif // BPSF_ZSTD_COMPRESS_H \ No newline at end of file diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c -index f85dd0be..f498e57a 100644 +index f85dd0be..ea6d2cf1 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c -@@ -406,7 +406,7 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize +@@ -24,6 +24,7 @@ + #include "../common/zstd_internal.h" + #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ + ++#define OPTIMIZE_HUF_TABLE_COPY 1 + /* ************************************************************** + * Constants + ****************************************************************/ +@@ -382,6 +383,12 @@ typedef struct { + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + } HUF_ReadDTableX1_Workspace; + ++#if OPTIMIZE_HUF_TABLE_COPY ++static U16 HUF_DEltX1_set1 (BYTE symbol, BYTE nbBits) { ++ U16 D = ((U16)(symbol << 8) + nbBits); ++ return D; ++} ++ + size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) + { + U32 tableLog = 0; +@@ -406,7 +413,131 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize U32 const maxTableLog = dtd.maxTableLog + 1; U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); - if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ ++ // if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ ++ dtd.tableType = 0; ++ dtd.tableLog = (BYTE)tableLog; ++ ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); ++ } ++ ++ { ++ int n; ++ int nextRankStart = 0; ++ int const unroll = 4; ++ int const nLimit = (int)nbSymbols - unroll + 1; ++ for (n=0; n<(int)tableLog+1; n++) { ++ U32 const curr = nextRankStart; ++ nextRankStart += wksp->rankVal[n]; ++ wksp->rankStart[n] = curr; ++ } ++ for (n=0; n < nLimit; n += unroll) { ++ int u; ++ for (u=0; u < unroll; ++u) { ++ size_t const w = wksp->huffWeight[n+u]; ++ wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); ++ } ++ } ++ for (; n < (int)nbSymbols; ++n) { ++ size_t const w = wksp->huffWeight[n]; ++ wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; ++ } ++ } ++ ++ /* fill DTable ++ * We fill all entries of each weight in order. ++ * That way length is a constant for each iteration of the outer loop. ++ * We can switch based on the length to a different inner loop which is ++ * optimized for that particular case. ++ */ ++ { ++ U32 w; ++ int symbol = wksp->rankVal[0]; ++ int rankStart = 0; ++ for (w=1; wrankVal[w]; ++ int const length = (1 << w) >> 1; ++ int uStart = rankStart; ++ BYTE const nbBits = (BYTE)(tableLog + 1 - w); ++ int s; ++ switch (length) { ++ case 1: ++ for (s=0; ssymbols[symbol + s]; ++ D.nbBits = nbBits; ++ dt[uStart] = D; ++ uStart += 1; ++ } ++ break; ++ case 2: ++ for (s=0; ssymbols[symbol + s]; ++ D.nbBits = nbBits; ++ dt[uStart+0] = D; ++ dt[uStart+1] = D; ++ uStart += 2; ++ } ++ break; ++ case 4: ++ for (s=0; ssymbols[symbol + s], nbBits); ++ U16 DH = HUF_DEltX1_set1(wksp->symbols[symbol + s + 1], nbBits); ++ vst1q_u16((U16*)(dt+uStart), vcombine_u16(vdup_n_u16(DL), vdup_n_u16(DH))); ++ uStart += 8; ++ } ++ break; ++ case 8: ++ for (s=0; ssymbols[symbol + s], nbBits); ++ vst1q_u16((U16*)(dt+uStart), vdupq_n_u16(D1)); ++ uStart += 8; ++ } ++ break; ++ default: ++ for (s=0; ssymbols[symbol + s], nbBits); ++ uint16x8_t vecD8 = vdupq_n_u16(D1); ++ for (int u=0; u= sizeof(*wksp)); ++ if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); ++ ++ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); ++ /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ ++ ++ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); ++ if (HUF_isError(iSize)) return iSize; ++ ++ ++ /* Table header */ ++ { DTableDesc dtd = HUF_getDTableDesc(DTable); ++ U32 const maxTableLog = dtd.maxTableLog + 1; ++ U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); ++ tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); + // if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ dtd.tableType = 0; dtd.tableLog = (BYTE)tableLog; ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); -@@ -941,6 +941,21 @@ static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t ds +@@ -517,6 +648,7 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize + } + return iSize; + } ++#endif + + FORCE_INLINE_TEMPLATE BYTE + HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +@@ -941,6 +1073,21 @@ static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t ds return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); } @@ -554,6 +826,20 @@ index f85dd0be..f498e57a 100644 #endif /* HUF_FORCE_DECOMPRESS_X2 */ +diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c +index 309ec0d0..199c145d 100644 +--- a/lib/decompress/zstd_ddict.c ++++ b/lib/decompress/zstd_ddict.c +@@ -242,3 +242,8 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) + if (ddict==NULL) return 0; + return ddict->dictID; + } ++ ++void ZSTD_setLiteralDict(ZSTD_DCtx* dctx, BYTE const* litPtr, size_t litSize) { ++ dctx->litPtr = litPtr; ++ dctx->litSize = litSize; ++} +\ No newline at end of file diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 76d7332e..378204e1 100644 --- a/lib/decompress/zstd_decompress_block.c @@ -629,3 +915,102 @@ index 76d7332e..378204e1 100644 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq) { seq_t seq; +diff --git a/lib/mem.h b/lib/mem.h +new file mode 100644 +index 00000000..4bf8597e +--- /dev/null ++++ b/lib/mem.h +@@ -0,0 +1,92 @@ ++/* ++* 版权所有 (c) 华为技术有限公司 2025 ++*/ ++#ifndef MEM_H ++#define MEM_H ++ ++#include ++#include ++#include "decompress/zstd_decompress_block.h" ++ ++static inline void MEM_COPY16B (uint8_t *p_dst, const uint8_t *p_src) { ++ vst1q_u8(p_dst, vld1q_u8(p_src)); ++} ++ ++static inline void MEM_COPY (uint8_t *p_dst, const uint8_t *p_src, int len) { ++ do { ++ vst1q_u8(p_dst, vld1q_u8(p_src)); ++ p_dst += 16; ++ p_src += 16; ++ len -= 16; ++ } while (len > 0); ++} ++ ++static inline void MEM_SET_1B (uint8_t *p_dst, const uint8_t value, int len) { ++ uint8x16_t vec_data = vdupq_n_u8(value); ++ do { ++ vst1q_u8(p_dst, vec_data); ++ p_dst += 16; ++ len -= 16; ++ } while (len > 0); ++} ++ ++static inline void MEM_SET_2B (uint8_t *p_dst, const uint16_t value, int len) { ++ uint16x8_t vec_data = vdupq_n_u16(value); ++ do { ++ vst1q_u16((uint16_t*)p_dst, vec_data); ++ p_dst += 16; ++ len -= 16; ++ } while (len > 0); ++} ++ ++static inline void MEM_SET_4B (uint8_t *p_dst, const uint32_t value, int len) { ++ uint32x4_t vec_data = vdupq_n_u32(value); ++ do { ++ vst1q_u32((uint32_t*)p_dst, vec_data); ++ p_dst += 16; ++ len -= 16; ++ } while (len > 0); ++} ++ ++static inline void MEM_LZ_MOVE (uint8_t *p_dst, uint8_t *p_match, int32_t ml, int32_t of) { ++ uint8x16_t vec_data = vld1q_u8(p_match); ++ do { ++ vst1q_u8(p_dst, vec_data); ++ p_dst += of; ++ ml -= of; ++ } while (ml > 0); ++} ++ ++static inline int8_t trailbit_u64 (uint64_t val) { ++ return (int8_t)__builtin_ctzll(val); ++} ++ ++static inline int8_t highbit_u9 (uint16_t x) { ++ return 31 - __builtin_clz((uint32_t)x); ++} ++ ++static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } ++ ++static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { ++ assert(*ip <= *op); ++ if (offset < 8) { ++ /* close range match, overlap */ ++ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; ++ static const int dec64table[] = { 8, 8, 8, 7, 8, 9, 10, 11}; ++ int const sub2 = dec64table[offset]; ++ (*op)[0] = (*ip)[0]; ++ (*op)[1] = (*ip)[1]; ++ (*op)[2] = (*ip)[2]; ++ (*op)[3] = (*ip)[3]; ++ *ip += dec32table[offset]; ++ ZSTD_copy4(*op+4, *ip); ++ *ip -= sub2; ++ } else { ++ ZSTD_copy8(*op, *ip); ++ } ++ *ip += 8; ++ *op += 8; ++ assert(*op - *ip >= 8); ++} ++ ++#endif // MEM_H +\ No newline at end of file diff --git a/bpsf_zstd.patch b/bpsf_zstd.patch deleted file mode 100644 index 6ac93c2b..00000000 --- a/bpsf_zstd.patch +++ /dev/null @@ -1,631 +0,0 @@ -diff --git a/lib/bpsf.c b/lib/bpsf.c -new file mode 100644 -index 00000000..e93523fb ---- /dev/null -+++ b/lib/bpsf.c -@@ -0,0 +1,199 @@ -+#include "bpsf.h" -+#include "zstd.h" -+#include "compress/zstd_compress.h" -+#include "common/zstd_internal.h" -+#include "decompress/zstd_decompress_internal.h" -+ -+ZSTD_CCtx *BPSF_getCCtx(void) { -+ return ZSTD_createCCtx(); -+} -+ -+size_t BPSF_freeCCtx(ZSTD_CCtx *p_ctx) { -+ return ZSTD_freeCCtx(p_ctx); -+} -+ -+ZSTD_DCtx *BPSF_getDCtx(void) { -+ return ZSTD_createDCtx(); -+} -+ -+size_t BPSF_freeDCtx(ZSTD_DCtx *p_ctx) { -+ return ZSTD_freeDCtx(p_ctx); -+} -+ -+ZSTD_parameters -+BPSF_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { -+ return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, mode); -+} -+ -+void BPSF_init_CCtxParams(ZSTD_CCtx_params *cctxParams, const ZSTD_parameters *params, int compressionLevel) { -+ return ZSTD_CCtxParams_init_internal(cctxParams, params, compressionLevel); -+} -+ -+void BPSF_compressBegin(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, -+ ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict *cdict, const ZSTD_CCtx_params *params, -+ U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { -+ ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, params, pledgedSrcSize, zbuff); -+} -+ -+void BPSF_getSeqStore(ZSTD_CCtx *zc, const void *src, size_t srcSize) { -+ ZSTD_buildSeqStore(zc, src, srcSize); -+} -+ -+U32 BPSF_update_window(ZSTD_window_t *window, void const *src, size_t srcSize, int forceNonContiguous) { -+ return ZSTD_window_update(window, src, srcSize, forceNonContiguous); -+} -+ -+size_t BPSF_build_HUFTable(BYTE *dst, size_t dst_capacity, const BYTE *src, size_t srcSize, HUF_CElt *CTable) { -+ return HUF_build_table(dst, dst_capacity, src, srcSize, CTable); -+} -+ -+size_t BPSF_loadHUFTable(const BYTE *src, HUF_DTable *dtable) { -+ return HUF_load_table(src, dtable); -+} -+ -+ZSTD_symbolEncodingTypeStats_t BPSF_buildSeqsStats( -+ const seqStore_t *seqStorePtr, size_t nbSeq, -+ const ZSTD_fseCTables_t *prevEntropy, ZSTD_fseCTables_t *nextEntropy, -+ BYTE *dst, const BYTE *const dstEnd, -+ ZSTD_strategy strategy, unsigned *countWorkspace, -+ void *entropyWorkspace, size_t entropyWkspSize) { -+ return ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, prevEntropy, nextEntropy, dst, dstEnd, -+ strategy, countWorkspace, entropyWorkspace, entropyWkspSize); -+} -+ -+ -+ -+size_t BPSF_encodeSeqs( -+ void *dst, size_t dstCapacity, -+ FSE_CTable const *CTable_MatchLength, BYTE const *mlCodeTable, -+ FSE_CTable const *CTable_OffsetBits, BYTE const *ofCodeTable, -+ FSE_CTable const *CTable_LitLength, BYTE const *llCodeTable, -+ seqDef const *sequences, size_t nbSeq, int longOffsets, int bmi2) { -+ return ZSTD_encodeSequences(dst, dstCapacity, CTable_MatchLength, mlCodeTable, CTable_OffsetBits, -+ ofCodeTable, CTable_LitLength, llCodeTable, sequences, -+ nbSeq, longOffsets, bmi2); -+} -+ -+extern size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, -+ symbolEncodingType_e type, unsigned max, U32 maxLog, -+ const void* src, size_t srcSize, -+ const U32* baseValue, const U8* nbAdditionalBits, -+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, -+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize, int bmi2); -+ -+extern const ZSTD_seqSymbol LL_defaultDTable[(1<> 6); -+ symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*p_src >> 4) & 3); -+ symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*p_src >> 2) & 3); -+ p_src++; -+ -+ -+ { -+ size_t const llhSize = ZSTD_buildSeqTable( -+ dctx->entropy.LLTable, &dctx->LLTptr, -+ LLtype, MaxLL, LLFSELog, -+ p_src, p_src_end-p_src, -+ LL_base, LL_bits, -+ LL_defaultDTable, dctx->fseEntropy, -+ dctx->ddictIsCold, n_seq, -+ dctx->workspace, sizeof(dctx->workspace), -+ 0); -+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); -+ p_src += llhSize; -+ } -+ -+ { -+ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, -+ OFtype, MaxOff, OffFSELog, -+ p_src, p_src_end-p_src, -+ OF_base, OF_bits, -+ OF_defaultDTable, dctx->fseEntropy, -+ dctx->ddictIsCold, n_seq, -+ dctx->workspace, sizeof(dctx->workspace), -+ 0); -+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); -+ p_src += ofhSize; -+ } -+ -+ { -+ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, -+ MLtype, MaxML, MLFSELog, -+ p_src, p_src_end-p_src, -+ ML_base, ML_bits, -+ ML_defaultDTable, dctx->fseEntropy, -+ dctx->ddictIsCold, n_seq, -+ dctx->workspace, sizeof(dctx->workspace), -+ 0); -+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); -+ p_src += mlhSize; -+ } -+ -+ return 0; -+} -+ -+typedef struct { -+ size_t state; -+ const ZSTD_seqSymbol* table; -+} ZSTD_fseState; -+ -+typedef struct { -+ BIT_DStream_t DStream; -+ ZSTD_fseState stateLL; -+ ZSTD_fseState stateOffb; -+ ZSTD_fseState stateML; -+ size_t prevOffset[ZSTD_REP_NUM]; -+} seqState_t; -+ -+typedef struct { -+ size_t litLength; -+ size_t matchLength; -+ size_t offset; -+} seq_t; -+ -+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -+ -+extern void ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt); -+extern seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq); -+ -+size_t BPSF_decodeSeqs ( -+ ZSTD_DCtx* dctx, const void* seqStart, size_t seqSize, int nbSeq, -+ uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of -+) { -+ const BYTE* ip = (const BYTE*)seqStart; -+ const BYTE* const iend = ip + seqSize; -+ -+ size_t i_seq = 0; -+ if (nbSeq) { -+ seqState_t seqState; -+ dctx->fseEntropy = 1; -+ seqState.prevOffset[0] = 1; -+ seqState.prevOffset[1] = 4; -+ seqState.prevOffset[2] = 8; -+ -+ RETURN_ERROR_IF(ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), corruption_detected, ""); -+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); -+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); -+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); -+ -+ for ( ; nbSeq ; nbSeq--) { -+ seq_t const sequence = ZSTD_decodeSequence(&seqState, 0, nbSeq==1); -+ p_ll[i_seq] = sequence.litLength; -+ p_ml[i_seq] = sequence.matchLength; -+ p_of[i_seq] = sequence.offset; -+ i_seq ++; -+ } -+ -+ -+ assert(nbSeq == 0); -+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); -+ } -+ -+ return 0; -+} -\ No newline at end of file -diff --git a/lib/bpsf.h b/lib/bpsf.h -new file mode 100644 -index 00000000..274342e1 ---- /dev/null -+++ b/lib/bpsf.h -@@ -0,0 +1,60 @@ -+#ifndef BPSF_BPSF_H -+#define BPSF_BPSF_H -+ -+#include -+#include "zstd.h" -+#include "compress/zstd_compress.h" -+ -+// LZ77 -+ZSTD_CCtx* BPSF_getCCtx(void); -+size_t BPSF_freeCCtx(ZSTD_CCtx *p_cctx); -+ZSTD_DCtx* BPSF_getDCtx(void); -+size_t BPSF_freeDCtx(ZSTD_DCtx *p_cctx); -+ -+ZSTD_parameters BPSF_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+ -+void BPSF_init_CCtxParams(ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params, int compressionLevel); -+ -+void BPSF_compressBegin(ZSTD_CCtx* cctx, -+ const void* dict, size_t dictSize, -+ ZSTD_dictContentType_e dictContentType, -+ ZSTD_dictTableLoadMethod_e dtlm, -+ const ZSTD_CDict* cdict, -+ const ZSTD_CCtx_params* params, U64 pledgedSrcSize, -+ ZSTD_buffered_policy_e zbuff); -+ -+void BPSF_getSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize); -+ -+U32 BPSF_update_window(ZSTD_window_t* window, void const* src, size_t srcSize, int forceNonContiguous); -+ -+// Huffman -+size_t BPSF_build_HUFTable(BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, HUF_CElt* CTable); -+ -+size_t BPSF_loadHUFTable(const BYTE* src, HUF_DTable* dtable); -+ -+// FSE encode -+ZSTD_symbolEncodingTypeStats_t BPSF_buildSeqsStats( -+ const seqStore_t *seqStorePtr, size_t nbSeq, -+ const ZSTD_fseCTables_t *prevEntropy, ZSTD_fseCTables_t *nextEntropy, -+ BYTE *dst, const BYTE *const dstEnd, -+ ZSTD_strategy strategy, unsigned *countWorkspace, -+ void *entropyWorkspace, size_t entropyWkspSize -+); -+ -+size_t BPSF_encodeSeqs( -+ void* dst, size_t dstCapacity, -+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, -+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, -+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, -+ seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2 -+); -+ -+// FSE decode -+size_t BPSF_decodeSeqTable(ZSTD_DCtx* dctx, size_t n_seq, const BYTE* p_src); -+ -+size_t BPSF_decodeSeqs( -+ ZSTD_DCtx* dctx, const void* seqStart, size_t seqSize, int nbSeq, -+ uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of -+); -+ -+#endif // BPSF_BPSF_H -\ No newline at end of file -diff --git a/lib/common/error_private.h b/lib/common/error_private.h -index 0156010c..69bcdb82 100644 ---- a/lib/common/error_private.h -+++ b/lib/common/error_private.h -@@ -13,10 +13,6 @@ - #ifndef ERROR_H_MODULE - #define ERROR_H_MODULE - --#if defined (__cplusplus) --extern "C" { --#endif -- - - /* **************************************** - * Dependencies -@@ -161,8 +157,4 @@ void _force_has_format_string(const char *format, ...) { - } \ - } while(0) - --#if defined (__cplusplus) --} --#endif -- - #endif /* ERROR_H_MODULE */ -diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c -index ea000723..2b7d4c21 100644 ---- a/lib/compress/huf_compress.c -+++ b/lib/compress/huf_compress.c -@@ -1326,6 +1326,42 @@ unsigned HUF_optimalTableLog( - } - } - -+size_t HUF_compress_4x (BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, const HUF_CElt* CTable) { -+ HUF_compress4X_usingCTable_internal(dst, dst_capacity, src, srcSize, CTable, 0); -+} -+ -+size_t HUF_build_table (BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, HUF_CElt* CTable) { -+ unsigned huffLog = 11; -+ uint8_t workSpace [8192]; -+ size_t wkspSize = sizeof(workSpace); -+ -+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); -+ BYTE* const dst_limit = dst + dst_capacity; -+ -+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize); -+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); -+ -+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); -+ if (!srcSize) return 0; -+ if (!dst_capacity) return 0; -+ if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); -+ if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); -+ if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; -+ -+ unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; -+ -+ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp))); -+ -+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), CTable, table->count, 0); -+ -+ size_t const maxBits = HUF_buildCTable_wksp(CTable, table->count, maxSymbolValue, huffLog, &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); -+ CHECK_F(maxBits); -+ huffLog = (U32)maxBits; -+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits((CTable+1), maxSymbolValue+1)); -+ -+ return HUF_writeCTable_wksp(dst, dst_capacity, CTable, maxSymbolValue, huffLog, &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)); -+} -+ - /* HUF_compress_internal() : - * `workSpace_align4` must be aligned on 4-bytes boundaries, - * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ -@@ -1375,7 +1411,9 @@ HUF_compress_internal (void* dst, size_t dstSize, - CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); - largestTotal += largestEnd; - } -- if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */ -+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) { -+ return 0; /* heuristic : probably not compressible enough */ -+ } - } - - /* Scan input and build symbol stats */ -@@ -1386,9 +1424,7 @@ HUF_compress_internal (void* dst, size_t dstSize, - DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1)); - - /* Check validity of previous table */ -- if ( repeat -- && *repeat == HUF_repeat_check -- && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { -+ if ( repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ -@@ -1409,8 +1445,7 @@ HUF_compress_internal (void* dst, size_t dstSize, - } - - /* Write table description header */ -- { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, -- &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); -+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); - /* Check if using previous huffman table is beneficial */ - if (repeat && *repeat != HUF_repeat_none) { - size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); -diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c -index 9284e2a4..482cab91 100644 ---- a/lib/compress/zstd_compress.c -+++ b/lib/compress/zstd_compress.c -@@ -27,6 +27,7 @@ - #include "zstd_opt.h" - #include "zstd_ldm.h" - #include "zstd_compress_superblock.h" -+#include "zstd_compress.h" - #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */ - - /* *************************************************************** -@@ -376,7 +377,7 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) - * Initializes `cctxParams` from `params` and `compressionLevel`. - * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. - */ --static void -+void - ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, - const ZSTD_parameters* params, - int compressionLevel) -@@ -1610,7 +1611,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, - } - - static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); --static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); - - static void ZSTD_overrideCParams( - ZSTD_compressionParameters* cParams, -@@ -2728,18 +2729,6 @@ static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) - return (cctxParams->useBlockSplitter == ZSTD_ps_enable); - } - --/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types -- * and size of the sequences statistics -- */ --typedef struct { -- U32 LLtype; -- U32 Offtype; -- U32 MLtype; -- size_t size; -- size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ -- int longOffsets; --} ZSTD_symbolEncodingTypeStats_t; -- - /* ZSTD_buildSequencesStatistics(): - * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. - * Modifies `nextEntropy` to have the appropriate values as a side effect. -@@ -2747,7 +2736,7 @@ typedef struct { - * - * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) - */ --static ZSTD_symbolEncodingTypeStats_t -+ZSTD_symbolEncodingTypeStats_t - ZSTD_buildSequencesStatistics( - const seqStore_t* seqStorePtr, size_t nbSeq, - const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, -@@ -3199,7 +3188,7 @@ static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seq - - typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; - --static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) -+size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - { - ZSTD_matchState_t* const ms = &zc->blockState.matchState; - DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); -@@ -3676,7 +3665,7 @@ ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, - * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, - * and updates nextEntropy to the appropriate repeatMode. - */ --static ZSTD_symbolEncodingTypeStats_t -+ZSTD_symbolEncodingTypeStats_t - ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) - { - ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0}; -@@ -5110,7 +5099,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - /*! ZSTD_compressBegin_internal() : - * Assumption : either @dict OR @cdict (or none) is non-NULL, never both - * @return : 0, or an error code */ --static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, -+size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, -@@ -7107,7 +7096,7 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ --static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { -+ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); - DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); -diff --git a/lib/compress/zstd_compress.h b/lib/compress/zstd_compress.h -new file mode 100644 -index 00000000..2ec00ce2 ---- /dev/null -+++ b/lib/compress/zstd_compress.h -@@ -0,0 +1,44 @@ -+#include "../common/allocations.h" -+#include "../common/zstd_deps.h" -+#include "../common/mem.h" -+#include "hist.h" -+#include "../common/fse.h" -+#include "../common/huf.h" -+#include "zstd_compress_internal.h" -+#include "zstd_compress_sequences.h" -+#include "zstd_compress_literals.h" -+#include "zstd_fast.h" -+#include "zstd_double_fast.h" -+#include "zstd_lazy.h" -+#include "zstd_opt.h" -+#include "zstd_ldm.h" -+#include "zstd_compress_superblock.h" -+#ifndef BPSF_ZSTD_COMPRESS_H -+#define BPSF_ZSTD_COMPRESS_H -+ -+typedef struct { -+ U32 LLtype; -+ U32 Offtype; -+ U32 MLtype; -+ size_t size; -+ size_t lastCountSize; -+ int longOffsets; -+} ZSTD_symbolEncodingTypeStats_t; -+ -+ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+ -+size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize); -+ -+size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, -+ const ZSTD_CDict* cdict, const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff); -+ -+void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params, int compressionLevel); -+ -+ZSTD_symbolEncodingTypeStats_t ZSTD_buildSequencesStatistics(const seqStore_t* seqStorePtr, size_t nbSeq, const ZSTD_fseCTables_t* prevEntropy, -+ ZSTD_fseCTables_t* nextEntropy, BYTE* dst, const BYTE* const dstEnd, ZSTD_strategy strategy, -+ unsigned* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize); -+ -+size_t HUF_load_table(const BYTE* src, HUF_DTable* dtable); -+ -+size_t HUF_build_table(BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, HUF_CElt* ctable); -+#endif // BPSF_ZSTD_COMPRESS_H -\ No newline at end of file -diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c -index f85dd0be..f498e57a 100644 ---- a/lib/decompress/huf_decompress.c -+++ b/lib/decompress/huf_decompress.c -@@ -406,7 +406,7 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize - U32 const maxTableLog = dtd.maxTableLog + 1; - U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); - tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); -- if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ -+ // if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); -@@ -941,6 +941,21 @@ static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t ds - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - -+size_t HUF_load_table (const BYTE* src, HUF_DTable* dtable) { -+ uint8_t workSpace [16384]; -+ size_t wkspSize = sizeof(workSpace); -+ size_t src_len_capacity = 4096; -+ size_t hSize = HUF_readDTableX1_wksp(dtable, src, src_len_capacity, workSpace, wkspSize, 0); -+ if (HUF_isError(hSize)) -+ return hSize; -+ else -+ return hSize; -+} -+ -+size_t HUF_decompress_x4 (BYTE* dst, size_t dst_capacity, const void* src, size_t src_len, HUF_DTable* dtable) { -+ return HUF_decompress4X1_usingDTable_internal(dst, dst_capacity, src, src_len, dtable, 0); -+} -+ - #endif /* HUF_FORCE_DECOMPRESS_X2 */ - - -diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c -index 76d7332e..378204e1 100644 ---- a/lib/decompress/zstd_decompress_block.c -+++ b/lib/decompress/zstd_decompress_block.c -@@ -16,14 +16,14 @@ - *********************************************************/ - #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ - #include "../common/compiler.h" /* prefetch */ --#include "../common/cpu.h" /* bmi2 */ -+#include "../common/cpu.h" - #include "../common/mem.h" /* low level memory routines */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" - #include "../common/huf.h" - #include "../common/zstd_internal.h" -+#include "zstd_ddict.h" - #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ --#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ - #include "zstd_decompress_block.h" - #include "../common/bits.h" /* ZSTD_highbit32 */ - -@@ -361,7 +361,7 @@ size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, - * - pretify output, report below, test with fuzzer to ensure it's correct */ - - /* Default FSE distribution table for Literal Lengths */ --static const ZSTD_seqSymbol LL_defaultDTable[(1<> 6); -+ symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*p_src >> 4) & 3); -+ symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*p_src >> 2) & 3); -+ p_src++; -+ -+ -+ { -+ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, -+ LLtype, MaxLL, LLFSELog, -+ p_src, p_src_end-p_src, -+ LL_base, LL_bits, -+ LL_defaultDTable, dctx->fseEntropy, -+ dctx->ddictIsCold, n_seq, -+ dctx->workspace, sizeof(dctx->workspace), -+ 0); -+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); -+ p_src += llhSize; -+ } -+ -+ { -+ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, -+ OFtype, MaxOff, OffFSELog, -+ p_src, p_src_end-p_src, -+ OF_base, OF_bits, -+ OF_defaultDTable, dctx->fseEntropy, -+ dctx->ddictIsCold, n_seq, -+ dctx->workspace, sizeof(dctx->workspace), -+ 0); -+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); -+ p_src += ofhSize; -+ } -+ -+ { -+ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, -+ MLtype, MaxML, MLFSELog, -+ p_src, p_src_end-p_src, -+ ML_base, ML_bits, -+ ML_defaultDTable, dctx->fseEntropy, -+ dctx->ddictIsCold, n_seq, -+ dctx->workspace, sizeof(dctx->workspace), -+ 0); -+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); -+ p_src += mlhSize; -+ } -+ -+ return 0; -+} -+ -+typedef struct { -+ size_t state; -+ const ZSTD_seqSymbol* table; -+} ZSTD_fseState; -+ -+typedef struct { -+ BIT_DStream_t DStream; -+ ZSTD_fseState stateLL; -+ ZSTD_fseState stateOffb; -+ ZSTD_fseState stateML; -+ size_t prevOffset[ZSTD_REP_NUM]; -+} seqState_t; -+ -+typedef struct { -+ size_t litLength; -+ size_t matchLength; -+ size_t offset; -+} seq_t; -+ -+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -+ -+extern void ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt); -+extern seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq); -+ -+size_t BPSF_decodeSeqs (ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, -+ int nbSeq, uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of) { -+ const BYTE* ip = (const BYTE*)seqStart; -+ const BYTE* const iend = ip + seqSize; -+ -+ size_t i_seq = 0; -+ if (nbSeq) { -+ seqState_t seqState; -+ dctx->fseEntropy = 1; -+ seqState.prevOffset[0] = 1; -+ seqState.prevOffset[1] = 4; -+ seqState.prevOffset[2] = 8; -+ -+ RETURN_ERROR_IF(ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), corruption_detected, ""); -+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); -+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); -+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); -+ -+ for ( ; nbSeq ; nbSeq--) { -+ seq_t const sequence = ZSTD_decodeSequence(&seqState, 0, nbSeq==1); -+ p_ll[i_seq] = sequence.litLength; -+ p_ml[i_seq] = sequence.matchLength; -+ p_of[i_seq] = sequence.offset; -+ i_seq ++; -+ } -+ -+ -+ assert(nbSeq == 0); -+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); -+ } -+ -+ return 0; -+} -+ -+size_t BPSF_decodeSeqs_and_reconstruct(ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, -+ int nbSeq, uint8_t *p_dst, size_t max_dst_len, -+ size_t dict_size, size_t *reconstructed_size) { -+ const BYTE *p_dict = dctx->litPtr; -+ const BYTE *p_dict_end = dctx->litPtr + dict_size; -+ -+ const BYTE *p_lit = dctx->litPtr + dict_size; -+ const BYTE *p_lit_end = dctx->litPtr + dctx->litSize; -+ -+ BYTE *p_dst_start = p_dst; -+ BYTE *p_dst_limit = p_dst + max_dst_len; -+ -+ uint8_t backup [16]; -+ MEM_COPY16B(backup, p_dst_limit); -+ -+ S32 ll_state, of_state, ml_state; -+ U64 data; -+ -+ const BYTE* p_src = (const BYTE*)seqStart; -+ size_t src_len = seqSize; -+ -+ S32 prev_of[] = {1, 4, 8}; -+ -+ p_src += (src_len - 8); -+ -+ #define FSE_READMOVE0(t,b,n) { if(n) { t=b+(data>>(64-n)); data<<=n; } else {t=b;} } -+ #define FSE_READMOVE1(t,b,n) { t=b+(data>>(64-n)); data<<=n; } -+ -+ if (nbSeq) { -+ dctx->fseEntropy = 1; -+ -+ U8 ll_m_bits = ((const ZSTD_seqSymbol_header*)dctx->LLTptr)->tableLog; -+ U8 of_m_bits = ((const ZSTD_seqSymbol_header*)dctx->OFTptr)->tableLog; -+ U8 ml_m_bits = ((const ZSTD_seqSymbol_header*)dctx->MLTptr)->tableLog; -+ -+ const ZSTD_seqSymbol* ll_table = (dctx->LLTptr + 1); -+ const ZSTD_seqSymbol* of_table = (dctx->OFTptr + 1); -+ const ZSTD_seqSymbol* ml_table = (dctx->MLTptr + 1); -+ -+ data = (1 | (*(U64*)p_src)); -+ data <<= (8 - highbit_u9(p_src[7])); -+ -+ FSE_READMOVE0(ll_state, 0, ll_m_bits); -+ FSE_READMOVE0(of_state, 0, of_m_bits); -+ FSE_READMOVE0(ml_state, 0, ml_m_bits); -+ -+ for (int i_seq = 0; i_seq < nbSeq; ++i_seq) { -+ ZSTD_seqSymbol ll_item = ll_table[ll_state]; -+ ZSTD_seqSymbol of_item = of_table[of_state]; -+ ZSTD_seqSymbol ml_item = ml_table[ml_state]; -+ S32 of, ml, ll; -+ -+ { -+ int8_t c = trailbit_u64(data); -+ p_src -= (c>>3); -+ data = (1 | (*(U64*)p_src)); -+ data <<= (c&7); -+ } -+ -+ if (of_item.nbAdditionalBits > 1) { -+ FSE_READMOVE1(of, of_item.baseValue, of_item.nbAdditionalBits); -+ prev_of[2] = prev_of[1]; -+ prev_of[1] = prev_of[0]; -+ prev_of[0] = of; -+ } else { -+ U8 ll0 = (ll_item.baseValue == 0); -+ if (of_item.nbAdditionalBits == 0) { -+ of = prev_of[ll0]; -+ prev_of[1] = prev_of[!ll0]; -+ prev_of[0] = of; -+ } else { -+ FSE_READMOVE1(of, (of_item.baseValue+ll0), 1); -+ size_t temp = (of==3) ? prev_of[0] -1 : prev_of[of]; -+ temp -= !temp; -+ if (of != 1) prev_of[2] = prev_of[1]; -+ prev_of[1] = prev_of[0]; -+ prev_of[0] = of = temp; -+ } -+ } -+ -+ FSE_READMOVE0(ml, ml_item.baseValue, ml_item.nbAdditionalBits); -+ FSE_READMOVE0(ll, ll_item.baseValue, ll_item.nbAdditionalBits); -+ -+ if (UNLIKELY(of_item.nbAdditionalBits + ml_item.nbAdditionalBits + ll_item.nbAdditionalBits > 30)) { -+ int8_t c = trailbit_u64(data); -+ p_src -= (c>>3); -+ data = (1 | (*(U64*)p_src)); -+ data <<= (c&7); -+ } -+ -+ FSE_READMOVE0(ll_state, ll_item.nextState, ll_item.nbBits); -+ FSE_READMOVE0(ml_state, ml_item.nextState, ml_item.nbBits); -+ FSE_READMOVE0(of_state, of_item.nextState, of_item.nbBits); -+ -+ MEM_COPY16B(p_dst, p_lit); -+ -+ if (UNLIKELY(ll > 16)) { -+ MEM_COPY(p_dst + 16, p_lit + 16, ll - 16); -+ } -+ p_dst += ll; -+ p_lit += ll; -+ -+ -+ if (of > p_dst - p_dst_start) { -+ const U8 *dict_end = p_dict + dict_size; -+ const U8 *dict_match = p_dict_end - (of - (p_dst - p_dst_start)); -+ if (dict_match + ml <= dict_end) { -+ ZSTD_wildcopy(p_dst, dict_match, ml, ZSTD_overlap_src_before_dst); -+ } else { -+ size_t copy_from_dict = dict_end - dict_match; -+ ZSTD_wildcopy(p_dst, dict_match, copy_from_dict, ZSTD_overlap_src_before_dst); -+ ZSTD_wildcopy(p_dst + copy_from_dict, p_dst_start, ml - copy_from_dict, ZSTD_overlap_src_before_dst); -+ } -+ } else { -+ const U8 *p_match = p_dst - of; -+ if (LIKELY(of >= 16)) { -+ MEM_COPY(p_dst, p_match, ml); -+ } else if (UNLIKELY(of == 4)) { -+ MEM_SET_4B(p_dst, *(uint32_t*)p_match, ml); -+ } else if (UNLIKELY(of == 2)) { -+ MEM_SET_2B(p_dst, *(uint16_t*)p_match, ml); -+ } else if (UNLIKELY(of == 1)) { -+ MEM_SET_1B(p_dst, *p_match, ml); -+ } else { -+ U8 *op = p_dst; -+ ZSTD_overlapCopy8(&op, &p_match, of); -+ if (ml > 8) { -+ ZSTD_wildcopy(op, p_match, (ptrdiff_t)ml - 8, ZSTD_overlap_src_before_dst); -+ } -+ } -+ } -+ p_dst += ml; -+ } -+ } -+ -+ { -+ size_t n_last_lit = p_lit_end - p_lit; -+ MEM_COPY(p_dst, p_lit, n_last_lit); -+ p_dst += n_last_lit; -+ } -+ -+ MEM_COPY16B(p_dst_limit, backup); -+ *reconstructed_size = p_dst - p_dst_start; -+ return 0; -+} -diff --git a/lib/bpsf.h b/lib/bpsf.h -new file mode 100644 -index 00000000..b2c2e84f ---- /dev/null -+++ b/lib/bpsf.h -@@ -0,0 +1,58 @@ -+/* -+* 版权所有 (c) 华为技术有限公司 2025 -+*/ -+#ifndef BPSF_BPSF_H -+#define BPSF_BPSF_H -+ -+#include -+#include "zstd.h" -+#include "compress/zstd_compress.h" -+ -+// LZ77 -+ZSTD_CCtx* BPSF_getCCtx(void); -+size_t BPSF_freeCCtx(ZSTD_CCtx *p_cctx); -+ZSTD_DCtx* BPSF_getDCtx(void); -+size_t BPSF_freeDCtx(ZSTD_DCtx *p_cctx); -+ -+ZSTD_parameters BPSF_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+ -+void BPSF_init_CCtxParams(ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params, int compressionLevel); -+ -+void BPSF_compressBegin(ZSTD_CCtx* cctx, const uint8_t* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, -+ ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, const ZSTD_CCtx_params* params, -+ U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff); -+ -+void BPSF_getSeqStore(ZSTD_CCtx* zc, const uint8_t* src, size_t srcSize); -+ -+U32 BPSF_update_window(ZSTD_window_t* window, const uint8_t* src, size_t srcSize, int forceNonContiguous); -+ -+// Huffman -+size_t BPSF_build_HUFTable(uint8_t* dst, size_t dst_capacity, const uint8_t* src, size_t srcSize, HUF_CElt* CTable); -+ -+size_t BPSF_loadHUFTable(const uint8_t* src, HUF_DTable* dtable); -+ -+// FSE encode -+ZSTD_symbolEncodingTypeStats_t BPSF_buildSeqsStats(const seqStore_t *seqStorePtr, size_t nbSeq, const ZSTD_fseCTables_t *prevEntropy, -+ ZSTD_fseCTables_t *nextEntropy, uint8_t *dst, const uint8_t* dstEnd, ZSTD_strategy strategy, -+ unsigned *countWorkspace, uint8_t *entropyWorkspace, size_t entropyWkspSize); -+ -+size_t BPSF_encodeSeqs(uint8_t* dst, size_t dstCapacity, -+ const FSE_CTable* CTable_MatchLength, const uint8_t* mlCodeTable, -+ const FSE_CTable* CTable_OffsetBits, const uint8_t* ofCodeTable, -+ const FSE_CTable* CTable_LitLength, const uint8_t* llCodeTable, -+ const seqDef* sequences, size_t nbSeq, int longOffsets, int bmi2 -+); -+ -+// FSE decode -+size_t BPSF_decodeSeqTable(ZSTD_DCtx* dctx, size_t n_seq, const uint8_t* p_src); -+ -+size_t BPSF_decodeSeqs(ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, -+ int nbSeq, uint16_t *p_ll, uint16_t *p_ml, uint16_t *p_of); -+ -+void ZSTD_setLiteralDict(ZSTD_DCtx* dctx, const uint8_t* litPtr, size_t litSize); -+ -+size_t BPSF_decodeSeqs_and_reconstruct(ZSTD_DCtx* dctx, const uint8_t* seqStart, size_t seqSize, -+ int nbSeq, uint8_t *p_dst, size_t max_dst_len, -+ size_t dict_size, size_t *reconstructed_size); -+ -+#endif // BPSF_BPSF_H -\ No newline at end of file -diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c -index ea000723..2b7d4c21 100644 ---- a/lib/compress/huf_compress.c -+++ b/lib/compress/huf_compress.c -@@ -1326,6 +1326,42 @@ unsigned HUF_optimalTableLog( - } - } - -+size_t HUF_compress_4x (BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, const HUF_CElt* CTable) { -+ HUF_compress4X_usingCTable_internal(dst, dst_capacity, src, srcSize, CTable, 0); -+} -+ -+size_t HUF_build_table (BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, HUF_CElt* CTable) { -+ unsigned huffLog = 11; -+ uint8_t workSpace [8192]; -+ size_t wkspSize = sizeof(workSpace); -+ -+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); -+ BYTE* const dst_limit = dst + dst_capacity; -+ -+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize); -+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); -+ -+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); -+ if (!srcSize) return 0; -+ if (!dst_capacity) return 0; -+ if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); -+ if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); -+ if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; -+ -+ unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; -+ -+ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp))); -+ -+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), CTable, table->count, 0); -+ -+ size_t const maxBits = HUF_buildCTable_wksp(CTable, table->count, maxSymbolValue, huffLog, &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); -+ CHECK_F(maxBits); -+ huffLog = (U32)maxBits; -+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits((CTable+1), maxSymbolValue+1)); -+ -+ return HUF_writeCTable_wksp(dst, dst_capacity, CTable, maxSymbolValue, huffLog, &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)); -+} -+ - /* HUF_compress_internal() : - * `workSpace_align4` must be aligned on 4-bytes boundaries, - * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ -@@ -1375,7 +1411,9 @@ HUF_compress_internal (void* dst, size_t dstSize, - CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); - largestTotal += largestEnd; - } -- if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */ -+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) { -+ return 0; /* heuristic : probably not compressible enough */ -+ } - } - - /* Scan input and build symbol stats */ -@@ -1386,9 +1424,7 @@ HUF_compress_internal (void* dst, size_t dstSize, - DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1)); - - /* Check validity of previous table */ -- if ( repeat -- && *repeat == HUF_repeat_check -- && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { -+ if ( repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ -@@ -1409,8 +1445,7 @@ HUF_compress_internal (void* dst, size_t dstSize, - } - - /* Write table description header */ -- { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, -- &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); -+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); - /* Check if using previous huffman table is beneficial */ - if (repeat && *repeat != HUF_repeat_none) { - size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); -diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c -index 9284e2a4..482cab91 100644 ---- a/lib/compress/zstd_compress.c -+++ b/lib/compress/zstd_compress.c -@@ -27,6 +27,7 @@ - #include "zstd_opt.h" - #include "zstd_ldm.h" - #include "zstd_compress_superblock.h" -+#include "zstd_compress.h" - #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */ - - /* *************************************************************** -@@ -376,7 +377,7 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) - * Initializes `cctxParams` from `params` and `compressionLevel`. - * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. - */ --static void -+void - ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, - const ZSTD_parameters* params, - int compressionLevel) -@@ -1610,7 +1611,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, - } - - static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); --static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); - - static void ZSTD_overrideCParams( - ZSTD_compressionParameters* cParams, -@@ -2728,18 +2729,6 @@ static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) - return (cctxParams->useBlockSplitter == ZSTD_ps_enable); - } - --/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types -- * and size of the sequences statistics -- */ --typedef struct { -- U32 LLtype; -- U32 Offtype; -- U32 MLtype; -- size_t size; -- size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ -- int longOffsets; --} ZSTD_symbolEncodingTypeStats_t; -- - /* ZSTD_buildSequencesStatistics(): - * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. - * Modifies `nextEntropy` to have the appropriate values as a side effect. -@@ -2747,7 +2736,7 @@ typedef struct { - * - * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) - */ --static ZSTD_symbolEncodingTypeStats_t -+ZSTD_symbolEncodingTypeStats_t - ZSTD_buildSequencesStatistics( - const seqStore_t* seqStorePtr, size_t nbSeq, - const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, -@@ -3199,7 +3188,7 @@ static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seq - - typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; - --static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) -+size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - { - ZSTD_matchState_t* const ms = &zc->blockState.matchState; - DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); -@@ -3676,7 +3665,7 @@ ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, - * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, - * and updates nextEntropy to the appropriate repeatMode. - */ --static ZSTD_symbolEncodingTypeStats_t -+ZSTD_symbolEncodingTypeStats_t - ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) - { - ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0}; -@@ -5110,7 +5099,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - /*! ZSTD_compressBegin_internal() : - * Assumption : either @dict OR @cdict (or none) is non-NULL, never both - * @return : 0, or an error code */ --static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, -+size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, -@@ -7107,7 +7096,7 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ --static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { -+ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); - DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); -diff --git a/lib/compress/zstd_compress.h b/lib/compress/zstd_compress.h -new file mode 100644 -index 00000000..2ec00ce2 ---- /dev/null -+++ b/lib/compress/zstd_compress.h -@@ -0,0 +1,44 @@ -+#include "../common/allocations.h" -+#include "../common/zstd_deps.h" -+#include "../common/mem.h" -+#include "hist.h" -+#include "../common/fse.h" -+#include "../common/huf.h" -+#include "zstd_compress_internal.h" -+#include "zstd_compress_sequences.h" -+#include "zstd_compress_literals.h" -+#include "zstd_fast.h" -+#include "zstd_double_fast.h" -+#include "zstd_lazy.h" -+#include "zstd_opt.h" -+#include "zstd_ldm.h" -+#include "zstd_compress_superblock.h" -+#ifndef BPSF_ZSTD_COMPRESS_H -+#define BPSF_ZSTD_COMPRESS_H -+ -+typedef struct { -+ U32 LLtype; -+ U32 Offtype; -+ U32 MLtype; -+ size_t size; -+ size_t lastCountSize; -+ int longOffsets; -+} ZSTD_symbolEncodingTypeStats_t; -+ -+ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+ -+size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize); -+ -+size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, -+ const ZSTD_CDict* cdict, const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff); -+ -+void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params, int compressionLevel); -+ -+ZSTD_symbolEncodingTypeStats_t ZSTD_buildSequencesStatistics(const seqStore_t* seqStorePtr, size_t nbSeq, const ZSTD_fseCTables_t* prevEntropy, -+ ZSTD_fseCTables_t* nextEntropy, BYTE* dst, const BYTE* const dstEnd, ZSTD_strategy strategy, -+ unsigned* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize); -+ -+size_t HUF_load_table(const BYTE* src, HUF_DTable* dtable); -+ -+size_t HUF_build_table(BYTE* dst, size_t dst_capacity, const BYTE* src, size_t srcSize, HUF_CElt* ctable); -+#endif // BPSF_ZSTD_COMPRESS_H -\ No newline at end of file -diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c -index f85dd0be..ea6d2cf1 100644 ---- a/lib/decompress/huf_decompress.c -+++ b/lib/decompress/huf_decompress.c -@@ -24,6 +24,7 @@ - #include "../common/zstd_internal.h" - #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ - -+#define OPTIMIZE_HUF_TABLE_COPY 1 - /* ************************************************************** - * Constants - ****************************************************************/ -@@ -382,6 +383,12 @@ typedef struct { - BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; - } HUF_ReadDTableX1_Workspace; - -+#if OPTIMIZE_HUF_TABLE_COPY -+static U16 HUF_DEltX1_set1 (BYTE symbol, BYTE nbBits) { -+ U16 D = ((U16)(symbol << 8) + nbBits); -+ return D; -+} -+ - size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) - { - U32 tableLog = 0; -@@ -406,7 +413,131 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize - U32 const maxTableLog = dtd.maxTableLog + 1; - U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); - tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); -- if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ -+ // if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ -+ dtd.tableType = 0; -+ dtd.tableLog = (BYTE)tableLog; -+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); -+ } -+ -+ { -+ int n; -+ int nextRankStart = 0; -+ int const unroll = 4; -+ int const nLimit = (int)nbSymbols - unroll + 1; -+ for (n=0; n<(int)tableLog+1; n++) { -+ U32 const curr = nextRankStart; -+ nextRankStart += wksp->rankVal[n]; -+ wksp->rankStart[n] = curr; -+ } -+ for (n=0; n < nLimit; n += unroll) { -+ int u; -+ for (u=0; u < unroll; ++u) { -+ size_t const w = wksp->huffWeight[n+u]; -+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); -+ } -+ } -+ for (; n < (int)nbSymbols; ++n) { -+ size_t const w = wksp->huffWeight[n]; -+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; -+ } -+ } -+ -+ /* fill DTable -+ * We fill all entries of each weight in order. -+ * That way length is a constant for each iteration of the outer loop. -+ * We can switch based on the length to a different inner loop which is -+ * optimized for that particular case. -+ */ -+ { -+ U32 w; -+ int symbol = wksp->rankVal[0]; -+ int rankStart = 0; -+ for (w=1; wrankVal[w]; -+ int const length = (1 << w) >> 1; -+ int uStart = rankStart; -+ BYTE const nbBits = (BYTE)(tableLog + 1 - w); -+ int s; -+ switch (length) { -+ case 1: -+ for (s=0; ssymbols[symbol + s]; -+ D.nbBits = nbBits; -+ dt[uStart] = D; -+ uStart += 1; -+ } -+ break; -+ case 2: -+ for (s=0; ssymbols[symbol + s]; -+ D.nbBits = nbBits; -+ dt[uStart+0] = D; -+ dt[uStart+1] = D; -+ uStart += 2; -+ } -+ break; -+ case 4: -+ for (s=0; ssymbols[symbol + s], nbBits); -+ U16 DH = HUF_DEltX1_set1(wksp->symbols[symbol + s + 1], nbBits); -+ vst1q_u16((U16*)(dt+uStart), vcombine_u16(vdup_n_u16(DL), vdup_n_u16(DH))); -+ uStart += 8; -+ } -+ break; -+ case 8: -+ for (s=0; ssymbols[symbol + s], nbBits); -+ vst1q_u16((U16*)(dt+uStart), vdupq_n_u16(D1)); -+ uStart += 8; -+ } -+ break; -+ default: -+ for (s=0; ssymbols[symbol + s], nbBits); -+ uint16x8_t vecD8 = vdupq_n_u16(D1); -+ for (int u=0; u= sizeof(*wksp)); -+ if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); -+ -+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); -+ /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ -+ -+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); -+ if (HUF_isError(iSize)) return iSize; -+ -+ -+ /* Table header */ -+ { DTableDesc dtd = HUF_getDTableDesc(DTable); -+ U32 const maxTableLog = dtd.maxTableLog + 1; -+ U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); -+ tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); -+ // if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); -@@ -517,6 +648,7 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize - } - return iSize; - } -+#endif - - FORCE_INLINE_TEMPLATE BYTE - HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) -@@ -941,6 +1073,21 @@ static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t ds - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - -+size_t HUF_load_table (const BYTE* src, HUF_DTable* dtable) { -+ uint8_t workSpace [16384]; -+ size_t wkspSize = sizeof(workSpace); -+ size_t src_len_capacity = 4096; -+ size_t hSize = HUF_readDTableX1_wksp(dtable, src, src_len_capacity, workSpace, wkspSize, 0); -+ if (HUF_isError(hSize)) -+ return hSize; -+ else -+ return hSize; -+} -+ -+size_t HUF_decompress_x4 (BYTE* dst, size_t dst_capacity, const void* src, size_t src_len, HUF_DTable* dtable) { -+ return HUF_decompress4X1_usingDTable_internal(dst, dst_capacity, src, src_len, dtable, 0); -+} -+ - #endif /* HUF_FORCE_DECOMPRESS_X2 */ - - -diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c -index 309ec0d0..199c145d 100644 ---- a/lib/decompress/zstd_ddict.c -+++ b/lib/decompress/zstd_ddict.c -@@ -242,3 +242,8 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) - if (ddict==NULL) return 0; - return ddict->dictID; - } -+ -+void ZSTD_setLiteralDict(ZSTD_DCtx* dctx, BYTE const* litPtr, size_t litSize) { -+ dctx->litPtr = litPtr; -+ dctx->litSize = litSize; -+} -\ No newline at end of file -diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c -index 76d7332e..378204e1 100644 ---- a/lib/decompress/zstd_decompress_block.c -+++ b/lib/decompress/zstd_decompress_block.c -@@ -16,14 +16,14 @@ - *********************************************************/ - #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ - #include "../common/compiler.h" /* prefetch */ --#include "../common/cpu.h" /* bmi2 */ -+#include "../common/cpu.h" - #include "../common/mem.h" /* low level memory routines */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" - #include "../common/huf.h" - #include "../common/zstd_internal.h" -+#include "zstd_ddict.h" - #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ --#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ - #include "zstd_decompress_block.h" - #include "../common/bits.h" /* ZSTD_highbit32 */ - -@@ -361,7 +361,7 @@ size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, - * - pretify output, report below, test with fuzzer to ensure it's correct */ - - /* Default FSE distribution table for Literal Lengths */ --static const ZSTD_seqSymbol LL_defaultDTable[(1< -+#include -+#include "decompress/zstd_decompress_block.h" -+ -+static inline void MEM_COPY16B (uint8_t *p_dst, const uint8_t *p_src) { -+ vst1q_u8(p_dst, vld1q_u8(p_src)); -+} -+ -+static inline void MEM_COPY (uint8_t *p_dst, const uint8_t *p_src, int len) { -+ do { -+ vst1q_u8(p_dst, vld1q_u8(p_src)); -+ p_dst += 16; -+ p_src += 16; -+ len -= 16; -+ } while (len > 0); -+} -+ -+static inline void MEM_SET_1B (uint8_t *p_dst, const uint8_t value, int len) { -+ uint8x16_t vec_data = vdupq_n_u8(value); -+ do { -+ vst1q_u8(p_dst, vec_data); -+ p_dst += 16; -+ len -= 16; -+ } while (len > 0); -+} -+ -+static inline void MEM_SET_2B (uint8_t *p_dst, const uint16_t value, int len) { -+ uint16x8_t vec_data = vdupq_n_u16(value); -+ do { -+ vst1q_u16((uint16_t*)p_dst, vec_data); -+ p_dst += 16; -+ len -= 16; -+ } while (len > 0); -+} -+ -+static inline void MEM_SET_4B (uint8_t *p_dst, const uint32_t value, int len) { -+ uint32x4_t vec_data = vdupq_n_u32(value); -+ do { -+ vst1q_u32((uint32_t*)p_dst, vec_data); -+ p_dst += 16; -+ len -= 16; -+ } while (len > 0); -+} -+ -+static inline void MEM_LZ_MOVE (uint8_t *p_dst, uint8_t *p_match, int32_t ml, int32_t of) { -+ uint8x16_t vec_data = vld1q_u8(p_match); -+ do { -+ vst1q_u8(p_dst, vec_data); -+ p_dst += of; -+ ml -= of; -+ } while (ml > 0); -+} -+ -+static inline int8_t trailbit_u64 (uint64_t val) { -+ return (int8_t)__builtin_ctzll(val); -+} -+ -+static inline int8_t highbit_u9 (uint16_t x) { -+ return 31 - __builtin_clz((uint32_t)x); -+} -+ -+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } -+ -+static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { -+ assert(*ip <= *op); -+ if (offset < 8) { -+ /* close range match, overlap */ -+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; -+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9, 10, 11}; -+ int const sub2 = dec64table[offset]; -+ (*op)[0] = (*ip)[0]; -+ (*op)[1] = (*ip)[1]; -+ (*op)[2] = (*ip)[2]; -+ (*op)[3] = (*ip)[3]; -+ *ip += dec32table[offset]; -+ ZSTD_copy4(*op+4, *ip); -+ *ip -= sub2; -+ } else { -+ ZSTD_copy8(*op, *ip); -+ } -+ *ip += 8; -+ *op += 8; -+ assert(*op - *ip >= 8); -+} -+ -+#endif // MEM_H -\ No newline at end of file diff --git a/ksal_bpsf.spec b/ksal_bpsf.spec deleted file mode 100644 index 5e7c8be2..00000000 --- a/ksal_bpsf.spec +++ /dev/null @@ -1,23 +0,0 @@ -# os_type -%{!?os_type: %define os_type openEuler} - -Name: ksal_bpsf%{?version_suffix:_debug} -Version: 1.0.0 -Release: %{os_type} -Summary: ksal bpsf compress -License: Commercial - -%description -To obtain bpsf rpm - -%install -mkdir -p %{buildroot}/usr/lib64 -mkdir -p %{buildroot}/usr/include -cp %{_builddir}/*.so %{buildroot}/usr/lib64 -cp %{_builddir}/*.h %{buildroot}/usr/include -ln -sf /usr/lib64/libksal_bpsf.so %{buildroot}/usr/lib64/ksal_bpsf.so.1 - -%files -/usr/lib64/*.so -/usr/lib64/*.so.1 -/usr/include/*.h \ No newline at end of file diff --git a/libksal_bpsf_zstd_so_create.sh b/libksal_bpsf_zstd_so_create.sh deleted file mode 100644 index 6200647c..00000000 --- a/libksal_bpsf_zstd_so_create.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash -set -e - -execsuccess=0 -execfailed=1 - -# release/debug -version_type=$1 - -if [ "$version_type" = "debug" ]; then - echo "[ --- debug build in progress ---]" -else - echo "[ --- release build in progress ---]" -fi - -packagename="ksal_bpsf" -packagever=$(cat ./ksal_bpsf.spec | grep 'Version:' | head -1 | awk '{print $2}') - -curd=$(pwd) -echo "Current dir: $curd" - -static_library=./usr/lib64/libksal_bpsf.a -static_library_debug=./usr/lib64/libksal_bpsf_debug.a -include_file=./usr/include/ksal/ksal_bpsf.h -include_log_file=./usr/include/ksal/bpsf_log.h - -if [ "$version_type" = "debug" ]; then - rpm2cpio libksal-release-1.11.0.oe1.aarch64.rpm | cpio -idv $static_library_debug - rpm2cpio libksal-release-1.11.0.oe1.aarch64.rpm | cpio -idv $include_file - rpm2cpio libksal-release-1.11.0.oe1.aarch64.rpm | cpio -idv $include_log_file - cp $static_library_debug ./ - cp $include_file ./ - cp $include_log_file ./ -else - rpm2cpio libksal-release-1.11.0.oe1.aarch64.rpm | cpio -idv $static_library - rpm2cpio libksal-release-1.11.0.oe1.aarch64.rpm | cpio -idv $include_file - rpm2cpio libksal-release-1.11.0.oe1.aarch64.rpm | cpio -idv $include_log_file - cp $static_library ./ - cp $include_file ./ - cp $include_log_file ./ -fi - -tar -zxf "zstd-1.5.6.tar.gz" -cp ksal-bpsf-zstd.patch zstd-1.5.6/ -cd zstd-1.5.6 -patch -p1 < ksal-bpsf-zstd.patch -cd .. - -if [ "$version_type" = "debug" ]; then - make BUILD_TYPE=debug -else - make -fi - -rm -rf ./zstd-1.5.6 -rm -rf ./usr -rm -rf libksal_bpsf.a -rm -rf libksal_bpsf_debug.a - -function initialize() -{ - if [[ -d "$curd/rpmbuild" ]]; then - rm -rf $curd/rpmbuild - fi - mkdir -p $curd/rpmbuild/{BUILD,RPMS,SOURCES,SPECS,SRPMS,BUILDROOT} - - mkdir -p $curd/ksal_bpsf - cp -rf $curd/ksal_bpsf.h $curd/ksal_bpsf/ - cp -rf $curd/bpsf_log.h $curd/ksal_bpsf/ - cp -rf $curd/libksal_bpsf.so $curd/ksal_bpsf/ - - cp -rf $curd/ksal_bpsf/* $curd/rpmbuild/BUILD/ - cp $curd/ksal_bpsf.spec $curd/rpmbuild/SPECS/ - - rm -rf $curd/ksal_bpsf/ - rm -rf $curd/libksal_bpsf.so - rm -rf $curd/ksal_bpsf.h - rm -rf $curd/bpsf_log.h -} - -function pack_binary() -{ - local rpm_name="ksal_bpsf" - cd "$curd/rpmbuild/SPECS" || { echo "enter SPECS dir failed"; exit $execfailed; } - - if [ "$version_type" = "debug" ]; then - rpmbuild -bb --define "version_suffix debug" "ksal_bpsf.spec" - rpm_name="ksal_bpsf_debug" - else - rpmbuild -bb "ksal_bpsf.spec" - fi - - if [ $? -ne $execsuccess ]; then - echo "rpmbuild failed" - exit $execfailed - fi - - if find "$curd/rpmbuild/RPMS/aarch64/" -name "$rpm_name-$packagever"*.aarch64.rpm | grep -q .; then - echo "rpmbuild success" - else - echo "RPM not found" - exit $execfailed - fi -} - -initialize -pack_binary \ No newline at end of file -- Gitee