diff --git a/.gitignore b/.gitignore index faadc624fc368fd8cdef1fc1e805b520b91af063..1c81985c1637d9dce733190d53596bd2ffa3794f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ .vscode kae_build KAEZlib/open_source/zlib-1.2.11 -open_source +# open_source # *Test* *.so.* *.a diff --git a/KAESnappy/open_source/kaesnappy_1_10_0.patch b/KAESnappy/open_source/kaesnappy_1_10_0.patch new file mode 100644 index 0000000000000000000000000000000000000000..ac626801909bd0e35b23d8a714278af979a9f042 --- /dev/null +++ b/KAESnappy/open_source/kaesnappy_1_10_0.patch @@ -0,0 +1,486 @@ +Binary files snappy-1.1.10/.vscode/.cache/clangd/wecode-cpp.db and snappy-1.1.10-KAE/.vscode/.cache/clangd/wecode-cpp.db differ +Binary files snappy-1.1.10/.vscode/tags-34.wecode-db and snappy-1.1.10-KAE/.vscode/tags-34.wecode-db differ +Binary files snappy-1.1.10/.vscode/tags-34.wecode-lock and snappy-1.1.10-KAE/.vscode/tags-34.wecode-lock differ +diff -ruN snappy-1.1.10/CMakeLists.txt snappy-1.1.10-KAE/CMakeLists.txt +--- snappy-1.1.10/CMakeLists.txt 2023-03-09 07:44:00.000000000 +0800 ++++ snappy-1.1.10-KAE/CMakeLists.txt 2025-08-30 18:09:22.616282200 +0800 +@@ -84,7 +84,7 @@ + + # BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make + # it prominent in the GUI. +-option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) ++option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." ON) + + option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." 
ON) + +@@ -236,6 +236,25 @@ + "${PROJECT_BINARY_DIR}/snappy-stubs-public.h") + + add_library(snappy "") ++ ++#add_compile_options(-g) ++ ++#add_definitions("-Wall -g") ++ ++target_include_directories(snappy PRIVATE ++ /usr/local/kaesnappy/include ++) ++target_compile_definitions(snappy PRIVATE CONF_KAESNAPPY) ++ ++target_link_directories(snappy PRIVATE ++ /usr/local/kaesnappy/lib ++) ++ ++set_property(TARGET snappy ++ APPEND PROPERTY INSTALL_RPATH ++ /usr/local/kaesnappy/lib;/usr/local/lib) ++target_link_libraries(snappy PRIVATE :libkaelz4.so.2.0.4) ++ + target_sources(snappy + PRIVATE + "snappy-internal.h" +@@ -328,6 +347,19 @@ + PRIVATE + "snappy_unittest.cc" + ) ++ ++ target_compile_definitions(snappy_unittest PRIVATE CONF_KAESNAPPY) ++ target_link_libraries(snappy_unittest snappy_test_support libkaelz4.so.2.0.4) ++ target_include_directories(snappy_unittest PRIVATE ++ /usr/local/kaesnappy/include ++ ) ++ target_link_directories(snappy_unittest PRIVATE ++ /usr/local/kaesnappy/lib ++ ) ++ set_property(TARGET snappy_unittest ++ APPEND PROPERTY INSTALL_RPATH ++ /usr/local/kaesnappy/lib;/usr/local/lib) ++ + target_link_libraries(snappy_unittest snappy_test_support gmock_main gtest) + + add_test( +@@ -340,7 +372,18 @@ + PRIVATE + "snappy_test_tool.cc" + ) +- target_link_libraries(snappy_test_tool snappy_test_support) ++ ++ target_compile_definitions(snappy_test_tool PRIVATE CONF_KAESNAPPY) ++ target_link_libraries(snappy_test_tool snappy_test_support libkaelz4.so.2.0.4) ++ target_include_directories(snappy_test_tool PRIVATE ++ /usr/local/kaesnappy/include ++ ) ++ target_link_directories(snappy_test_tool PRIVATE ++ /usr/local/kaesnappy/lib ++ ) ++ set_property(TARGET snappy_test_tool ++ APPEND PROPERTY INSTALL_RPATH ++ /usr/local/kaesnappy/lib;/usr/local/lib) + endif(SNAPPY_BUILD_TESTS) + + if(SNAPPY_BUILD_BENCHMARKS) +@@ -351,6 +394,18 @@ + ) + target_link_libraries(snappy_benchmark snappy_test_support benchmark_main) + ++ target_compile_definitions(snappy_benchmark 
PRIVATE CONF_KAESNAPPY) ++ target_link_libraries(snappy_benchmark snappy_test_support libkaelz4.so.2.0.4) ++ target_include_directories(snappy_benchmark PRIVATE ++ /usr/local/kaesnappy/include ++ ) ++ target_link_directories(snappy_benchmark PRIVATE ++ /usr/local/kaesnappy/lib ++ ) ++ set_property(TARGET snappy_benchmark ++ APPEND PROPERTY INSTALL_RPATH ++ /usr/local/kaesnappy/lib;/usr/local/lib) ++ + # This project uses Google benchmark for benchmarking. + set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) + set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE) +diff -ruN snappy-1.1.10/lz4_accelerater.h snappy-1.1.10-KAE/lz4_accelerater.h +--- snappy-1.1.10/lz4_accelerater.h 1970-01-01 08:00:00.000000000 +0800 ++++ snappy-1.1.10-KAE/lz4_accelerater.h 2025-08-30 18:09:22.619464100 +0800 +@@ -0,0 +1,131 @@ ++/* * ++ * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. ++ * Create: 2023-04 ++ */ ++ ++#ifndef LZ4_ACCELERATER_H ++#define LZ4_ACCELERATER_H ++ ++#include ++#include "stddef.h" ++#define NO_PREFETCH ++#define RAW_LZ4 ++ ++#define G_KZLPRIME5BYTES (889523592379ULL) ++#define G_KZLPRIME8BYTES (11400714785074694791ULL) ++ ++#ifndef KZL_FORCE_INLINE ++# ifdef _MSC_VER ++# define KZL_FORCE_INLINE static __forceinline ++# else ++# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ ++# ifdef __GNUC__ ++# define KZL_FORCE_INLINE static inline __attribute__((always_inline)) ++# else ++# define KZL_FORCE_INLINE static inline ++# endif ++# else ++# define KZL_FORCE_INLINE static ++# endif ++# endif ++#endif ++ ++#if defined(__GNUC__) && (__GNUC__ >= 4) ++# define KZL_MEMCPY_2(dst, src, size) __builtin_memcpy(dst, src, size) ++# define KZL_MEMCPY_4(dst, src, size) __builtin_memcpy(dst, src, size) ++# define KZL_MEMCPY_8(dst, src, size) vst1_u8((dst), vld1_u8(src)) ++# define KZL_MEMCPY_16(dst, src, size) vst1q_u8((dst), vld1q_u8(src)) ++# define KZL_MEMCPY_32(dst, src, size) vst1q_u8((dst), 
vld1q_u8(src)); vst1q_u8(((dst)+16), vld1q_u8(((src)+16))) ++# define KZL_MEMCPY_16X1(dst, src, size) vst1q_u64((dst), vld1q_u64(src)) ++# define KZL_MEMCPY_32X1(dst, src, size) vst1q_u64((dst), vld1q_u64(src)); \ ++ vst1q_u64(((dst)+16), vld1q_u64(((src)+16))) ++#endif ++ ++/* prefetch 321 ++ * can be disabled, by declaring NO_PREFETCH build macro */ ++#if defined(NO_PREFETCH) ++# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ ++# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ ++# define PREFETCH_L3(ptr) (void)(ptr) /* disabled */ ++#else ++# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ ++# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ ++# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) ++# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) ++# define PREFETCH_L3(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T2) /* third level, so this branch defines the same L1/L2/L3 set as every other branch */ ++# elif defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) ++# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 1 /* rw==write */, 3 /* locality */) ++# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 1 /* rw==write */, 2 /* locality */) ++# define PREFETCH_L3(ptr) __builtin_prefetch((ptr), 1 /* rw==write */, 1 /* locality */) ++# elif defined(__aarch64__) ++# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) ++# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) ++# define PREFETCH_L3(ptr) __asm__ __volatile__("prfm pldl3keep, %0" ::"Q"(*(ptr))) ++# else ++# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ ++# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ ++# define PREFETCH_L3(ptr) (void)(ptr) /* disabled */ ++# endif ++#endif /* NO_PREFETCH */ ++ ++#define L1_CACHELINE_SIZE 64 ++#define L2_CACHELINE_SIZE 64 ++#define L3_CACHELINE_SIZE 128 ++ ++#define PREFETCH_AREA_L1(p, s) { \ ++ const char* const 
ptr = (const char*)(p); \ ++ size_t const size = (size_t)(s); \ ++ size_t pos; \ ++ for (pos=0; pos> (HASH_SIZE - HashLogUsage)); ++} ++ ++KZL_FORCE_INLINE uint32_t KZL_BigEndianfastHash5(uint64_t sequence, uint8_t HashLogUsage) ++{ ++ // for data > 64KB ++ return (uint32_t)(((sequence >> SEQUENCE_MOVE) * G_KZLPRIME8BYTES) >> (HASH_SIZE - HashLogUsage)); ++} ++ ++void accerlerater(int srcSize, uint8_t *acceleration); ++void skipTrigger(int srcSize, uint8_t *skipStep); ++void PrefetchCpuCacheArea(const void* p, size_t s, CpuCacheType cacheType); ++ ++int PlatformIsSupport(void); ++ ++#endif +diff -ruN snappy-1.1.10/snappy-internal.h snappy-1.1.10-KAE/snappy-internal.h +--- snappy-1.1.10/snappy-internal.h 2023-03-09 07:44:00.000000000 +0800 ++++ snappy-1.1.10-KAE/snappy-internal.h 2025-08-30 18:09:22.619464100 +0800 +@@ -40,6 +40,13 @@ + #include + #endif + ++#ifdef CONF_KAESNAPPY ++extern "C" { ++ #include "kaelz4.h" ++ #include "kaelz4_log.h" ++} ++#endif ++ + #if SNAPPY_HAVE_NEON + #include + #endif +@@ -152,7 +159,8 @@ + size_t input_length, + char* op, + uint16_t* table, +- const int table_size); ++ const int table_size, ++ LZ4_CCtx* cctx); + + // Find the largest n such that + // +diff -ruN snappy-1.1.10/snappy.cc snappy-1.1.10-KAE/snappy.cc +--- snappy-1.1.10/snappy.cc 2023-03-09 07:44:00.000000000 +0800 ++++ snappy-1.1.10-KAE/snappy.cc 2025-08-30 18:09:22.621458000 +0800 +@@ -74,6 +74,14 @@ + #define SNAPPY_PREFETCH(ptr) (void)(ptr) + #endif + ++#ifdef CONF_KAESNAPPY ++extern "C" { ++ #include "kaelz4.h" ++ #include "kaelz4_log.h" ++ #include "lz4_accelerater.h" ++} ++#endif ++ + #include + #include + #include +@@ -617,6 +625,7 @@ + *op++ = LITERAL | (n << 2); + + UnalignedCopy128(literal, op); ++ // KZL_MEMCPY_16((unsigned char*)(op), (unsigned char*)(literal), len); + return op + len; + } + +@@ -637,11 +646,12 @@ + } + // When allow_fast_path is true, we can overwrite up to 16 bytes. 
+ if (allow_fast_path) { +- char* destination = op; +- const char* source = literal; +- const char* end = destination + len; ++ unsigned char* destination = (unsigned char*)op; ++ const unsigned char* source = (const unsigned char*)literal; ++ unsigned char* end = destination + len; + do { + std::memcpy(destination, source, 16); ++ // KZL_MEMCPY_16(destination, source, 16); + destination += 16; + source += 16; + } while (destination < end); +@@ -777,7 +787,7 @@ + // "end - op" is the compressed size of "input". + namespace internal { + char* CompressFragment(const char* input, size_t input_size, char* op, +- uint16_t* table, const int table_size) { ++ uint16_t* table, const int table_size, LZ4_CCtx* cctx) { /* cctx: KAE context, or NULL to use the software encoder below */ + // "ip" is the input pointer, and "op" is the output pointer. + const char* ip = input; + assert(input_size <= kBlockSize); +@@ -787,6 +797,85 @@ + const char* base_ip = ip; + + const size_t kInputMarginBytes = 15; ++ ++#ifdef CONF_KAESNAPPY ++ if (cctx == NULL) { ++ goto _softVersion; ++ } ++ // Hardware compression path ++ ++ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { ++ // Run the fragment through the hardware compressor ++ LZ4_CCtx& ctxBody = *cctx; ++ int result = kaelz4_compress(&ctxBody, input, input_size); ++ if (result) { ++ printf("Snappy kaealz4_compress failed\n"); ++ goto _softVersion; /* nothing has been emitted yet (ip/op untouched), so fall back to the software encoder; the caller uses the returned pointer as "end - dest" without a NULL check */ ++ } ++ ++ // Collect the sequences produced by the hardware and stitch them into Snappy output in software ++ uint32_t sequenceCount = 0; ++ uint32_t tempLiteralLength = 0; ++ ++ uint32_t offBase = 0; ++ uint32_t litLength = 0; ++ uint16_t mlBase = 0; ++ uint32_t idx = 0; ++ ++ seqDef* sequencespoint = ctxBody.seqStore.sequencesStart; ++ BYTE* litpoint = ctxBody.seqStore.litStart; ++ size_t lit_size = ctxBody.seqStore.maxNbLit; ++ while (sequenceCount < ctxBody.seqnum) { ++ offBase = sequencespoint->offBase + 1; ++ litLength = sequencespoint->litLength; ++ mlBase = sequencespoint->mlBase; ++ sequenceCount++; // number of sequences consumed so far ++ sequencespoint++; ++ ++ if (mlBase == 0) { ++ tempLiteralLength += litLength + mlBase + 3; ++ continue; ++ } ++ ++ mlBase -= 1; ++ litLength += tempLiteralLength; ++ tempLiteralLength = 0; 
++ ++ // printf("litLength:%d, mlBase:%d, literal: %.*s, match: %.*s \n", litLength, (int)mlBase, litLength, ip, mlBase+4, ip+litLength); ++ ++ // Literal部分编码 ++ if (litLength) { ++ op = EmitLiteral(op, ip, litLength); ++ ip += litLength; ++ } ++ ++ // // Copy部分编码 ++ if (mlBase < 8) { ++ op = EmitCopy(op, offBase, mlBase + 4); ++ } else { ++ op = EmitCopy(op, offBase, mlBase + 4); ++ } ++ ip += mlBase + 4; ++ if (SNAPPY_PREDICT_FALSE(ip >= input + input_size - kInputMarginBytes)) { ++ break; ++ } ++ } ++ //释放硬件资源 ++ // ctxBody.seqStore.lit = ctxBody.seqStore.litStart = NULL; ++ // ctxBody.seqStore.sequences = ctxBody.seqStore.sequencesStart = NULL; ++ // kaelz4_release(&ctxBody); ++ } ++ ++ //处理尾部数据 ++ if (ip < ip_end) { ++ op = EmitLiteral(op, ip, ip_end - ip); ++ // printf("last litLength:%d,last literal: %.*s \n", ip_end - ip, ip_end - ip, ip); ++ } ++ ++ return op; ++ ++_softVersion: ++#endif + if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { + const char* ip_limit = input + input_size - kInputMarginBytes; + +@@ -1616,7 +1705,45 @@ + writer->Append(ulength, p - ulength); + written += (p - ulength); + ++// std::string hex_str; ++// for (char* ptr = ulength; ptr < p; ++ptr) { ++// char buffer[3]; ++// sprintf(buffer, "%02X", static_cast(*ptr)); ++// hex_str += buffer; ++// } ++ ++// // 打印十六进制结果 ++// printf("Varint编码结果(十六进制): %s\n", hex_str.c_str()); ++ + internal::WorkingMemory wmem(N); ++#ifdef CONF_KAESNAPPY ++ LZ4_CCtx ctxBody; ++ LZ4_CCtx* cctx = &ctxBody; ++ ctxBody.kaeInited = 0; ++ ctxBody.kaeFrameMode = 1; // 相当于每个都强刷 ++ ctxBody.kaeConfig = NULL; ++ ++ ctxBody.seqStore.llCode = NULL; ++ ctxBody.seqStore.mlCode = NULL; ++ ctxBody.seqStore.ofCode = NULL; ++ ctxBody.seqStore.lit = NULL; ++ ctxBody.seqStore.litStart = NULL; ++ ctxBody.seqStore.sequencesStart = NULL; ++ ctxBody.seqStore.sequences = NULL; ++ ++ ctxBody.compressionLevel = 8; ++ ctxBody.kaeLevel = 8; ++ ctxBody.seqnum = 0; ++ ++ int ret; ++ ret = kaelz4_init(&ctxBody); ++ if (ret == 0) 
{ ++ ctxBody.kaeInited = 1; ++ } else { ++ printf("Snappy kealz4_init failed\n"); ++ cctx = NULL; ++ } ++#endif + + while (N > 0) { + // Get next block to compress (without copying if possible) +@@ -1652,7 +1779,6 @@ + // Get encoding table for compression + int table_size; + uint16_t* table = wmem.GetHashTable(num_to_read, &table_size); +- + // Compress input_fragment and append to dest + const int max_output = MaxCompressedLength(num_to_read); + +@@ -1664,14 +1790,18 @@ + // scratch_output[] region is big enough for this iteration. + char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); + char* end = internal::CompressFragment(fragment, fragment_size, dest, table, +- table_size); ++ table_size, cctx); + writer->Append(dest, end - dest); + written += (end - dest); + + N -= num_to_read; + reader->Skip(pending_advance); + } +- ++#ifdef CONF_KAESNAPPY ++ ctxBody.seqStore.lit = ctxBody.seqStore.litStart = NULL; ++ ctxBody.seqStore.sequences = ctxBody.seqStore.sequencesStart = NULL; ++ kaelz4_release(&ctxBody); ++#endif + Report("snappy_compress", written, uncompressed_size); + + return written; +diff -ruN snappy-1.1.10/snappy_unittest.cc snappy-1.1.10-KAE/snappy_unittest.cc +--- snappy-1.1.10/snappy_unittest.cc 2023-03-09 07:44:00.000000000 +0800 ++++ snappy-1.1.10-KAE/snappy_unittest.cc 2025-08-30 18:09:22.623451300 +0800 +@@ -225,7 +225,7 @@ + compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size())); + char* dest = string_as_array(&compressed) + prefix.size(); + char* end = snappy::internal::CompressFragment(input.data(), input.size(), +- dest, table, table_size); ++ dest, table, table_size, NULL); + compressed.resize(end - compressed.data()); + + // Uncompress into std::string