From 5d475bfaa288d98caf44bcbf197b594804fc136b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BD=E5=87=A4?=
Date: Wed, 23 Jul 2025 08:12:21 +0000
Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E4=BD=BF=E7=94=A8Gzip=E6=A0=BC?=
 =?UTF-8?q?=E5=BC=8F=E4=B8=8B=E5=8F=91=E6=95=B0=E6=8D=AE,=20=E5=A4=8D?=
 =?UTF-8?q?=E7=94=A8=E7=A1=AC=E4=BB=B6CRC=E5=80=BC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 白凤
---
 KAEZlib/src/v1/kaezip_async_comp.c | 65 +++++++++++++++++++++++++++++-
 KAEZlib/src/v1/kaezip_async_comp.h |  1 +
 KAEZlib/src/v1/kaezip_init.c       |  7 +++-
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/KAEZlib/src/v1/kaezip_async_comp.c b/KAEZlib/src/v1/kaezip_async_comp.c
index 38f326e..17850bd 100644
--- a/KAEZlib/src/v1/kaezip_async_comp.c
+++ b/KAEZlib/src/v1/kaezip_async_comp.c
@@ -199,7 +199,7 @@ static void kaezip_compress_async_callback(struct kaezip_compress_ctx *compress_
     struct kaezip_result *result = compress_ctx->result;
     result->status = status;
     result->dst_len = compress_ctx->dst_len;
-    if (result->ibuf_crc != NULL && status == KAE_ZLIB_SUCC) {
+    if (result->ibuf_crc != NULL && status == KAE_ZLIB_SUCC && compress_ctx->ibuf_checksum_flag != 1) {
         for (int i = 0; i < compress_ctx->src->buf_num; i++) {
             *result->ibuf_crc = KAEZIPCRC32(*result->ibuf_crc, compress_ctx->src->buf[i].data,
                 compress_ctx->src->buf[i].buf_len);
@@ -634,6 +634,57 @@ static void kaezip_async_compress_process(struct kaezip_async_ctrl *ctrl, void *
     return;
 }
 
+enum {
+    KAEZIP_GZIP_CRC_LEN = 4,     // size of the CRC32 field in the gzip trailer
+    KAEZIP_GZIP_TRAILER_LEN = 8  // CRC32 (4 bytes) followed by ISIZE (4 bytes)
+};
+
+/*
+ * Read the CRC32 that the gzip trailer carries at the end of the compressed
+ * output: the 4 bytes that start 8 bytes before the end of the produced data.
+ * The output is spread over the dst buffer list, so the field may straddle
+ * a buffer boundary.
+ *
+ * req:        request whose compress_ctx->dst list holds the output
+ * output_len: number of bytes produced, trailer included
+ * checksum:   receives the CRC32 on success
+ *
+ * Returns 0 on success, -1 when an argument is invalid or the dst list is
+ * too short to contain the CRC field.
+ */
+static int extract_checksum(const struct kaezip_async_req *req, unsigned int output_len, uint32_t *checksum)
+{
+    if (req == NULL || req->compress_ctx == NULL || req->compress_ctx->dst == NULL ||
+        checksum == NULL || output_len < KAEZIP_GZIP_TRAILER_LEN) {
+        return -1;
+    }
+
+    const struct kaezip_buffer_list *dst = req->compress_ctx->dst;
+    size_t crc_pos = output_len - KAEZIP_GZIP_TRAILER_LEN; // absolute offset of the next CRC byte
+    size_t buf_start = 0;                                  // absolute offset of dst->buf[i]
+    size_t copied = 0;
+    unsigned char crc_bytes[KAEZIP_GZIP_CRC_LEN];
+
+    for (int i = 0; i < dst->buf_num && copied < KAEZIP_GZIP_CRC_LEN; i++) {
+        size_t buf_len = dst->buf[i].buf_len;
+        const unsigned char *data = (const unsigned char *)dst->buf[i].data;
+        // crc_pos never falls behind buf_start, so the offset cannot wrap
+        // even when the CRC continues in the next buffer.
+        while (copied < KAEZIP_GZIP_CRC_LEN && crc_pos - buf_start < buf_len) {
+            crc_bytes[copied++] = data[crc_pos - buf_start];
+            crc_pos++;
+        }
+        buf_start += buf_len;
+    }
+    if (copied != KAEZIP_GZIP_CRC_LEN) {
+        return -1;
+    }
+    // gzip stores the CRC32 least-significant byte first
+    *checksum = ((uint32_t)crc_bytes[3] << 24) | ((uint32_t)crc_bytes[2] << 16) |
+        ((uint32_t)crc_bytes[1] << 8) | (uint32_t)crc_bytes[0];
+    return 0;
+}
+
 static int kaezip_async_block_padding(struct kaezip_async_req *req, const struct wd_buf_list *source,
     void *dst_tmp, struct kaezip_priv_save_info *save_info)
 {
@@ -641,6 +692,17 @@ static int kaezip_async_block_padding(struct kaezip_async_req *req, const struct
     struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data;
     unsigned int output_len = op_data->produced;
 
+    // Strip the checksum (4 bytes) and isize (4 bytes) from the reported length, but only if they are present.
+    if (req->kz_ctx[0].comp_type == WCRYPTO_DEFLATE && output_len >= KAEZIP_GZIP_TRAILER_LEN) {
+        uint32_t hw_crc = 0;
+        // Reuse the hardware CRC when it can be read; otherwise ibuf_checksum_flag stays 0
+        // and the completion callback computes the CRC in software.
+        if (req->compress_ctx->result->ibuf_crc != NULL && extract_checksum(req, output_len, &hw_crc) == 0) {
+            *req->compress_ctx->result->ibuf_crc = hw_crc;
+            req->compress_ctx->ibuf_checksum_flag = 1;
+        }
+        output_len -= KAEZIP_GZIP_TRAILER_LEN;
+    }
     return output_len;
 }
 
@@ -669,6 +731,7 @@ int kaezip_compress_async(struct kaezip_async_ctrl *ctrl, const struct kaezip_bu
     compress_ctx->save_info.prev_last_lit_ptr = NULL;
     compress_ctx->save_info.prev_last_lit_len = 0;
     compress_ctx->save_info.src = src;
+    compress_ctx->ibuf_checksum_flag = 0;
 
     if (ctrl->ctx_head) {
         ctrl->tail->next = compress_ctx;
diff --git 
a/KAEZlib/src/v1/kaezip_async_comp.h b/KAEZlib/src/v1/kaezip_async_comp.h index 29f5af0..f40959b 100644 --- a/KAEZlib/src/v1/kaezip_async_comp.h +++ b/KAEZlib/src/v1/kaezip_async_comp.h @@ -75,6 +75,7 @@ struct kaezip_compress_ctx { struct kaezip_async_req req; struct kaezip_compress_ctx *next; int status; + int ibuf_checksum_flag; // identify the checksum of inbuf is calculated or not }; struct kaezip_seq_result { diff --git a/KAEZlib/src/v1/kaezip_init.c b/KAEZlib/src/v1/kaezip_init.c index 7c4f6e8..6d848db 100644 --- a/KAEZlib/src/v1/kaezip_init.c +++ b/KAEZlib/src/v1/kaezip_init.c @@ -12,7 +12,12 @@ void *kaezip_init_v1(int win_size, int is_sgl, int comp_type) { - kaezip_ctx_t *kaezip_ctx = kaezip_get_ctx(WCRYPTO_RAW_DEFLATE, comp_type, win_size, is_sgl); + kaezip_ctx_t *kaezip_ctx = NULL; + if (comp_type == WCRYPTO_DEFLATE) { + kaezip_ctx = kaezip_get_ctx(WCRYPTO_GZIP, comp_type, win_size, is_sgl); + } else { + kaezip_ctx = kaezip_get_ctx(WCRYPTO_RAW_DEFLATE, comp_type, win_size, is_sgl); + } if (!kaezip_ctx) { US_ERR("kaezlib failed to get kaezip ctx!"); return NULL; -- Gitee From 92c20ba143b765c643d2a20e430901ca3ea7ab4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Wed, 23 Jul 2025 08:22:19 +0000 Subject: [PATCH 2/2] =?UTF-8?q?test:=20=E9=92=88=E5=AF=B9=20-s=20=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E5=A4=A7=E4=BA=8E8M=E7=9A=84=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E5=9C=BA=E6=99=AF=EF=BC=8C=E8=87=AA=E5=8A=A8=E8=AE=A1=E7=AE=97?= =?UTF-8?q?src=5Fbuf=5Fnum=EF=BC=8C=E4=BF=9D=E8=AF=81=E5=BC=82=E6=AD=A5?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E5=8F=82=E6=95=B0=E6=A0=A1=E9=AA=8C=E9=80=9A?= =?UTF-8?q?=E8=BF=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- .../kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c | 2 +- KAELz4/test/kzip/main.c | 42 ++++++++++++------- KAELz4/test/kzip/runFunc.sh | 7 ++-- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git 
a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c index f5d6b13..4dc0331 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c @@ -16,7 +16,7 @@ static int lz4async_block_decompress(void *sess, const struct kaelz4_buffer_list } static int lz4_bound(int src_len) { - return LZ4F_compressFrameBound(src_len, NULL); + return LZ4F_compressFrameBound(src_len, NULL) * 1.2; } // LZ4 初始化 static int lz4_async_block_init(struct compress_ctx *ctx) { diff --git a/KAELz4/test/kzip/main.c b/KAELz4/test/kzip/main.c index c0defd8..6fc048e 100644 --- a/KAELz4/test/kzip/main.c +++ b/KAELz4/test/kzip/main.c @@ -451,7 +451,7 @@ static void compress_async_callback(struct kaelz4_result *result) const char *alg_name = param->ctx->algorithm->name; if(strcmp(alg_name, "kaelz4async_lz77_frame") == 0) { if (KAELZ4_rebuild_lz77_to_frame(&param->src, &param->tuple, &param->dst, result, NULL) != 0) { - printf("[user]KAELZ4_rebuild_lz77_to_block : %d\n", result->status); + printf("[user]KAELZ4_rebuild_lz77_to_frame : %d\n", result->status); } } else { if (KAELZ4_rebuild_lz77_to_block(&param->src, &param->tuple, &param->dst, result) != 0) { @@ -527,15 +527,6 @@ static void compress_ctx_init(struct compress_ctx *ctx, int compress_or_decompre ctx->with_crc = is_test_crc; ctx->src_buf_num = 1; ctx->usr_map = NULL; - if (g_file_chunk_size && ((size_t)g_file_chunk_size * 1024) <= HPAGE_SIZE && ((ctx->algorithm->async_compress != NULL && ctx->compress_or_decompress != 0) - || (ctx->algorithm->async_decompress != NULL && !ctx->compress_or_decompress))) { - // 此处src_buf_num可修改为其他值,用于测试多个链表节点的功能和性能。 - ctx->src_buf_num = 4; - // 分片为4k的模式下,使用单个buf节点性能最优,比4个节点的情况性能提升约4%。 - if(g_file_chunk_size == 4) { - ctx->src_buf_num = 1; - } - } ctx->all_delays = (uint64_t *)malloc(sizeof(uint64_t) * MAX_LATENCY_COUNT); memset(ctx->param_buf, 0, ctx->inflight_num * sizeof(struct compress_param)); @@ -545,8 +536,9 @@ 
static void compress_ctx_init(struct compress_ctx *ctx, int compress_or_decompre
     int is_test_lz77_block = strcmp(algorithm->name, "kaelz4async_lz77") == 0;
     int is_test_lz77_frame = strcmp(algorithm->name, "kaelz4async_lz77_frame") == 0;
     if ((is_test_lz77_block || is_test_lz77_frame) && ctx->compress_or_decompress != 0) {
-        if (g_file_chunk_size == 0 || (size_t)g_file_chunk_size * 1024 > HPAGE_SIZE || (size_t)g_file_chunk_size * 1024 >= HW_MAX_SGE_LEN / 2) {
+        if (g_file_chunk_size == 0 || (size_t)g_file_chunk_size * 1024 > HPAGE_SIZE || (size_t)g_file_chunk_size * 1024 >= HW_MAX_SGE_LEN / 4) {
             // TBM: 当前chunk_size超过2M kzip不支持lz77模式,因为大页内存不连续
+            // For large inputs such as silesia.tar the 2M limit must be enforced strictly, otherwise allocating contiguous space fails; smaller data sets may use a chunk_size above 2M.
             ctx->algorithm = get_algorithm("kaelz4async_block");
             if (is_test_lz77_frame) {
                 ctx->algorithm = get_algorithm("kaelz4async_frame");
@@ -671,6 +663,22 @@ static uLong get_src_content(struct compress_ctx *ctx, const char* in_filename,
     }
     return src_len;
 }
+// Choose how many SGL nodes each source chunk is split into for the async interfaces.
+static void check_and_reset_src_buf_num(struct compress_ctx *ctx) {
+    int is_asyc_compress = ctx->algorithm->async_compress != NULL && ctx->compress_or_decompress != 0;
+    int is_asyc_decompress = ctx->algorithm->async_decompress != NULL && !ctx->compress_or_decompress;
+    size_t chunk_bytes = (size_t)g_file_chunk_size * 1024; // -s is given in KiB; widen before multiplying
+    if (!is_asyc_compress && !is_asyc_decompress) {
+        return; // nothing async is being tested: keep the src_buf_num already set on ctx
+    }
+    if (chunk_bytes > HW_MAX_SGE_LEN) {
+        // One SGE cannot exceed HW_MAX_SGE_LEN (8M), so a larger chunk is split across several nodes.
+        ctx->src_buf_num = chunk_bytes / HW_MAX_SGE_LEN + 1;
+    } else if (chunk_bytes <= HPAGE_SIZE) {
+        // 4 nodes exercises multi-node lists; 4K chunks stay single-node (about 4% faster than 4 nodes).
+        ctx->src_buf_num = (g_file_chunk_size == 4) ? 1 : 4;
+    }
+}
 
 #define PRINT_DELAY_DATA_LEN 6
 static void printf_perf_data(struct compress_ctx *ctx, struct timeval start, struct timeval stop, uLong src_len,
@@ -804,7 +812,7 @@ void comp_and_decomp_fill_dst_buf(struct compress_param 
*param, size_t dst_len, if ((ctx->is_lz77_mode && ctx->compress_or_decompress) || ctx->is_zlib) { if (ctx->is_lz77_mode) { - dst_len = KAELZ4_compress_get_tuple_buf_len(dst_len); + dst_len = KAELZ4_compress_get_tuple_buf_len(param->src_len); } unsigned int tmp_size = MIN(dst_len, HW_MAX_SGE_LEN); // HW_MAX_SGE_LEN: hisi_zip约束sge len不超过8M comp_and_decomp_fill_buffer_list(&param->tuple, tmp_size, dst_len, ctx->tuple_buf, ctx->tuple_buf_offset); @@ -916,6 +924,7 @@ static int start_work(struct compress_ctx *ctx, const char* in_filename, const c { void *inbuf = NULL; uLong src_len = get_src_content(ctx, in_filename, &inbuf); + check_and_reset_src_buf_num(ctx); // fprintf(stdout, "input_size is %luB\n", src_len); ctx->src_buf = inbuf; ctx->src_len = src_len; @@ -1010,6 +1019,9 @@ static int start_work(struct compress_ctx *ctx, const char* in_filename, const c int huge_page_num = (int)(src_len * sizeof(Bytef) / HPAGE_SIZE) + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 size_t total_size = huge_page_num * HPAGE_SIZE; release_huge_pages(inbuf, total_size); + if(ctx->tuple_buf != NULL) { + release_huge_pages(ctx->tuple_buf, ctx->tuple_buf_len); + } } else { free(inbuf); } @@ -1044,6 +1056,7 @@ static int start_work_decompress( { void *inbuf = NULL; uLong src_len = get_src_content(ctx, in_filename, &inbuf); + check_and_reset_src_buf_num(ctx); // 从文件读取元数据 struct fragment_metadata *loaded_fragments = NULL; @@ -1093,7 +1106,6 @@ static int start_work_decompress( gettimeofday(&start, NULL); size_t out_offset = 0; // 总内存中的偏移,每一小块儿使用不同的偏移。 - // 解压仅1次 for (j = 0; j < ctx->loop_times; j++) { if (j > 0) { // 为第1次之后的循环的产物复用空间 out_offset = output_sz; @@ -1172,6 +1184,9 @@ static int start_work_decompress( int huge_page_num = (int)(src_len * sizeof(Bytef) / HPAGE_SIZE) + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 size_t total_size = huge_page_num * HPAGE_SIZE; release_huge_pages(inbuf, total_size); + if(ctx->tuple_buf != NULL) { + release_huge_pages(ctx->tuple_buf, ctx->tuple_buf_len); + } } else { 
free(inbuf); } @@ -1202,7 +1217,6 @@ static void auto_get_parent_cpu_affinity(int *arr, int *count) perror("sched_getaffinity"); return; } - // 打印父进程的亲和性 for (int i = 0; i < CPU_SETSIZE; i++) { if (CPU_ISSET(i, &parent_affinity)) { arr[(*count)++] = i; diff --git a/KAELz4/test/kzip/runFunc.sh b/KAELz4/test/kzip/runFunc.sh index 529c796..5d2e9d6 100644 --- a/KAELz4/test/kzip/runFunc.sh +++ b/KAELz4/test/kzip/runFunc.sh @@ -5,10 +5,10 @@ export KAE_LZ4_COMP_TYPE=8 sh build.sh kaelz4 -Algthm=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77" "kaezlib_deflate" "kaezlibasync_deflate") +Algthm=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77" "kaelz4async_lz77_frame" "kaezlib_deflate" "kaezlibasync_deflate") Datasets=("calgary" "itemdata" "dickens" "mozilla" "mr" "nci" "ooffice" "osdb" "reymont" "samba" "sao" "webster" "xml" "x-ray") Datasets=("calgary" "itemdata" "ooffice" "osdb" "samba" "webster" "xml" "x-ray") -BlockSize=("4" "8" "16" "60" "64" "68" "128" "512" "1024" "2090" "8191" "8192" "8193" "10244") +BlockSize=("4" "8" "16" "60" "64" "68" "128" "512" "1024" "2090" "8191" "8192" "8193" "10244" "0") Polling=("1" "0") @@ -52,10 +52,11 @@ for da in "${Datasets[@]}"; do rm -rf $testFileComped.meta rm -rf $testFileOrigin.meta ./kzip -A $alg -m 1 -f $testFile -o $testFileComped -n 2 -s $bs -i 256 -p $polling >> $LogFile # 压缩测试 - ./kzip -d -A $alg -m 1 -f $testFileComped -o $testFileOrigin -n 2 -s $bs -i 256 >> $LogFile # 压缩测试 + ./kzip -d -A $alg -m 1 -f $testFileComped -o $testFileOrigin -n 2 -s $bs -i 256 -p $polling >> $LogFile # 解压测试 diffFile $testFile $testFileOrigin done done + sleep 1 done done -- Gitee