From 2f33c66bd94502f105ee3f6fbf633803b47e2754 Mon Sep 17 00:00:00 2001 From: shen-chenyang1 Date: Mon, 1 Sep 2025 10:51:30 +0800 Subject: [PATCH 1/2] add: perftest/kzip in /script --- scripts/perftest/kzip/Makefile | 66 + scripts/perftest/kzip/README.md | 156 ++ scripts/perftest/kzip/alg/common/manage.c | 62 + scripts/perftest/kzip/alg/common/manage.h | 74 + scripts/perftest/kzip/alg/kaelz4/lz4.c | 49 + scripts/perftest/kzip/alg/kaelz4/lz4Async.h | 19 + .../perftest/kzip/alg/kaelz4/lz4AsyncBlock.c | 235 +++ .../perftest/kzip/alg/kaelz4/lz4AsyncFrame.c | 72 + .../perftest/kzip/alg/kaelz4/lz4AsyncLz77.c | 51 + .../kzip/alg/kaelz4/lz4AsyncLz77Frame.c | 51 + scripts/perftest/kzip/alg/kaelz4/lz4Frame.c | 70 + scripts/perftest/kzip/alg/kaezlib/deflate.c | 80 + .../perftest/kzip/alg/kaezlib/deflateAsync.c | 225 +++ scripts/perftest/kzip/alg/kaezlib/zlib.c | 54 + scripts/perftest/kzip/alg/kaezlib/zlibAsync.h | 15 + scripts/perftest/kzip/build.sh | 21 + scripts/perftest/kzip/framework/datagen.c | 189 +++ scripts/perftest/kzip/framework/datagen.h | 40 + scripts/perftest/kzip/framework/delayRecord.c | 89 ++ scripts/perftest/kzip/framework/hugepage.c | 134 ++ scripts/perftest/kzip/framework/main.c | 1304 +++++++++++++++++ scripts/perftest/kzip/include/compress_ctx.h | 155 ++ scripts/perftest/kzip/include/delayRecord.h | 17 + scripts/perftest/kzip/include/platform.h | 155 ++ scripts/perftest/kzip/include/util.h | 697 +++++++++ scripts/perftest/kzip/kzelz4.cnf | 2 + .../perftest/kzip/scripts/parse_perf_log.py | 171 +++ .../kzip/scripts/parse_perf_log_by_path.py | 56 + .../perftest/kzip/scripts/runAffinityFunc.sh | 79 + scripts/perftest/kzip/scripts/runDelay.sh | 187 +++ scripts/perftest/kzip/scripts/runError.sh | 113 ++ scripts/perftest/kzip/scripts/runFunc.sh | 71 + scripts/perftest/kzip/scripts/runLogfunc.sh | 57 + scripts/perftest/kzip/scripts/runPerf.sh | 132 ++ 34 files changed, 4948 insertions(+) create mode 100644 scripts/perftest/kzip/Makefile create mode 100644 
scripts/perftest/kzip/README.md create mode 100644 scripts/perftest/kzip/alg/common/manage.c create mode 100644 scripts/perftest/kzip/alg/common/manage.h create mode 100644 scripts/perftest/kzip/alg/kaelz4/lz4.c create mode 100644 scripts/perftest/kzip/alg/kaelz4/lz4Async.h create mode 100644 scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c create mode 100644 scripts/perftest/kzip/alg/kaelz4/lz4AsyncFrame.c create mode 100644 scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77.c create mode 100644 scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77Frame.c create mode 100644 scripts/perftest/kzip/alg/kaelz4/lz4Frame.c create mode 100644 scripts/perftest/kzip/alg/kaezlib/deflate.c create mode 100644 scripts/perftest/kzip/alg/kaezlib/deflateAsync.c create mode 100644 scripts/perftest/kzip/alg/kaezlib/zlib.c create mode 100644 scripts/perftest/kzip/alg/kaezlib/zlibAsync.h create mode 100644 scripts/perftest/kzip/build.sh create mode 100644 scripts/perftest/kzip/framework/datagen.c create mode 100644 scripts/perftest/kzip/framework/datagen.h create mode 100644 scripts/perftest/kzip/framework/delayRecord.c create mode 100644 scripts/perftest/kzip/framework/hugepage.c create mode 100644 scripts/perftest/kzip/framework/main.c create mode 100644 scripts/perftest/kzip/include/compress_ctx.h create mode 100644 scripts/perftest/kzip/include/delayRecord.h create mode 100644 scripts/perftest/kzip/include/platform.h create mode 100644 scripts/perftest/kzip/include/util.h create mode 100644 scripts/perftest/kzip/kzelz4.cnf create mode 100644 scripts/perftest/kzip/scripts/parse_perf_log.py create mode 100644 scripts/perftest/kzip/scripts/parse_perf_log_by_path.py create mode 100644 scripts/perftest/kzip/scripts/runAffinityFunc.sh create mode 100644 scripts/perftest/kzip/scripts/runDelay.sh create mode 100644 scripts/perftest/kzip/scripts/runError.sh create mode 100644 scripts/perftest/kzip/scripts/runFunc.sh create mode 100644 scripts/perftest/kzip/scripts/runLogfunc.sh create mode 100644 
scripts/perftest/kzip/scripts/runPerf.sh diff --git a/scripts/perftest/kzip/Makefile b/scripts/perftest/kzip/Makefile new file mode 100644 index 0000000..b01e269 --- /dev/null +++ b/scripts/perftest/kzip/Makefile @@ -0,0 +1,66 @@ +# 编译器 +CC = gcc +CFLAGS = -g -O3 -fstack-protector-all -Wall -Werror +LDFLAGS = -lz -lnuma -lrt -lpthread + +# 目标可执行文件 +TARGET = kzip + +INCLD = -I alg/common -I include + +# 主程序源文件 +MAIN_SRC = $(wildcard framework/*.c) +MAIN_SRC += $(wildcard alg/common/*.c) +#MAIN_SRC += $(wildcard scene_test_functions/*.c) + +# 模块选择(默认编译所有模块) +MODULES ?= $(notdir $(wildcard alg/kae)) +MODULES_ALL ?= $(notdir $(wildcard alg/*)) + +MODULE_DEFINES := $(foreach mod,$(MODULES),-DCONFIG_$(shell echo $(mod) | tr '[:lower:]' '[:upper:]')) +CFLAGS += $(MODULE_DEFINES) +CFLAGS += $(INCLD) + +# 定义每个模块需要的链接库 +MODULE_LIBS_kaelz4 := -L/usr/local/kaelz4/lib -llz4 -lkaelz4 +MODULE_LIBS_kaezlib := -L/usr/local/kaezip/lib -lkaezip + +# 收集所有模块的链接库 +MODULE_LDLIBS := $(foreach mod,$(MODULES),$(MODULE_LIBS_$(mod))) + +# 添加到全局链接选项 +LDFLAGS += $(MODULE_LDLIBS) + +# 递归编译模块(子 Makefile 负责生成 OBJS) +MAIN_SRC += $(foreach mod,$(MODULES), $(wildcard alg/$(mod)/*.c)) +MAIN_OBJ = $(MAIN_SRC:.c=.o) + +ALL_SRC += $(foreach mod,$(MODULES_ALL), $(wildcard alg/$(mod)/*.c)) +ALL_OBJS = $(ALL_SRC:.c=.o) + +# 最终所有目标文件 +OBJS = $(MAIN_OBJ) + +# 默认目标 +all: $(TARGET) + +# 链接生成可执行文件 +$(TARGET): $(OBJS) + $(CC) -o $@ $^ $(LDFLAGS) + +# 主程序编译 +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ + +# 清理(递归清理所有模块) +clean: + rm -f $(OBJS) $(TARGET) $(ALL_OBJS) + +# 打印帮助信息 +help: + @echo "Usage:" + @echo " make # 编译所有模块" + @echo " make MODULES=\"module1 module2\" # 选择性编译模块" + @echo " make clean # 清理所有生成的文件" + +.PHONY: all clean help FORCE diff --git a/scripts/perftest/kzip/README.md b/scripts/perftest/kzip/README.md new file mode 100644 index 0000000..361f799 --- /dev/null +++ b/scripts/perftest/kzip/README.md @@ -0,0 +1,156 @@ + +# 压缩性能测试小工具 kzip + +## 安装 +1、安装依赖 +``` +cd KAE +# 安装 frame 相关头文件 +yum install 
lz4-devel +# 覆盖安装本次新增异步接口相关头文件 +sh build.sh uadk +sh build.sh lz4 +sh build.sh zlib +``` +2、打包 kzip +``` +# 在测试目录中 +sh build.sh +``` + +## 参数说明 +所有参数均可选 + +- -A 算法类型 +``` +kaelz4: 同步lz4 block格式压缩 +kaelz4_frame: 同步lz4 frame格式压缩 +kaelz4async_block: 异步lz4 block格式压缩 +kaelz4async_frame: 异步lz4 frame格式压缩 +kaelz4async_lz77: 异步lz4 原始lz77_raw格式压缩 +kaezlib_deflate: 同步zlib deflate_raw格式压缩 +kaezlibasync_deflate: 异步zlib deflate_raw格式压缩 +``` +- -d 处理压缩任务或解压任务 +default:null 默认压缩任务。 + +- -m 并发进程数量 +默认值1,表示仅一个主进程,对应单并发场景。大于1时,使用fork()复制进程进行测试。异步测试时推荐并发1。 + +- -t 并发线程数量 +默认1,不使用pthread_create()创建更多子线程。大于1时-m参数失效。 + +- -i 客户端流量控制,inflight num。 +异步压缩时,同一时间依次下发压缩任务的数量。默认256,最大1024。 + +- -g 是否展示时延数据 +默认1展示。 + +- -f 待处理的文件路径 +需要是存在的文件,将读取文件的内容进行压缩 + +- -o 处理的结果 +如果存在,则将压缩或解压的结果保存到该路径 + +- -s 输入分片大小 +对输入数据的分片处理。单位KB。默认0不分片。解压时无效 + +- -n 测试循环次数 +默认1000 + +- -P 大页配置 +是否使用大页存储待压缩数据。默认0不使用 + +- -p poll模式配置 +是否开启poll模式进行压缩。默认0不开启。 + +- -r crc32校验处理 +是否携带crc32校验值。默认0 不携带。 + + +## 使用限制 +1、异步接口硬件环境限制:kunpeng 920 7280z +2、最大性能测试时需要开启fast,详见 KAELZ4/README.md 《KAELz4 异步压缩接口用户使用指南》 +3、KAE 加速器与 NUMA 节点存在绑定关系。将进程绑定至特定 NUMA 节点后,该进程即可使用该节点对应的 KAE 硬件加速器。 +4、不支持SGL模式分段buffer切软算。 +5、kzip工具通过使用大页内存获取真实的物理地址,测试SGL模式的时候,要先申请大页内存。推荐参考如下命令: +``` +sysctl vm.nr_hugepages=10000 +echo 10 | tee /sys/devices/system/node/node0/hugepages/hugepages-1048576kB/nr_hugepages +``` + +## 测试命令 + +```shell +# 不同数据集下接口功能测试 +sh runFunc.sh +``` + +polling模式lz77_raw格式转换为frame格式压缩接口测试 +```shell +# 1、单IO时延数据 +sh runPerf.sh -A kaelz4async_lz77_frame -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 1 -p 1 -f [path to calgary.tar] +``` + +polling模式frame格式压缩接口测试: +```shell +# 1、单IO时延数据:等价串行流程,结果表示单个IO的压缩时延。 +sh runPerf.sh -A kaelz4async_frame -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 1 -p 1 -f [path to calgary.tar] +# 2、单核压缩能力:单线程加压,结果表示单线程能提供的压缩带宽与时延。 +sh runPerf.sh -A kaelz4async_frame -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 4 -p 1 -f [path to calgary.tar] +``` + +非polling模式frame格式压缩接口测试: +```shell +# 1、单IO时延测试:等价串行流程,结果表示单个IO的压缩时延。 
+export KAE_LZ4_ASYNC_THREAD_NUM=1 +sh runPerf.sh -A kaelz4async_frame -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 1 -p 0 -f [path to calgary.tar] + +# 2、单核压缩能力测试:单线程加压,结果表示单线程能够提供的压缩带宽与时延。 +export KAE_LZ4_ASYNC_THREAD_NUM=1 +sh runPerf.sh -A kaelz4async_frame -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 4 -p 0 -f [path to calgary.tar] + +# 3、单KAE能力:多线程加压,结果表示满足5G@4K的压缩带宽前提的时延。 +export KAE_LZ4_ASYNC_THREAD_NUM=5 # 可选5或6 +sh runPerf.sh -A kaelz4async_frame -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 16 -p 0 -f [path to calgary.tar] + +#4、单KAE最大能力:多线程满压,结果表示单KAE能够提供的最大压缩带宽。 +export KAE_LZ4_ASYNC_THREAD_NUM=8 +sh runPerf.sh -A kaelz4async_frame -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 64 -p 0 -f [path to calgary.tar] +``` + + +zlib下deflate_raw格式异步压缩接口测试: +```shell +# 1、单IO时延测试:等价串行流程,结果表示单个IO的压缩时延。 +sh runPerf.sh -A kaezlibasync_deflate -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 1 -p 1 -f [path to calgary.tar] + +# 2、单核压缩能力测试:单线程加压,结果表示单线程能够提供的压缩带宽与时延。 +sh runPerf.sh -A kaezlibasync_deflate -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 4 -p 1 -f [path to calgary.tar] + +# 3、单KAE能力测试:单线程继续加压,结果表示单个KAE能够提供的压缩带宽与时延。 +sh runPerf.sh -A kaezlibasync_deflate -m 1 -n 20000 -s [4/8/16/32/64] -r 1 -k 1 -i 8 -p 1 -f [path to calgary.tar] +``` + +zlib下deflate_raw格式单个进程同时使用多个KAE的测试: +```shell +# 设置环境变量,以使用多个KAE。 +# 注意:跨numa使用KAE会影响性能。达到最优性能推荐当前进程使用自身所处CPU节点对应的numa上的KAE。 +# 以下测试命令将进程绑定到numa0上,并同时使用numa0和numa1对应的KAE。 +export KAE_ZIP_QUEUE_NODES_MASK=3 # 使用NUMA 0,1 +# 双KAE压缩解压能力测试 +sh runPerf.sh -A kaezlibasync_deflate -m 1 -n 20000 -s [4/8/16/32/64] -k 1 -i 64 -p 1 -e 2 -f [path to calgary.tar] + +# 环境变量 KAE_ZIP_QUEUE_NODES_MASK 的使用说明: +# export KAE_ZIP_QUEUE_NODES_MASK=15 # 十进制15 → 二进制 1111 → 使用NUMA 0,1,2,3 +# export KAE_ZIP_QUEUE_NODES_MASK=12 # 十进制12 → 二进制 1100 → 使用NUMA 2,3 +# export KAE_ZIP_QUEUE_NODES_MASK=11 # 十进制11 → 二进制 1011 → 使用NUMA 0,1,3 +# export KAE_ZIP_QUEUE_NODES_MASK=7 # 十进制7 → 二进制 0111 → 使用NUMA 0,1,2 +# export KAE_ZIP_QUEUE_NODES_MASK=5 # 十进制5 → 二进制 0101 → 
使用NUMA 0,2 +``` + +``` +# 单一场景接口组合使用demo测试 +export LD_LIBRARY_PATH=/usr/local/kaezip/lib/:/usr/local/kaelz4/lib/:$LD_LIBRARY_PATH +./kzip -T 1 +``` \ No newline at end of file diff --git a/scripts/perftest/kzip/alg/common/manage.c b/scripts/perftest/kzip/alg/common/manage.c new file mode 100644 index 0000000..2b74695 --- /dev/null +++ b/scripts/perftest/kzip/alg/common/manage.c @@ -0,0 +1,62 @@ + +#include "manage.h" + +#include +#include +#include + + +#define MAX_ALGORITHMS 10 // 支持最多 10 种算法 + +static compression_algorithm_t *algorithm_list[MAX_ALGORITHMS]; +static int algorithm_count = 0; + +// 注册算法 +void register_algorithm(compression_algorithm_t *algorithm) { + if (algorithm_count < MAX_ALGORITHMS) { + algorithm_list[algorithm_count++] = algorithm; + } else { + fprintf(stderr, "Error: Too many compression algorithms registered.\n"); + } +} + +// 查找算法 +compression_algorithm_t *get_algorithm(const char *name) +{ + for (int i = 0; i < algorithm_count; i++) { + if (strcmp(algorithm_list[i]->name, name) == 0) { + return algorithm_list[i]; + } + } + return NULL; +} +int vaild_algorithm(const char *name) +{ + for (int i = 0; i < algorithm_count; i++) { + if (strcmp(algorithm_list[i]->name, name) == 0) { + return 0; + break; + } + } + printf("Error: Invalid compression algorithm: %s\n", name); + return -1; +} + +// 初始化所有算法 +void initialize_algorithms(void) { +#ifdef CONFIG_KAELZ4 + register_lz4_algorithm(); + register_lz4_frame_algorithm(); + register_lz4async_block_algorithm(); + register_lz4async_frame_algorithm(); + register_lz4async_lz77_algorithm(); + register_lz4async_lz77_frame_algorithm(); +#endif + +#ifdef CONFIG_KAEZLIB + register_zlib_algorithm(); + register_zlib_deflate_algorithm(); + register_zlibasync_block_algorithm(); +#endif + +} \ No newline at end of file diff --git a/scripts/perftest/kzip/alg/common/manage.h b/scripts/perftest/kzip/alg/common/manage.h new file mode 100644 index 0000000..82be9fb --- /dev/null +++ 
b/scripts/perftest/kzip/alg/common/manage.h @@ -0,0 +1,74 @@ + +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * @Description: contain and manage all support algorithms + * @Author: Ma Xiaofeng + * @Date: 2025-3-31 + * @LastEditTime: 2025-3-31 + */ + + +#ifndef MANAGE_H +#define MANAGE_H +#include +#include + +#define HPAGE_SIZE (1024 * 1024 * 1024) // 1GB大页 +#define HW_MAX_SGE_LEN 0x800000UL + +typedef enum ALG_TYPE { + ALG_KAE_LZ4, + ALG_KAE_ZLIB, + ALG_QAT_LZ4, + ALG_QAT_DEFLATE, +} alg_type_enum; + +struct compress_ctx; +struct compress_param; +struct compress_out_buf; +struct compress_session; + +typedef struct { + enum ALG_TYPE alg_type; + const char *name; + // 同步接口 + int (*init)(struct compress_ctx *ctx); + int (*bound)(int src_len); + // 我们约定:由框架统一读取待处理的数据以及大小。统一申请待存储的空间以及大小。 + // 压缩解压算法需要输出正确的处理后产物,输出正确的 dst_len。 + // 统一返回 0 表示算法OK + // 返回其他表示压缩解压异常 + int (*compress)(struct compress_param *param); + int (*decompress)(struct compress_param *param); + void (*cleanup)(struct compress_ctx *ctx); + void (*prepare_param)(struct compress_ctx *ctx, struct compress_param *param); + void (*prepare_outbuf)(struct compress_ctx *ctx, struct compress_out_buf *out_buf, struct compress_param *param); + void (*poll)(struct compress_session *sess, int budget); // polling 模式下,根据session查询结果的接口 + // 异步接口 + int (*async_compress)(struct compress_session *sess, struct compress_param *param); + + int (*async_decompress)(struct compress_session *sess, struct compress_param *param); +} compression_algorithm_t; + +// 注册算法 +void register_algorithm(compression_algorithm_t *algorithm); + +// 根据名称查找算法 +compression_algorithm_t *get_algorithm(const char *name); + +int vaild_algorithm(const char *name); + +// 初始化所有算法(自动注册) +void initialize_algorithms(void); + +void register_lz4_algorithm(void); +void register_lz4_frame_algorithm(void); +void register_lz4async_block_algorithm(void); +void register_lz4async_frame_algorithm(void); +void 
register_lz4async_lz77_algorithm(void); +void register_lz4async_lz77_frame_algorithm(void); +void register_zlib_algorithm(void); +void register_zlib_deflate_algorithm(void); +void register_zlibasync_block_algorithm(void); + +#endif diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4.c b/scripts/perftest/kzip/alg/kaelz4/lz4.c new file mode 100644 index 0000000..05248ca --- /dev/null +++ b/scripts/perftest/kzip/alg/kaelz4/lz4.c @@ -0,0 +1,49 @@ +#include "manage.h" +#include "compress_ctx.h" +#include +#include +#include "lz4Async.h" + +// LZ4 压缩实现 +static int lz4_compress(struct compress_param *param) +{ + int ret = LZ4_compress_default((const char *)param->src_buf + param->src_buf_offset, (char *)param->dst_buf, param->src_len, param->dst_len); + param->dst_len = ret > 0 ? ret : 0; + param->done = 1; + return ret > 0 ? 0 : ret; +} + +// LZ4 解压实现 +static int lz4_decompress(struct compress_param *param) +{ + int ret = LZ4_decompress_safe((const char *)param->src_buf + param->src_buf_offset, (char *)param->dst_buf, param->src_len, param->dst_len); + param->dst_len = ret > 0 ? ret : 0; + param->done = 1; + return ret > 0 ? 0 : ret; +} + +static int lz4_bound(int src_len) { + return LZ4_compressBound(src_len); +} + +// LZ4 初始化 +static int lz4_init(struct compress_ctx *ctx) { + return 0; +} + +// LZ4 算法实例 +compression_algorithm_t lz4_algorithm = { + .name = "kaelz4", + .bound = lz4_bound, + .compress = lz4_compress, + .decompress = lz4_decompress, + .prepare_param = lz4_prepare_param_from_ctx, + .prepare_outbuf = lz4_prepre_out_buf, + .init = lz4_init +}; + +// 注册 LZ4 算法 +void register_lz4_algorithm(void) +{ + register_algorithm(&lz4_algorithm); +} \ No newline at end of file diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4Async.h b/scripts/perftest/kzip/alg/kaelz4/lz4Async.h new file mode 100644 index 0000000..bf3c7c7 --- /dev/null +++ b/scripts/perftest/kzip/alg/kaelz4/lz4Async.h @@ -0,0 +1,19 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. 
All rights reserved. + * @Description: common functions for algorithms + * @Author: Ma Xiaofeng + * @Date: 2025-7-31 + * @LastEditTime: 2025-7-31 + */ + +#ifndef LZ4_ASYNC_H +#define LZ4_ASYNC_H + +int lz4_async_init(struct compress_ctx *ctx); +void lz4_async_cleanup(struct compress_ctx *ctx); +void lz4_async_polling(struct compress_session *sess, int budget); +void lz4_compress_async_callback(struct kaelz4_result *result); +void lz4_prepare_param_from_ctx(struct compress_ctx *ctx, struct compress_param *params); +void lz4_prepre_out_buf(struct compress_ctx *ctx, struct compress_out_buf *out_buf, struct compress_param *params); + +#endif diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c new file mode 100644 index 0000000..dbb4109 --- /dev/null +++ b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c @@ -0,0 +1,235 @@ +#include "manage.h" +#include "compress_ctx.h" +#include +#include +#include +#include +#include "delayRecord.h" + +extern int g_log_level; + +void lz4_compress_async_callback(struct kaelz4_result *result) +{ + // printf("[user]异步 callback 了!!\n"); + if (unlikely(result->status != 0)) { + printf("[user]回调压缩异常 : %d\n", result->status); + } + struct compress_param *param = (struct compress_param *)result->user_data; + + if (unlikely(param->ctx->is_lz77_mode)) { + const char *alg_name = param->ctx->algorithm->name; + if(strcmp(alg_name, "kaelz4async_lz77_frame") == 0) { + if (KAELZ4_rebuild_lz77_to_frame(¶m->kaelz4_param.src, ¶m->kaelz4_param.tuple, ¶m->kaelz4_param.dst, result, NULL) != 0) { + printf("[user]KAELZ4_rebuild_lz77_to_frame : %d\n", result->status); + } + } else { + if (KAELZ4_rebuild_lz77_to_block(¶m->kaelz4_param.src, ¶m->kaelz4_param.tuple, ¶m->kaelz4_param.dst, result) != 0) { + printf("[user]KAELZ4_rebuild_lz77_to_block : %d\n", result->status); + } + } + } + + param->dst_len = result->dst_len; + if ((!param->ctx->is_polling) && ((param->ctx->algorithm->async_compress != NULL 
&& param->ctx->compress_or_decompress != 0) || + ((param->ctx->algorithm->async_decompress != NULL && param->ctx->compress_or_decompress == 0)))) { + wmb(); + } + + if (g_log_level == 1) { + uint64_t end = get_ns(); + uint64_t timeonce = end - param->start_time; + if(timeonce > 0) { + record_latency(param->ctx->all_delays, timeonce, param->sn); + } + } + + param->done = 1; + return; +} + +static void comp_and_decomp_fill_buffer_list(struct kaelz4_buffer_list *buf_list, size_t sge_len, size_t rem_len, void *start_addr, size_t offset) +{ + size_t tmp_offset = 0; + unsigned int i = 0; + unsigned int tmp_size; + + while (rem_len) { + tmp_size = MIN(sge_len, rem_len); + buf_list->buf[i].data = start_addr + offset + tmp_offset; + if (((offset + tmp_offset) % HPAGE_SIZE) + tmp_size <= HPAGE_SIZE) { + buf_list->buf[i].buf_len = tmp_size; + } else { + buf_list->buf[i].buf_len = HPAGE_SIZE - ((offset + tmp_offset) % HPAGE_SIZE); + } + tmp_offset += buf_list->buf[i].buf_len; + rem_len -= buf_list->buf[i].buf_len; + i++; + buf_list->buf_num = i; + } +} + +static void comp_and_decomp_fill_src_buf(struct compress_param *param) +{ + struct compress_ctx *ctx = param->ctx; + size_t src_len = param->src_len; + void *start_addr = param->src_buf; + size_t offset = param->src_buf_offset; + + kaelz4_param *now_alg_params = ¶m->kaelz4_param; + + now_alg_params->src.buf = now_alg_params->src_buf; + now_alg_params->src.usr_data = ctx->page_info; + unsigned int tmp_size = src_len / ctx->src_buf_num; + comp_and_decomp_fill_buffer_list(&now_alg_params->src, tmp_size, src_len, start_addr, offset); + + param->src_len = src_len; + now_alg_params->result.src_size = src_len; +} + +static void comp_and_decomp_fill_dst_buf(struct compress_param *param) +{ + struct compress_ctx *ctx = param->ctx; + void *start_addr = param->dst_buf; + size_t dst_len = param->dst_len; + + kaelz4_param *now_alg_params = ¶m->kaelz4_param; + + now_alg_params->dst.buf_num = 1; + now_alg_params->dst.buf = 
now_alg_params->dst_buf; + now_alg_params->dst.buf[0].data = start_addr; + now_alg_params->dst.buf[0].buf_len = dst_len; + now_alg_params->tuple.buf = now_alg_params->tuple_buf; + now_alg_params->tuple.usr_data = ctx->tuple_page_info; + now_alg_params->result.dst_len = dst_len; + + if ((ctx->is_lz77_mode && ctx->compress_or_decompress) || ctx->is_zlib) { + if (ctx->is_lz77_mode) { + dst_len = param->src_len * 2; + } + unsigned int tmp_size = MIN(dst_len, HW_MAX_SGE_LEN); // HW_MAX_SGE_LEN: hisi_zip约束sge len不超过8M + comp_and_decomp_fill_buffer_list(&now_alg_params->tuple, tmp_size, dst_len, ctx->tuple_buf, ctx->tuple_buf_offset); + ctx->tuple_buf_offset += dst_len; + if (ctx->tuple_buf_offset > ctx->tuple_buf_len) { + printf("ctx->tuple_buf_offset[0x%lx] > ctx->tuple_buf_len[0x%lx]\n", ctx->tuple_buf_offset, ctx->tuple_buf_len); + exit(-1); + } + now_alg_params->dst_buf_list = &now_alg_params->tuple; + } else { + now_alg_params->dst_buf_list = &now_alg_params->dst; + } +} +// LZ4 压缩实现 +static int lz4async_block_compress(struct compress_session *sess, struct compress_param *params) +{ + kaelz4_param *param = ¶ms->kaelz4_param; + + const struct kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + if (sess->kae_sess == NULL) + return LZ4_compress_async(src, dst, lz4_compress_async_callback, result); + + return KAELZ4_compress_async_in_session(sess->kae_sess, src, dst, lz4_compress_async_callback, result); +} + +// LZ4 解压实现 +static int lz4async_block_decompress(struct compress_session *sess, struct compress_param *params) +{ + kaelz4_param *param = ¶ms->kaelz4_param; + const struct kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + int ret = LZ4_decompress_async(src, dst, lz4_compress_async_callback, result); + return ret; +} + +static int lz4_bound(int src_len) +{ + return LZ4_compressBound(src_len); 
+} +// LZ4 初始化 +int lz4_async_init(struct compress_ctx *ctx) +{ + if (ctx->is_polling && ctx->compress_or_decompress) { + ctx->sess.kae_sess = KAELZ4_create_async_compress_session(get_physical_address_wrapper); + } else { + LZ4_async_compress_init(get_physical_address_wrapper); + } + return 0; +} + +void lz4_async_cleanup(struct compress_ctx *ctx) +{ + if (ctx->sess.kae_sess) + KAELZ4_destroy_async_compress_session(ctx->sess.kae_sess); + else + LZ4_teardown_async_compress(); +} + +void lz4_prepare_param_from_ctx(struct compress_ctx *ctx, struct compress_param *params) +{ + params->ibuf_crc = 0; + params->obuf_crc = 0; + + kaelz4_param *param = ¶ms->kaelz4_param; + if (ctx->with_crc == 1) { + param->result.ibuf_crc = ¶ms->ibuf_crc; + param->result.obuf_crc = ¶ms->obuf_crc; + } else { + param->result.ibuf_crc = NULL; + param->result.obuf_crc = NULL; + } + param->result.user_data = params; + + comp_and_decomp_fill_src_buf(params); + comp_and_decomp_fill_dst_buf(params); +}; + +void lz4_prepre_out_buf(struct compress_ctx *ctx, struct compress_out_buf *out_buf, struct compress_param *params) +{ + out_buf->src_len = params->src_len; + out_buf->len = params->dst_len; + out_buf->sn = params->sn; + out_buf->ibuf_crc = params->ibuf_crc; + out_buf->obuf_crc = params->obuf_crc; + ctx->out_total_len += params->dst_len; + + kaelz4_param *param = ¶ms->kaelz4_param; + + out_buf->buf_addr = param->dst.buf[0].data; + out_buf->src = param->src.buf[0].data; + out_buf->next = NULL; + + + if (ctx->out_buf_tail) { + ctx->out_buf_tail->next = out_buf; + } else { + ctx->out_buf_list = out_buf; + } + ctx->out_buf_tail = out_buf; +}; + +void lz4_async_polling(struct compress_session *sess, int budget) +{ + KAELZ4_compress_async_polling_in_session(sess->kae_sess, budget); +} + +// LZ4 算法实例 +compression_algorithm_t lz4async_block_algorithm = { + .name = "kaelz4async_block", + .alg_type = ALG_KAE_LZ4, + .async_compress = lz4async_block_compress, + .poll = lz4_async_polling, + .bound = 
lz4_bound, + .async_decompress = lz4async_block_decompress, + .init = lz4_async_init, + .prepare_param = lz4_prepare_param_from_ctx, + .prepare_outbuf = lz4_prepre_out_buf, + .cleanup = lz4_async_cleanup +}; + +// 注册 LZ4 算法 +void register_lz4async_block_algorithm(void) +{ + register_algorithm(&lz4async_block_algorithm); +} \ No newline at end of file diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4AsyncFrame.c b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncFrame.c new file mode 100644 index 0000000..bc1b90e --- /dev/null +++ b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncFrame.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include "manage.h" +#include "compress_ctx.h" +#include "lz4Async.h" + +static int g_has_custom_frameinfo_config = 0; // 是否 自定义 frameinfo 格式 + +static int lz4_async_frame_compress(struct compress_session *sess, struct compress_param *params) +{ + int ret; + kaelz4_param *param = ¶ms->kaelz4_param; + + const struct kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + + if (g_has_custom_frameinfo_config == 0) { + if (sess->kae_sess) + return KAELZ4_compress_frame_async_in_session(sess->kae_sess, src, dst, lz4_compress_async_callback, result, NULL); + + ret = LZ4F_compressFrame_async(src, dst, lz4_compress_async_callback, result, NULL); + } else { + // 初始化LZ4F压缩的参数 + LZ4F_preferences_t preferences = {0}; + preferences.frameInfo.blockSizeID = LZ4F_max64KB; // 设定块大小 + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + preferences.frameInfo.contentSize = result->src_size; + if (sess->kae_sess) + return KAELZ4_compress_frame_async_in_session(sess->kae_sess, src, dst, lz4_compress_async_callback, result, &preferences); + + ret = LZ4F_compressFrame_async(src, dst, lz4_compress_async_callback, result, &preferences); + } + return ret; +} + +// 单个 LZ4 frame 格式文件的解压实现 +static int 
lz4_async_frame_decompress(struct compress_session *sess, struct compress_param *params) +{ + kaelz4_param *param = ¶ms->kaelz4_param; + + const struct kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + int ret = LZ4F_decompress_async(src, dst, lz4_compress_async_callback, result, NULL); + return ret; +} + +static int lz4_frame_bound(int src_len) { + return LZ4F_compressFrameBound(src_len, NULL) * 1.2; +} + +// LZ4 frame 算法实例 +compression_algorithm_t lz4_async_frame_algorithm = { + .name = "kaelz4async_frame", + .bound = lz4_frame_bound, + .poll = lz4_async_polling, + .async_compress = lz4_async_frame_compress, + .async_decompress = lz4_async_frame_decompress, + .init = lz4_async_init, + .prepare_param = lz4_prepare_param_from_ctx, + .prepare_outbuf = lz4_prepre_out_buf, + .cleanup = lz4_async_cleanup +}; + +// 注册 LZ4 frame 算法 +void register_lz4async_frame_algorithm(void) +{ + register_algorithm(&lz4_async_frame_algorithm); +} \ No newline at end of file diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77.c b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77.c new file mode 100644 index 0000000..27f6018 --- /dev/null +++ b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77.c @@ -0,0 +1,51 @@ +#include "manage.h" +#include "compress_ctx.h" +#include +#include +#include "lz4Async.h" + +// LZ4 压缩实现 +static int lz4async_block_compress(struct compress_session *sess, struct compress_param *params) +{ + kaelz4_param *param = ¶ms->kaelz4_param; + + const struct kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + return KAELZ4_compress_lz77_async_in_session(sess->kae_sess, src, dst, lz4_compress_async_callback, result); +} + +// LZ4 解压实现 +static int lz4async_block_decompress(struct compress_session *sess, struct compress_param *params) +{ + kaelz4_param *param = ¶ms->kaelz4_param; + + const struct 
kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + int ret = LZ4_decompress_async(src, dst, lz4_compress_async_callback, result); + return ret; +} + +static int lz4_bound(int src_len) { + return LZ4_compressBound(src_len); +} + +// LZ4 算法实例 +compression_algorithm_t lz4async_lz77_algorithm = { + .name = "kaelz4async_lz77", + .async_compress = lz4async_block_compress, + .poll = lz4_async_polling, + .bound = lz4_bound, + .async_decompress = lz4async_block_decompress, + .init = lz4_async_init, + .prepare_param = lz4_prepare_param_from_ctx, + .prepare_outbuf = lz4_prepre_out_buf, + .cleanup = lz4_async_cleanup, +}; + +// 注册 LZ4 算法 +void register_lz4async_lz77_algorithm(void) +{ + register_algorithm(&lz4async_lz77_algorithm); +} \ No newline at end of file diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77Frame.c b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77Frame.c new file mode 100644 index 0000000..60e3fcc --- /dev/null +++ b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncLz77Frame.c @@ -0,0 +1,51 @@ +#include "manage.h" +#include "compress_ctx.h" +#include +#include +#include +#include "lz4Async.h" + +// LZ4 压缩实现 +static int lz4async_block_compress(struct compress_session *sess, struct compress_param *params) +{ + kaelz4_param *param = ¶ms->kaelz4_param; + + const struct kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + return KAELZ4_compress_lz77_async_in_session(sess->kae_sess, src, dst, lz4_compress_async_callback, result); +} + +static int lz4async_block_decompress(struct compress_session *sess, struct compress_param *params) +{ + kaelz4_param *param = ¶ms->kaelz4_param; + + const struct kaelz4_buffer_list *src = ¶m->src; + struct kaelz4_buffer_list *dst = param->dst_buf_list; + struct kaelz4_result *result = ¶m->result; + int ret = LZ4F_decompress_async(src, dst, 
lz4_compress_async_callback, result, NULL); + return ret; +} + +static int lz4_bound(int src_len) { + return LZ4F_compressFrameBound(src_len, NULL) * 1.2; +} + +// LZ4 算法实例 +compression_algorithm_t lz4async_lz77_frame_algorithm = { + .name = "kaelz4async_lz77_frame", + .async_compress = lz4async_block_compress, + .poll = lz4_async_polling, + .bound = lz4_bound, + .async_decompress = lz4async_block_decompress, + .init = lz4_async_init, + .prepare_param = lz4_prepare_param_from_ctx, + .prepare_outbuf = lz4_prepre_out_buf, + .cleanup = lz4_async_cleanup, +}; + +// 注册 LZ4 算法 +void register_lz4async_lz77_frame_algorithm(void) +{ + register_algorithm(&lz4async_lz77_frame_algorithm); +} \ No newline at end of file diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4Frame.c b/scripts/perftest/kzip/alg/kaelz4/lz4Frame.c new file mode 100644 index 0000000..df0257b --- /dev/null +++ b/scripts/perftest/kzip/alg/kaelz4/lz4Frame.c @@ -0,0 +1,70 @@ +#include "manage.h" +#include "compress_ctx.h" +#include +#include +#include +#include "lz4Async.h" + +static int g_custom_frameinfo_config = 0; // 是否 自定义 frameinfo 格式 + +// 单个 LZ4 frame 格式文件的压缩实现 +static int lz4_frame_compress(struct compress_param *param) +{ + int ret; + if (g_custom_frameinfo_config == 0) { + ret = LZ4F_compressFrame(param->dst_buf, param->dst_len, param->src_buf + param->src_buf_offset, param->src_len, NULL); + } else { + LZ4F_preferences_t preferences = {0}; + preferences.frameInfo.blockSizeID = LZ4F_max64KB; // 设定块大小 + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + preferences.frameInfo.contentSize = param->src_len; + ret = LZ4F_compressFrame(param->dst_buf, param->dst_len, param->src_buf + param->src_buf_offset, param->src_len, &preferences); + } + + param->dst_len = ret > 0 ? ret : 0; + param->done = 1; + return ret > 0 ? 
0 : ret; +} + +// 单个 LZ4 frame 格式文件的解压实现 +static int lz4_frame_decompress(struct compress_param *param) +{ + LZ4F_decompressionContext_t dctx; + LZ4F_createDecompressionContext(&dctx, 100); + size_t tmp_src_len = param->src_len; + size_t tmp_dst_len = param->dst_len; + int ret = LZ4F_decompress(dctx, param->dst_buf, &tmp_dst_len, param->src_buf + param->src_buf_offset, &tmp_src_len, NULL); + LZ4F_freeDecompressionContext(dctx); + param->dst_len = tmp_dst_len > 0 ? tmp_dst_len : 0; + param->done = 1; + return ret > 0 ? 0 : ret; +} +static int lz4_frame_bound(int src_len) { + int needlen = LZ4F_compressFrameBound(src_len, NULL); + if (g_custom_frameinfo_config == 1) { + needlen += 1024; + } + return needlen; +} +// LZ4 frame 初始化 +static int lz4_frame_init(struct compress_ctx *ctx) { + return 0; +} + +// LZ4 frame 算法实例 +compression_algorithm_t lz4_frame_algorithm = { + .name = "kaelz4_frame", + .bound = lz4_frame_bound, + .compress = lz4_frame_compress, + .decompress = lz4_frame_decompress, + .prepare_param = lz4_prepare_param_from_ctx, + .prepare_outbuf = lz4_prepre_out_buf, + .init = lz4_frame_init +}; + +// 注册 LZ4 frame 算法 +void register_lz4_frame_algorithm(void) +{ + register_algorithm(&lz4_frame_algorithm); +} diff --git a/scripts/perftest/kzip/alg/kaezlib/deflate.c b/scripts/perftest/kzip/alg/kaezlib/deflate.c new file mode 100644 index 0000000..28430c2 --- /dev/null +++ b/scripts/perftest/kzip/alg/kaezlib/deflate.c @@ -0,0 +1,80 @@ +#include "manage.h" +#include "compress_ctx.h" +#include +#include +#include "zlibAsync.h" + +static int g_level = 6; +static int g_windowBits = -15; + +static int zlib_bound(int src_len) { + return compressBound(src_len); +} + +// LZ4 压缩实现 +static int zlib_compress(struct compress_param *param) +{ + z_stream strm; + strm.zalloc = (alloc_func)0; + strm.zfree = (free_func)0; + strm.opaque = (voidpf)0; + (void)deflateInit2_(&strm, g_level, Z_DEFLATED, g_windowBits, 8, Z_DEFAULT_STRATEGY, "1.2.11", sizeof(z_stream)); + + strm.next_in = 
(z_const Bytef*)param->src_buf + param->src_buf_offset; + strm.next_out = (void *)param->dst_buf; + strm.avail_in = param->src_len; + strm.avail_out = param->dst_len; + int ret = deflate(&strm, Z_FINISH); + + (void)deflateEnd(&strm); + if (ret < Z_OK) { + printf("[KAE_ERR]:compress2 failed, ret is:%d. (dst_len = %d; src_len = %d.)\n", ret, param->dst_len, param->src_len); + } + param->dst_len = strm.total_out > 0 ? strm.total_out : 0; + param->done = 1; + return ret > 0 ? 0 : ret; +} + +// LZ4 解压实现 +static int zlib_decompress(struct compress_param *param) +{ + z_stream strm; + strm.zalloc = (alloc_func)0; + strm.zfree = (free_func)0; + strm.opaque = (voidpf)0; + (void)inflateInit2_(&strm, g_windowBits, "1.2.11", sizeof(z_stream)); + strm.next_in = (z_const Bytef*)param->src_buf + param->src_buf_offset; + strm.next_out = (void *)param->dst_buf; + strm.avail_in = param->src_len; + strm.avail_out = param->dst_len; + int ret = inflate(&strm, Z_FINISH); + + (void)inflateEnd(&strm); + if (ret < Z_OK) { + printf("[KAE_ERR]:uncompress2 failed, ret is:%d.\n", ret); + } + param->dst_len = strm.total_out > 0 ? strm.total_out : 0; + param->done = 1; + return ret > 0 ? 
0 : ret; +} + +// LZ4 初始化 +static int zlib_init(struct compress_ctx *ctx) { + return 0; +} + +// LZ4 算法实例 +compression_algorithm_t zlib_deflate_algorithm = { + .name = "kaezlib_deflate", + .bound = zlib_bound, + .compress = zlib_compress, + .decompress = zlib_decompress, + .prepare_param = zlib_prepare_ctx, + .prepare_outbuf = zlib_prepre_out_buf, + .init = zlib_init +}; + +void register_zlib_deflate_algorithm(void) +{ + register_algorithm(&zlib_deflate_algorithm); +} diff --git a/scripts/perftest/kzip/alg/kaezlib/deflateAsync.c b/scripts/perftest/kzip/alg/kaezlib/deflateAsync.c new file mode 100644 index 0000000..37794ea --- /dev/null +++ b/scripts/perftest/kzip/alg/kaezlib/deflateAsync.c @@ -0,0 +1,225 @@ +#include "manage.h" +#include "compress_ctx.h" +#include +#include +#include +#include +#include +#include "delayRecord.h" + +extern int g_log_level; + +static void compress_async_callback(struct kaezip_result *result) +{ + if (unlikely(result->status != 0)) { + printf("[user]回调压缩异常 : %d\n", result->status); + } + struct compress_param *param = (struct compress_param *)result->user_data; + + param->dst_len = result->dst_len; + + if (g_log_level == 1) { + uint64_t end = get_ns(); + uint64_t timeonce = end - param->start_time; + if(timeonce > 0) { + record_latency(param->ctx->all_delays, timeonce, param->sn); + } + } + param->done = 1; + return; +} + +static void comp_and_decomp_fill_buffer_list(struct kaezip_buffer_list *buf_list, size_t sge_len, size_t rem_len, void *start_addr, size_t offset) +{ + size_t tmp_offset = 0; + unsigned int i = 0; + unsigned int tmp_size; + + while (rem_len) { + tmp_size = MIN(sge_len, rem_len); + buf_list->buf[i].data = start_addr + offset + tmp_offset; + if (((offset + tmp_offset) % HPAGE_SIZE) + tmp_size <= HPAGE_SIZE) { + buf_list->buf[i].buf_len = tmp_size; + } else { + buf_list->buf[i].buf_len = HPAGE_SIZE - ((offset + tmp_offset) % HPAGE_SIZE); + } + tmp_offset += buf_list->buf[i].buf_len; + rem_len -= 
buf_list->buf[i].buf_len; + i++; + buf_list->buf_num = i; + } +} + +static void comp_and_decomp_fill_src_buf(struct compress_param *param) +{ + struct compress_ctx *ctx = param->ctx; + size_t src_len = param->src_len; + void *start_addr = param->src_buf; + size_t offset = param->src_buf_offset; + + kaezip_param *now_alg_params = ¶m->kaezip_param; + + now_alg_params->src.buf = now_alg_params->src_buf; + now_alg_params->src.usr_data = ctx->page_info; + unsigned int tmp_size = src_len / ctx->src_buf_num; + comp_and_decomp_fill_buffer_list(&now_alg_params->src, tmp_size, src_len, start_addr, offset); + + param->src_len = src_len; + now_alg_params->result.src_size = src_len; +} + +static void comp_and_decomp_fill_dst_buf(struct compress_param *param) +{ + struct compress_ctx *ctx = param->ctx; + void *start_addr = param->dst_buf; + size_t dst_len = param->dst_len; + + kaezip_param *now_alg_params = ¶m->kaezip_param; + + now_alg_params->dst.buf_num = 1; + now_alg_params->dst.buf = now_alg_params->dst_buf; + now_alg_params->dst.buf[0].data = start_addr; + now_alg_params->dst.buf[0].buf_len = dst_len; + now_alg_params->tuple.buf = now_alg_params->tuple_buf; + now_alg_params->tuple.usr_data = ctx->tuple_page_info; + now_alg_params->result.dst_len = dst_len; + + unsigned int tmp_size = MIN(dst_len, HW_MAX_SGE_LEN); // HW_MAX_SGE_LEN: hisi_zip约束sge len不超过8M + comp_and_decomp_fill_buffer_list(&now_alg_params->tuple, tmp_size, dst_len, ctx->tuple_buf, ctx->tuple_buf_offset); + ctx->tuple_buf_offset += dst_len; + if (ctx->tuple_buf_offset > ctx->tuple_buf_len) { + printf("ctx->tuple_buf_offset[0x%lx] > ctx->tuple_buf_len[0x%lx]\n", ctx->tuple_buf_offset, ctx->tuple_buf_len); + exit(1); + } + now_alg_params->dst_buf_list = &now_alg_params->tuple; +} + +// Zlib 压缩实现 +static int zlibasync_deflate_compress(struct compress_session *sess, struct compress_param *params) +{ + kaezip_param *param = ¶ms->kaezip_param; + + const struct kaezip_buffer_list *src = ¶m->src; + struct 
kaezip_buffer_list *dst = param->dst_buf_list; + struct kaezip_result *result = ¶m->result; + + return KAEZIP_compress_async_in_session(sess->kae_sess, src, dst, compress_async_callback, result); +} + +static int zlibasync_deflate_decompress(struct compress_session *sess, struct compress_param *params) +{ + kaezip_param *param = ¶ms->kaezip_param; + const struct kaezip_buffer_list *src = ¶m->src; + struct kaezip_buffer_list *dst = param->dst_buf_list; + struct kaezip_result *result = ¶m->result; + int ret = KAEZIP_decompress_async_in_session(sess->kae_sess, src, dst, compress_async_callback, result); + return ret; +} + +static int zlib_bound(int src_len) { + return compressBound(src_len); +} + +// Zlib 初始化 +static int zlib_async_deflate_init(struct compress_ctx *ctx) { + if(ctx->sess_count > 1) { + for (int i = 0; i < ctx->sess_count; ++i) { + if(ctx->compress_or_decompress == 1) { + ctx->sess_array[i].kae_sess = KAEZIP_create_async_compress_session(get_physical_address_wrapper); + } else { + ctx->sess_array[i].kae_sess = KAEZIP_create_async_decompress_session(get_physical_address_wrapper); + } + if (!ctx->sess_array[i].kae_sess) { + fprintf(stderr, "Failed to create session %d\n", i); + } + } + } else { + if(ctx->compress_or_decompress == 1) { + ctx->sess.kae_sess = KAEZIP_create_async_compress_session(get_physical_address_wrapper); + } else { + ctx->sess.kae_sess = KAEZIP_create_async_decompress_session(get_physical_address_wrapper); + } + } + return 0; +} + +static void zlib_async_deflate_cleanup(struct compress_ctx *ctx) +{ + if(ctx->sess_count > 1) { + for (int i = 0; i < ctx->sess_count; ++i) { + if (ctx->sess_array[i].kae_sess) { + KAEZIP_destroy_async_compress_session(ctx->sess_array[i].kae_sess); + } + } + } else { + KAEZIP_destroy_async_compress_session(ctx->sess.kae_sess); + } +} +void zlib_prepare_ctx(struct compress_ctx *ctx, struct compress_param *params) +{ + params->ibuf_crc = 0; + params->obuf_crc = 0; + + kaezip_param *param = ¶ms->kaezip_param; + 
if (ctx->with_crc == 1) { + param->result.ibuf_crc = ¶ms->ibuf_crc; + param->result.obuf_crc = ¶ms->obuf_crc; + } else { + param->result.ibuf_crc = NULL; + param->result.obuf_crc = NULL; + } + param->result.user_data = params; + comp_and_decomp_fill_src_buf(params); + comp_and_decomp_fill_dst_buf(params); +}; +void zlib_prepre_out_buf(struct compress_ctx *ctx, struct compress_out_buf *out_buf, struct compress_param *params) +{ + out_buf->src_len = params->src_len; + out_buf->len = params->dst_len; + out_buf->sn = params->sn; + out_buf->ibuf_crc = params->ibuf_crc; + out_buf->obuf_crc = params->obuf_crc; + ctx->out_total_len += params->dst_len; + + kaezip_param *param = ¶ms->kaezip_param; + + if (ctx->is_zlib) + out_buf->buf_addr = param->tuple.buf[0].data; + else + out_buf->buf_addr = param->dst.buf[0].data; + out_buf->src = param->src.buf[0].data; + out_buf->next = NULL; + + + if (ctx->out_buf_tail) { + ctx->out_buf_tail->next = out_buf; + } else { + ctx->out_buf_list = out_buf; + } + ctx->out_buf_tail = out_buf; +}; + +static void zlib_polling(struct compress_session *sess, int budget) +{ + KAEZIP_compress_async_polling_in_session(sess->kae_sess, budget); +} + +// Zlib 算法实例 +compression_algorithm_t zlibasync_block_algorithm = { + .name = "kaezlibasync_deflate", + .alg_type = ALG_KAE_ZLIB, + .async_compress = zlibasync_deflate_compress, + .poll = zlib_polling, + .bound = zlib_bound, + .async_decompress = zlibasync_deflate_decompress, + .prepare_param = zlib_prepare_ctx, + .prepare_outbuf = zlib_prepre_out_buf, + .init = zlib_async_deflate_init, + .cleanup = zlib_async_deflate_cleanup, +}; + +// 注册 Zlib 算法 +void register_zlibasync_block_algorithm(void) +{ + register_algorithm(&zlibasync_block_algorithm); +} diff --git a/scripts/perftest/kzip/alg/kaezlib/zlib.c b/scripts/perftest/kzip/alg/kaezlib/zlib.c new file mode 100644 index 0000000..591cf93 --- /dev/null +++ b/scripts/perftest/kzip/alg/kaezlib/zlib.c @@ -0,0 +1,54 @@ +#include "manage.h" +#include 
"compress_ctx.h" +#include +#include +#include "zlibAsync.h" + +static int g_level = 6; + +// LZ4 压缩实现 +static int zlib_compress(struct compress_param *param) +{ + int ret = compress2(param->dst_buf, (unsigned long *)¶m->dst_len, param->src_buf, param->src_len, g_level); + if (ret != Z_OK) { + printf("[KAE_ERR]:compress2 failed, ret is:%d. (dst_len = %d; src_len = %d.)\n", ret, param->dst_len, param->src_len); + } + param->done = 1; + return ret > 0 ? 0 : ret; +} + +// LZ4 解压实现 +static int zlib_decompress(struct compress_param *param) +{ + int ret = uncompress2(param->dst_buf, (unsigned long *)¶m->dst_len, param->src_buf, (unsigned long *)¶m->src_len); + if (ret != Z_OK) { + printf("[KAE_ERR]:uncompress2 failed, ret is:%d.\n", ret); + } + param->done = 1; + return ret > 0 ? 0 : ret; +} + +static int zlib_bound(int src_len) { + return compressBound(src_len); +} + +// LZ4 初始化 +static int zlib_init(struct compress_ctx *ctx) { + return 0; +} + +// LZ4 算法实例 +compression_algorithm_t zlib_algorithm = { + .name = "kaezlib", + .bound = zlib_bound, + .compress = zlib_compress, + .decompress = zlib_decompress, + .prepare_param = zlib_prepare_ctx, + .prepare_outbuf = zlib_prepre_out_buf, + .init = zlib_init +}; + +void register_zlib_algorithm(void) +{ + register_algorithm(&zlib_algorithm); +} diff --git a/scripts/perftest/kzip/alg/kaezlib/zlibAsync.h b/scripts/perftest/kzip/alg/kaezlib/zlibAsync.h new file mode 100644 index 0000000..6a21d69 --- /dev/null +++ b/scripts/perftest/kzip/alg/kaezlib/zlibAsync.h @@ -0,0 +1,15 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * @Description: common functions for algorithms + * @Author: Ma Xiaofeng + * @Date: 2025-7-31 + * @LastEditTime: 2025-7-31 + */ + +#ifndef ZLIB_ASYNC_H +#define ZLIB_ASYNC_H + +void zlib_prepare_ctx(struct compress_ctx *ctx, struct compress_param *params); +void zlib_prepre_out_buf(struct compress_ctx *ctx, struct compress_out_buf *out_buf, struct compress_param *params); + +#endif diff --git a/scripts/perftest/kzip/build.sh b/scripts/perftest/kzip/build.sh new file mode 100644 index 0000000..4a81fd9 --- /dev/null +++ b/scripts/perftest/kzip/build.sh @@ -0,0 +1,21 @@ +export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/kaelz4/lib:/usr/local/kaezstd/lib:/usr/local/kaezip/lib:$LD_LIBRARY_PATH +export C_INCLUDE_PATH=/usr/local/include:/usr/local/kaelz4/include:/usr/local/kaezip/include:$C_INCLUDE_PATH + +# echo "测试kaezip 和 kaelz4,请使用 sh build.sh kaelz4, 默认kaelz4" +# echo "测试kaezip 和 kaezstd,请使用 sh build.sh kaezstd" + +TestEnv=$1 +TestEnv=${TestEnv:=kae} +echo "build kzip for $TestEnv..." +case "$TestEnv" in + kae) + make clean + make MODULES="kaelz4 kaezlib" + ;; + *) + make clean + make MODULES=$TestEnv + ;; +esac + +echo "build kzip done" diff --git a/scripts/perftest/kzip/framework/datagen.c b/scripts/perftest/kzip/framework/datagen.c new file mode 100644 index 0000000..f448640 --- /dev/null +++ b/scripts/perftest/kzip/framework/datagen.c @@ -0,0 +1,189 @@ +/* + datagen.c - compressible data generator test tool + Copyright (C) Yann Collet 2012-2020 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** +* Includes +**************************************/ +#include "platform.h" /* Compiler options, SET_BINARY_MODE */ +#include "util.h" /* U32 */ +#include /* malloc */ +#include /* FILE, fwrite */ +#include /* memcpy */ +#include + + +/************************************** +* Constants +**************************************/ +#define KB *(1 <<10) + +#define PRIME1 2654435761U +#define PRIME2 2246822519U + + +/************************************** +* Local types +**************************************/ +#define LTLOG 13 +#define LTSIZE (1<> (32 - r))) +static unsigned int RDG_rand(U32* src) +{ + U32 rand32 = *src; + rand32 *= PRIME1; + rand32 ^= PRIME2; + rand32 = RDG_rotl32(rand32, 13); + *src = rand32; + return rand32; +} + + +static void RDG_fillLiteralDistrib(litDistribTable lt, double ld) +{ + BYTE const firstChar = ld <= 0.0 ? 0 : '('; + BYTE const lastChar = ld <= 0.0 ? 255 : '}'; + BYTE character = ld <= 0.0 ? 0 : '0'; + U32 u = 0; + + while (u lastChar) character = firstChar; + } +} + + +static BYTE RDG_genChar(U32* seed, const litDistribTable lt) +{ + U32 id = RDG_rand(seed) & LTMASK; + return (lt[id]); +} + + +#define RDG_DICTSIZE (32 KB) +#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) +#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? 
(RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) +void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr) +{ + BYTE* buffPtr = (BYTE*)buffer; + const U32 matchProba32 = (U32)(32768 * matchProba); + size_t pos = prefixSize; + U32* seed = seedPtr; + + /* special case */ + while (matchProba >= 1.0) { + size_t size0 = RDG_rand(seed) & 3; + size0 = (size_t)1 << (16 + size0 * 2); + size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/ + if (buffSize < pos + size0) { + memset(buffPtr+pos, 0, buffSize-pos); + return; + } + memset(buffPtr+pos, 0, size0); + pos += size0; + buffPtr[pos-1] = RDG_genChar(seed, lt); + } + + /* init */ + if (pos==0) { + buffPtr[0] = RDG_genChar(seed, lt); + pos=1; + } + + /* Generate compressible data */ + while (pos < buffSize) { + /* Select : Literal (char) or Match (within 32K) */ + if (RDG_RAND15BITS < matchProba32) { + /* Copy (within 32K) */ + size_t match; + size_t d; + int length = RDG_RANDLENGTH + 4; + U32 offset = RDG_RAND15BITS + 1; + if (offset > pos) offset = (U32)pos; + match = pos - offset; + d = pos + length; + if (d > buffSize) d = buffSize; + while (pos < d) buffPtr[pos++] = buffPtr[match++]; + } else { + /* Literal (noise) */ + size_t d; + size_t length = RDG_RANDLENGTH; + d = pos + length; + if (d > buffSize) d = buffSize; + while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt); + } + } +} + + +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) +{ + litDistribTable lt; + if (litProba==0.0) litProba = matchProba / 4.5; + RDG_fillLiteralDistrib(lt, litProba); + RDG_genBlock(buffer, size, 0, matchProba, lt, &seed); +} + + +#define RDG_BLOCKSIZE (128 KB) +void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed) +{ + BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE]; + U64 total = 0; + size_t genBlockSize = RDG_BLOCKSIZE; + litDistribTable lt; + + /* init */ + if 
(litProba==0.0) litProba = matchProba / 4.5; + RDG_fillLiteralDistrib(lt, litProba); + SET_BINARY_MODE(stdout); + + /* Generate dict */ + RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed); + + /* Generate compressible data */ + while (total < size) { + RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed); + if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total); + total += genBlockSize; + fwrite(buff, 1, genBlockSize, stdout); /* should check potential write error */ + /* update dict */ + memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE); + } +} diff --git a/scripts/perftest/kzip/framework/datagen.h b/scripts/perftest/kzip/framework/datagen.h new file mode 100644 index 0000000..c20c9c7 --- /dev/null +++ b/scripts/perftest/kzip/framework/datagen.h @@ -0,0 +1,40 @@ +/* + datagen.h - compressible data generator header + Copyright (C) Yann Collet 2012-2020 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + + +#include /* size_t */ + +void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed); +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); +/* RDG_genOut + Generate 'size' bytes of compressible data into stdout. + Compressibility can be controlled using 'matchProba'. + 'LitProba' is optional, and affect variability of bytes. If litProba==0.0, default value is used. + Generated data can be selected using 'seed'. + If (matchProba, litProba and seed) are equal, the function always generate the same content. + + RDG_genBuffer + Same as RDG_genOut, but generate data into provided buffer +*/ diff --git a/scripts/perftest/kzip/framework/delayRecord.c b/scripts/perftest/kzip/framework/delayRecord.c new file mode 100644 index 0000000..ad0f45a --- /dev/null +++ b/scripts/perftest/kzip/framework/delayRecord.c @@ -0,0 +1,89 @@ +#include +#include +#include // printf +#include // clock_gettime CLOCK_MONOTONIC +#include "delayRecord.h" +// 定义哈希表条目的最大数量 +#define MAX_REASONABLE_LATENCY_NS (10ULL * 1000000000ULL) // 10s +#define MIN_REASONABLE_LATENCY_NS 0 // 0ns + +// 获取当前 ns 时间 +uint64_t get_ns(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; +} + +// 记录时延数据 +void record_latency(uint64_t *all_delays, uint64_t latency, size_t sn) +{ + if (latency < MIN_REASONABLE_LATENCY_NS || latency > MAX_REASONABLE_LATENCY_NS) { + return; + } + + if (all_delays == NULL) { + all_delays = (uint64_t *)malloc(MAX_LATENCY_COUNT * sizeof(uint64_t)); + if (all_delays == NULL) { + return; + } + } + if (sn < MAX_LATENCY_COUNT) { + all_delays[sn] = latency; + } +} + +static int compare_uint64(const void *a, const void *b) +{ + uint64_t ua = *(uint64_t *)a; + uint64_t ub = *(uint64_t *)b; 
/*
 * Look up one percentile in a latency table and return it in microseconds.
 *
 * all_delays  latency samples in nanoseconds (get_percent_latencies() sorts
 *             them ascending before calling this function)
 * num         selector:
 *               0..100     percent       (index = cnt * num / 100)
 *               101..1000  per-mille     (index = cnt * num / 1000)
 *               < 0        count from the end (-1 is the last sample)
 * cnt         number of valid samples
 *
 * An index that lands exactly on `cnt` (num == 100 or num == 1000) is
 * clamped to the last sample, so the maximum can be queried.
 *
 * Returns the selected sample divided by 1000.0, or -1.0 for a NULL table,
 * an empty table or a selector outside the ranges above.
 */
double get_percent_delay(uint64_t *all_delays, int num, size_t cnt)
{
    if (all_delays == NULL || cnt == 0) {
        return -1.0;
    }

    size_t idx;
    if (num < 0) {
        /* Negative selectors count back from the end of the table. */
        size_t back = (size_t)(-(long long)num);
        if (back > cnt) {
            return -1.0;
        }
        idx = cnt - back;
    } else if (num <= 100) {
        idx = cnt * (size_t)num / 100;
    } else if (num <= 1000) {
        idx = cnt * (size_t)num / 1000;
    } else {
        return -1.0;    /* invalid selector */
    }

    if (idx >= cnt) {
        idx = cnt - 1;  /* p100 / 1000 per-mille: the largest sample */
    }
    return all_delays[idx] / 1000.0;
}
{ + uint64_t *entries; + size_t entries_num; + void *base_vaddr; +}; + +struct cache_page_map* init_cache_page_map(void *base_vaddr, size_t total_size) +{ + struct cache_page_map *cache = malloc(sizeof(struct cache_page_map)); + if (!cache) return NULL; + + int fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) { + perror("打开/proc/self/pagemap失败"); + free(cache); + return NULL; + } + + // 根据申请大小计算需要读取的条目数 + size_t pages_num = total_size / PAGE_SIZE; + cache->entries_num = pages_num; + + cache->base_vaddr = base_vaddr; + + // 分配缓存空间 + cache->entries = malloc(pages_num * sizeof(uint64_t)); + if (!cache->entries) { + close(fd); + free(cache); + return NULL; + } + + // 计算文件偏移量(基地址为第一个条目,即申请到的虚拟地址对应的页面) + uintptr_t base = (uintptr_t)base_vaddr; + uintptr_t first_offset = (base / PAGE_SIZE) * sizeof(uint64_t); + + // 定位到起始位置 + if (lseek(fd, first_offset, SEEK_SET) != first_offset) { + perror("lseek失败"); + close(fd); + free(cache->entries); + free(cache); + return NULL; + } + + // 读取该次申请到的所有条目 + if (read(fd, cache->entries, pages_num * sizeof(uint64_t)) != (ssize_t)(pages_num * sizeof(uint64_t))) { + perror("读取条目失败"); + close(fd); + free(cache->entries); + free(cache); + return NULL; + } + close(fd); + return cache; +} + +static uint64_t get_physical_address_cache_page_map(struct cache_page_map *cache, void *vaddr) { + uintptr_t virtual_addr = (uintptr_t)vaddr; + + // 计算在缓存中的条目索引 + uintptr_t base = (uintptr_t)cache->base_vaddr; + uintptr_t index = (virtual_addr - base) / PAGE_SIZE; + + if (index >= cache->entries_num) { + // fprintf(stderr, "地址超出缓存范围\n"); + return 0; + } + + uint64_t entry = cache->entries[index]; + + if (!(entry & (1ULL << 63))) { + // fprintf(stderr, "页面不存在\n"); + return 0; + } + + // 提取物理帧号(PFN) + uint64_t pfn = entry & PFN_MASK; + return (pfn << PAGE_SHIFT) | (virtual_addr & (PAGE_SIZE - 1)); +} + +void free_cache_page_map(struct cache_page_map *cache) { + if (cache) { + free(cache->entries); + free(cache); + } +} + +void *get_huge_pages(size_t 
total_size) +{ + void *addr = mmap( + NULL, + total_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1GB, + -1, 0 + ); // 申请内存大页 + + if (addr == MAP_FAILED) { + fprintf(stderr, "申请内存大页失败。\n"); + fprintf(stderr, "系统可能没有足够的大页可用。\n"); + fprintf(stderr, "请尝试分配更多大页: echo 10 | tee /sys/devices/system/node/node0/hugepages/hugepages-1048576kB/nr_hugepages\n"); + exit(EXIT_FAILURE); + } + + return addr; +} + +void release_huge_pages(void *addr, size_t total_size) +{ + munmap(addr, total_size); +} + +void* get_physical_address_wrapper(void *usr, void *vaddr, size_t sz) +{ + struct cache_page_map *cache = (struct cache_page_map *)usr; + uint64_t phys_addr = get_physical_address_cache_page_map(cache, vaddr); + return (void*)(uintptr_t)phys_addr; +} \ No newline at end of file diff --git a/scripts/perftest/kzip/framework/main.c b/scripts/perftest/kzip/framework/main.c new file mode 100644 index 0000000..348d2b5 --- /dev/null +++ b/scripts/perftest/kzip/framework/main.c @@ -0,0 +1,1304 @@ +#define _GNU_SOURCE +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lz4.h" +#include "manage.h" +#include "delayRecord.h" +#include "compress_ctx.h" +#include "datagen.h" /* RDG_generate */ + +#define KB *(1 << 10) +#define MB *(1 << 20) +#define GB *(1U << 30) + +int g_file_chunk_size = 0; // 测试分片大小。 默认 0kb 不分片 +int g_log_level = 0; // 打印日志级别。 0:不统计时延。 1:时延统计 +int g_cpu_threads_per_core = 1; // 是否开启超线程。 1: 未开启。 2:开启 +int g_enable_huge_pages = 1; // 是否使用内存大页 +int g_enable_polling_mode = 0; // 是否使用单线程polling模式 + + +static uLong read_inputFile(struct compress_ctx *ctx, const char* fileName, void** input) +{ + FILE* sourceFile = fopen(fileName, "r"); + if (sourceFile == NULL) { + fprintf(stderr, "%s not exist!\n", fileName); + return 0; + } + int fd = fileno(sourceFile); + struct stat fs; + (void)fstat(fd, &fs); + + uLong 
input_size = fs.st_size; + + if (g_enable_huge_pages) { + int huge_page_num = (int)(input_size * sizeof(Bytef) / HPAGE_SIZE) + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 + size_t total_size = huge_page_num * HPAGE_SIZE; + *input = get_huge_pages(total_size); + // printf("申请的大页虚拟地址: %p len:%ld\n", *input, total_size); + + if (*input == NULL) { + return 0; + } + (void)fread(*input, 1, input_size, sourceFile); + + struct cache_page_map* cache = init_cache_page_map(*input, total_size); + // uint64_t phys_addr = get_physical_address_cache_page_map(cache, *input); + + // printf("大页物理地址: 0x%" PRIx64 "\n", phys_addr); + ctx->page_info = cache; + ctx->total_size = total_size; + } else { + *input = malloc(input_size * sizeof(Bytef)); + if (*input == NULL) { + return 0; + } + (void)fread(*input, 1, input_size, sourceFile); + } + + fclose(sourceFile); + + return input_size; +} + +static void save_metadata_to_file( + const char *metadata_filename, struct fragment_metadata *fragments, unsigned int fragment_count) +{ + FILE *file = fopen(metadata_filename, "wb"); + if (file == NULL) { + perror("Failed to open metadata file"); + return; + } + // 写入分片数量 + fwrite(&fragment_count, sizeof(unsigned int), 1, file); + + // 写入每个分片的元数据(偏移量和长度) + for (unsigned int i = 0; i < fragment_count; i++) { + fwrite(&fragments[i], sizeof(struct fragment_metadata), 1, file); + } + + fclose(file); +} +// 从文件中读取元数据 +static void load_metadata_from_file( + const char *in_filename, struct fragment_metadata **fragments, unsigned int *fragment_count) +{ + size_t meta_file_length = strlen(in_filename) + strlen(".meta") + 1; // +1 用于'\0'结束符 + char *metadata_filename = (char *)malloc(meta_file_length); // 分配内存 + if (metadata_filename == NULL) { + perror("Failed to allocate memory"); + exit(-1); + } + // 拼接文件名 + snprintf(metadata_filename, meta_file_length, "%s.meta", in_filename); + + FILE *file = fopen(metadata_filename, "rb"); + if (file == NULL) { + perror("Failed to open metadata file"); + return; + } + // 读取分片数量 + 
fread(fragment_count, sizeof(unsigned int), 1, file); + // 为分片元数据分配内存 + *fragments = (struct fragment_metadata *)malloc(*fragment_count * sizeof(struct fragment_metadata)); + if (*fragments == NULL) { + perror("Failed to allocate memory for fragment metadata"); + fclose(file); + return; + } + + // 读取每个分片的元数据 + fread(*fragments, sizeof(struct fragment_metadata), *fragment_count, file); + + fclose(file); + free(metadata_filename); +} + +static struct compress_out_buf *get_buf_node_and_del_it(struct compress_out_buf **out_buf_list, unsigned int sn) +{ + struct compress_out_buf *prev = NULL; + struct compress_out_buf *current = *out_buf_list; + while (current != NULL) { + if (current->sn == sn) { + // 如果是第一个节点 + if (prev == NULL) { + // 移动头指针 + *out_buf_list = current->next; + } else { + // 删除当前节点,调整前一个节点的 next 指针 + prev->next = current->next; + } + + // 返回匹配的节点(如果需要) + return current; + } + + prev = current; + current = current->next; + } + + return NULL; +} + +static size_t write_outputFile_2(const char* outFileName, void* output, uLong output_size) +{ + FILE* outputFile = fopen(outFileName, "w"); + if (!outputFile) { + fprintf(stderr, "%s create failed!\n", outFileName); + return 0; + } + size_t count = fwrite(output, sizeof(Bytef), output_size, outputFile); + fclose(outputFile); + return count; +} + +static size_t write_outputFile(const char* outFileName, struct compress_out_buf **out_buf_list, unsigned int output_num) +{ + FILE* outputFile = fopen(outFileName, "w"); + if (!outputFile) { + fprintf(stderr, "%s create failed!\n", outFileName); + return 0; + } + + struct fragment_metadata *fragments = (struct fragment_metadata *)malloc(output_num * sizeof(struct fragment_metadata)); + if (fragments == NULL) { + perror("Failed to allocate memory"); + return -1; + } + unsigned int base_offset = 0; // 记录每个分片的偏移量。 + + size_t count = 0; + unsigned int num = 0; + while (output_num > num) { + struct compress_out_buf *out_buf_node = get_buf_node_and_del_it(out_buf_list, num); 
/*
 * Allocate `input_sz` bytes and fill them with pseudo-random non-zero
 * values (each byte is in 1..254).
 *
 * The generator is re-seeded from the current time on every call.
 * Returns the buffer (to be released with free()), or NULL if the
 * allocation fails.
 */
static uint8_t *get_compress_input(size_t input_sz)
{
    uint8_t *inbuf = (uint8_t *)malloc(input_sz * sizeof(uint8_t));
    if (inbuf == NULL) {
        return NULL;
    }

    srand((unsigned int)time(NULL));
    /* size_t index: an int counter cannot cover every size_t length. */
    for (size_t i = 0; i < input_sz; i++) {
        inbuf[i] = (uint8_t)((uint8_t)rand() % 254 + 1);
    }

    return inbuf;
}
ctx->algorithm->poll(&ctx->sess_array[idx], 1); + } else { + ctx->algorithm->poll(&ctx->sess, 1); + } + } + param->done = 0; + + if (param->loop_index > 0) { + ctx->out_total_len += param->dst_len; + ctx->finish_num++; + return; + } + + // todo: 考虑回调的 status 判断 ====== + // if (param->result.status != 0 && ctx->chunk_len <= 64 * 1024) { + // printf("Async compress callback status not ok!\n============== exit =============\n"); + // exit(-1); + // } + + struct compress_out_buf *out_buf = (struct compress_out_buf *)malloc(sizeof(struct compress_out_buf)); + if (out_buf == NULL) { + return; + } + ctx->algorithm->prepare_outbuf(ctx, out_buf, param); + + ctx->finish_num++; +} + +static int get_session_index(struct compress_ctx *ctx) { + int idx = ctx->sn % ctx->sess_count; + return idx; +} + +static int do_real_compression(struct compress_ctx *ctx, struct compress_param *param) +{ + if (ctx->compress_or_decompress) { // 压缩流程。 + if (ctx->algorithm->async_compress) { + if(ctx->sess_count > 1) { + int idx = get_session_index(ctx); + return ctx->algorithm->async_compress(&ctx->sess_array[idx], param); + } + return ctx->algorithm->async_compress(&ctx->sess, param); + } else { + return ctx->algorithm->compress(param); + } + } else { // 解压逻辑 + if (ctx->algorithm->async_decompress) { + if(ctx->sess_count > 1) { + int idx = get_session_index(ctx); + return ctx->algorithm->async_decompress(&ctx->sess_array[idx], param); + } + return ctx->algorithm->async_decompress(&ctx->sess, param); + } else { + return ctx->algorithm->decompress(param); + } + } + return 0; +} + +static void compress_ctx_init(struct compress_ctx *ctx, int compress_or_decompress, unsigned int inflight_num, + unsigned int chunk_len, compression_algorithm_t *algorithm, int is_test_crc, int sess_nums) +{ + memset(ctx, 0, sizeof(struct compress_ctx)); + ctx->algorithm = algorithm; + ctx->chunk_len = chunk_len; + ctx->sn = 0; + ctx->finish_num = 0; + ctx->compress_or_decompress = compress_or_decompress; + 
#define MAX_CPUS 512 // maximum number of CPU ids that can be recorded for binding
static int g_taskset_cpus_arr[MAX_CPUS];
static int g_config_bind_cpus_count = 0; // number of valid entries in g_taskset_cpus_arr
/**
 * Parse a CPU binding specification into a flat list of CPU ids.
 *
 * The specification is a comma-separated list whose items are either a single
 * id ("2") or an inclusive range ("6-10"). Example: "3,4,6-10,12" yields
 * [3,4,6,7,8,9,10,12].
 *
 * Parsed ids are appended starting at index *bindCpusCount, so callers
 * normally pass a counter initialised to 0. At most MAX_CPUS ids are stored in
 * total; anything beyond that is silently dropped. A range whose start is
 * greater than its end contributes nothing.
 *
 * @param range          [in]  specification string; it is modified in place
 *                             (tokenised with strtok and split at '-')
 * @param tasksetCpuArr  [out] array with room for MAX_CPUS ids
 * @param bindCpusCount  [in,out] number of ids stored so far; on return it
 *                             holds the real number of stored ids
 */
void parseTasksetLists(char *range, int *tasksetCpuArr, int *bindCpusCount)
{
    if (range == NULL || tasksetCpuArr == NULL || bindCpusCount == NULL) {
        return;
    }

    for (char *token = strtok(range, ","); token != NULL; token = strtok(NULL, ",")) {
        int start;
        int end;
        char *dash = strchr(token, '-');

        if (dash != NULL) {
            // Range item such as "1-3": split at the dash and parse both ends.
            *dash = '\0';
            start = atoi(token);
            end = atoi(dash + 1);
        } else {
            // Single id such as "2": treat it as a one-element range.
            start = atoi(token);
            end = start;
        }

        for (int cpu = start; cpu <= end; cpu++) {
            // Check capacity BEFORE storing so the array is never overrun, and
            // leave the counter at the number of ids actually stored.
            if (*bindCpusCount >= MAX_CPUS) {
                return;
            }
            tasksetCpuArr[(*bindCpusCount)++] = cpu;
        }
    }
}
"compress filename : %s\n", in_filename); + src_len = read_inputFile(ctx, in_filename, inbuf); + } else { + *inbuf = get_compress_input(ctx->chunk_len); + src_len = ctx->chunk_len; + } + + if (g_file_chunk_size == 0) { + g_file_chunk_size = (src_len / 1024) + 1; + } + + if (!*inbuf) { + fprintf(stderr, "inbuf is NULL!\n"); + return -1; + } + return src_len; +} +static void check_and_reset_src_buf_num(struct compress_ctx *ctx) { + int is_asyc_compress = ctx->algorithm->async_compress != NULL && ctx->compress_or_decompress != 0; + int is_asyc_decompress = ctx->algorithm->async_decompress != NULL && !ctx->compress_or_decompress; + int has_async_test = is_asyc_compress || is_asyc_decompress; + if (((size_t)g_file_chunk_size * 1024) <= HPAGE_SIZE && has_async_test) { + // 分片为4k的模式下,使用单个buf节点性能最优,比4个节点的情况性能提升约4%。 + // 在QAT中小块分片性能测试时,使用单个节点性能最优。 + if(g_file_chunk_size < 128) { + ctx->src_buf_num = 1; + } else if ((size_t)(g_file_chunk_size*1024) > HW_MAX_SGE_LEN) { // 如果分片大小超过8M,那么不能一次性传下去 + ctx->src_buf_num = g_file_chunk_size*1024 / HW_MAX_SGE_LEN + 1; // 分片输入较大时,对分片进行8M的切分组装sgl。 + } else { + // 此处src_buf_num可修改为其他值,用于测试将一段src组装为多个链表sge节点的功能和性能。 + ctx->src_buf_num = 1; + } + } +} + +#define PRINT_DELAY_DATA_LEN 6 +static void printf_perf_data(struct compress_ctx *ctx, struct timeval start, struct timeval stop, uLong src_len, + const char* in_filename, const char* out_filename,int multi) +{ + gettimeofday(&stop, NULL); + uLong time1 = (stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec - start.tv_usec; + + uLong stream_len = ctx->compress_or_decompress ? src_len * ctx->loop_times : ctx->out_total_len; + float speed1 = 1000000.0 / time1 * multi * stream_len / (1 << 20); + float iops = 1000.0 * ctx->sn / time1; + + printf("%s %s perf result when loop %d times: ", ctx->algorithm->name, ctx->compress_or_decompress ? + "compress" : "decompress", ctx->loop_times); + printf( "file:%s. 
chunk %d kb.\ntime used: %lus, speed = %.1fMB/s (%.3fGB/s), ", + in_filename, g_file_chunk_size, time1 / 1000000, speed1, speed1 / 1024); + printf("iops = %.3fk, %s latency avg = %.3fus, latency avg per io = %.3fus\n", + iops, + ctx->compress_or_decompress ? "compress" : "decompress", + 1.0 * time1 / ctx->sn, + 1.0 * time1 / ctx->sn * ctx->inflight_num); + if (g_log_level) { + double resp_latencies[PRINT_DELAY_DATA_LEN] = {0}; + int p_counts[PRINT_DELAY_DATA_LEN] = {-200, 0, 50, 90, 99, 999}; + get_percent_latencies(ctx->all_delays, resp_latencies, p_counts, PRINT_DELAY_DATA_LEN, ctx->sn); + printf("%s delay result: chunk %d kb. inflightNum: %d, total test %d times. \n", + ctx->algorithm->name, g_file_chunk_size, ctx->inflight_num, ctx->sn); + printf("P_avg : %.2f us\n", get_average_latency(ctx->all_delays, ctx->sn)); + for (int i = 0; i < PRINT_DELAY_DATA_LEN; ++i) { + if (p_counts[i] == 0) { + printf("P_min : %.2f us\n", resp_latencies[i]); + } else if (p_counts[i] <= -1) { + printf("P_max : %.2f us\n", resp_latencies[i]); + } else { + printf("P%-4d : %.2f us\n", p_counts[i], resp_latencies[i]); + } + } + char *cahr_print_table_data = getenv("PRINT_TABLE_DATA");; + if (cahr_print_table_data != NULL) { + char *task_queue_num = getenv("KAE_LZ4_ASYNC_THREAD_NUM"); + int kae_task_num = 12; + if (task_queue_num != NULL) { + kae_task_num = atoi(task_queue_num); + } + printf( "console.log('kae-threads:%d, multi:%d,file:%s. chunk %d kb. 
inflightNum: %d, alg:%s ');", + kae_task_num, multi, in_filename, g_file_chunk_size, ctx->inflight_num, ctx->algorithm->name); + // // function t(r){console.log(r.split(' ').join(' '));} + double this_rate = (double)src_len * ctx->loop_times / ctx->out_total_len; + printf("t('%d 1-KAE %.3f %.3fGB/s 0 0 0 %.2fs %.2fus %.2fus %.2fus %.2fus %.2fus %.2fus %.2fus %d+1') \n", + g_file_chunk_size, + this_rate, + speed1 / 1024, + time1 / 1000000.0, + get_average_latency(ctx->all_delays, ctx->sn), + resp_latencies[0], + resp_latencies[1], + resp_latencies[2], + resp_latencies[3], + resp_latencies[4], + resp_latencies[5], + kae_task_num); + } + } + + double compress_rate = (double)src_len * ctx->loop_times / ctx->out_total_len; + fprintf(stdout, "compress_size is %luB = %.3lfMB, compress_rate is %.3f\n", + ctx->out_total_len, 1.0 * (float)ctx->out_total_len / (1 << 20), compress_rate); + if (out_filename && ctx->thread_id == 0) { + write_outputFile(out_filename, &(ctx->out_buf_list), ctx->sn / ctx->loop_times); + } +} + +static int wait_for_all_fork_done() +{ + int ret = -1; + while (1) { + ret = wait(NULL); + if (ret == -1) { + if (errno == EINTR) { + continue; + } + break; + } + } + return 0; +} + +static int do_comp_with_split_file( + struct compress_ctx *ctx, void *inbuf, uLong src_len, void *outbuf, uLong output_sz, unsigned long *out_offset) +{ + int ret = 0; + unsigned int remaining = src_len; + size_t chunk_size = (size_t)g_file_chunk_size * 1024; + + void *start_buf = inbuf; + while (remaining > 0) { + struct compress_param *param = NULL; + + while (ctx->param_buf[ctx->param_index].done != 0) { + compress_async_polling(&ctx->param_buf[ctx->param_index]); + } + param = &ctx->param_buf[ctx->param_index]; + ctx->param_index = (ctx->param_index + 1) % ctx->inflight_num; + // 单次接口调用时延 + if (g_log_level == 1) { + param->start_time = get_ns(); + } + param->done = 2; + + unsigned int chunk_src_len = (remaining > chunk_size) ? 
chunk_size : remaining; + unsigned int chunk_len_this_loop = chunk_src_len; // chunk_src_len 作为 src_len 传入压缩解压函数,可能会被修改 + + unsigned int output_size_chunk = ctx->algorithm->bound(chunk_size); // 实际本次压缩后产物的长度 + output_size_chunk += 4; + output_size_chunk -= output_size_chunk % 4; + uLong output_sz_tmp = output_size_chunk; + void *dst_start = outbuf + *out_offset; // 使用总内存里面的部分空间 + + param->src_buf = inbuf; + param->src_buf_offset = start_buf - inbuf; + param->src_len = chunk_len_this_loop; + param->dst_buf = dst_start; + param->dst_len = output_sz_tmp; + param->ctx = ctx; + param->sn = ctx->sn; + param->loop_index = ctx->loop_index; + if (ctx->algorithm->prepare_param) + ctx->algorithm->prepare_param(ctx, param); + + ret = do_real_compression(ctx, param); + if (ret != 0) { + printf("Error: do_real_compression error. ret = %d \nexit\n", ret); + return ret; + } + ctx->sn++; + + // 更新输入和剩余长度 + start_buf += chunk_len_this_loop; + remaining -= chunk_len_this_loop; + *out_offset += output_size_chunk; + } + return ret; +} + +static int prepare_tuple_buf(struct compress_ctx *ctx, size_t src_len) +{ + size_t tuple_buf_len = ((size_t)g_file_chunk_size * 1024) * (src_len / ((size_t)g_file_chunk_size * 1024) + 1) * 8; + size_t huge_page_num = tuple_buf_len * sizeof(Bytef) / HPAGE_SIZE + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 + size_t total_size = huge_page_num * HPAGE_SIZE; + ctx->tuple_buf = get_huge_pages(total_size); + + if (ctx->tuple_buf == NULL) { + return -1; + } + + memset(ctx->tuple_buf, 0, total_size); + + struct cache_page_map* cache = init_cache_page_map(ctx->tuple_buf, total_size); + if (cache == NULL) { + printf("init_cache_page_map failed\n"); + return -1; + } + + ctx->tuple_page_info = cache; + ctx->tuple_buf_offset = 0; + ctx->tuple_buf_len = total_size; + return 0; +} + +static int start_work(struct compress_ctx *ctx, const char* in_filename, const char* out_filename, int multi, + int window_bits, int level) +{ + if (multi == 0) { multi = 1; } + + int ret = 0; + 
int i, j; + pid_t pid_child = 1; + fflush(stdout); + fflush(stderr); + ctx->fork_id = 0; + for (i = 0; i < multi - 1; i++) { + pid_child = fork(); + if (pid_child == 0 || pid_child == -1) { + ctx->fork_id = (i + 1); + if (g_config_bind_cpus_count > 0) { + set_cpu_affinity_for_child(i, 1); + } + break; + } + } + // 单进程下测试时不给唯一的父进程绑核。 + if (pid_child > 0) { + if (g_config_bind_cpus_count > 0 && multi > 1) { + set_cpu_affinity_for_child(multi - 1, 1); + } + } + + void *inbuf = NULL; + uLong src_len = get_src_content(ctx, in_filename, &inbuf); + check_and_reset_src_buf_num(ctx); + ctx->src_buf = inbuf; + ctx->src_len = src_len; + + if (ctx->algorithm->init) + ctx->algorithm->init(ctx); + + uLong output_sz; + if(ctx->compress_or_decompress) { // 压缩空间预估理论上不同算法各有自己的计算规则 + if (ctx->algorithm->bound) { + output_sz = ctx->algorithm->bound(src_len); + } else { + output_sz = compressBound(src_len) * 2; // 这是zlib的压缩后空间测算 + } + output_sz *= 2; + } else { // 解压极端情况:1、压缩率超高,需要最大250倍以上的空间。 2、压缩率超低,需要的空间最大不超过1G + output_sz = MIN(src_len * 300, 1*1024*1024*1024); + } + // tips: 对文件分片压缩时,此处针对整体文件预估的空间是不足的。在循环次数较多时,空间会不够,可以继续增大。 + void *outbuf = malloc(output_sz * sizeof(uint8_t) * 2); + if (outbuf == NULL) { + printf("Error: 申请解压空间失败: %.3f G \n", 1.0 * (output_sz * sizeof(uint8_t) * ctx->loop_times) / (1 << 30)); + return -1; + } + + if (prepare_tuple_buf(ctx, src_len) != 0) { + return -1; + } + + struct timeval start, stop; + gettimeofday(&start, NULL); + unsigned long out_offset = 0; // 用于选择 outbuf 填充数据的偏移值。 + // 要循环压缩解压多少次 + for (j = 0; j < ctx->loop_times; j++) { + ctx->loop_index = j; + if (j > 0) { // 为第1次之后的循环的产物复用空间 + out_offset = output_sz; + ctx->tuple_buf_offset = ctx->tuple_buf_len / 2; + } + if (g_file_chunk_size != 0) { // 分片逻辑 + ret = do_comp_with_split_file(ctx, inbuf, src_len, outbuf, output_sz, &out_offset); + } + if (ret < 0) { + printf("Error: 压缩解压失败 ret=%d \n", ret); + } + } + + while (ctx->sn != ctx->finish_num) { + while (ctx->param_buf[ctx->param_index].done != 
0) { + compress_async_polling(&ctx->param_buf[ctx->param_index]); + } + + ctx->param_index = (ctx->param_index + 1) % ctx->inflight_num; + } + + if (pid_child > 0 && ret >= 0) { + ret = wait_for_all_fork_done(); + + printf_perf_data(ctx, start, stop, src_len, in_filename, out_filename, multi); + printf("\nall process done====================。 \n \n"); + } + + if (g_enable_huge_pages) { + int huge_page_num = (int)(src_len * sizeof(Bytef) / HPAGE_SIZE) + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 + size_t total_size = huge_page_num * HPAGE_SIZE; + release_huge_pages(inbuf, total_size); + if(ctx->tuple_buf != NULL) { + release_huge_pages(ctx->tuple_buf, ctx->tuple_buf_len); + } + } else { + free(inbuf); + } + free(outbuf); + + return ret; +} +static int start_work_decompress( + struct compress_ctx *ctx, const char *in_filename, const char *out_filename, int multi, int window_bits, int level); +static void *start_work_thread(void *arg) +{ + struct thread_compress_args *args = (struct thread_compress_args *)arg; + struct compress_ctx *ctx = &args->ctx; + const char *in_filename = args->in_filename; + const char *out_filename = args->out_filename; + int multi = args->multi; + int window_bits = args->window_bits; + int level = args->level; + if (ctx->compress_or_decompress || g_file_chunk_size == 0) + start_work(ctx, in_filename, out_filename, multi, window_bits, level); + else + start_work_decompress(ctx, in_filename, out_filename, multi, window_bits, level); + + compress_ctx_destory(&args->ctx); + free(args); // 释放 args + return NULL; +} +// 分片解压。 +// 读取本地源数据信息。读取本地总文件信息。逐个块解压 --> 写文件拼接到一起 +static int start_work_decompress( + struct compress_ctx *ctx, const char *in_filename, const char *out_filename, int multi, int window_bits, int level) +{ + int ret; + int i, j; + pid_t pid_child = 1; + fflush(stdout); + fflush(stderr); + ctx->fork_id = 0; + for (i = 0; i < multi - 1; i++) { + pid_child = fork(); + if (pid_child == 0 || pid_child == -1) { + ctx->fork_id = (i + 1); + break; + 
} + } + + void *inbuf = NULL; + uLong src_len = get_src_content(ctx, in_filename, &inbuf); + check_and_reset_src_buf_num(ctx); + + // 从文件读取元数据 + struct fragment_metadata *loaded_fragments = NULL; + unsigned int fragment_count; + load_metadata_from_file(in_filename, &loaded_fragments, &fragment_count); + // 打印读取的元数据 + // for (unsigned int i = 0; i < fragment_count; i++) { + // printf("Fragment %d: Offset = %u, Length = %u chunk_len = 0x%lx\n", i + 1, loaded_fragments[i].offset, loaded_fragments[i].len, loaded_fragments[i].src_chunk_len); + // } + + ctx->src_buf = inbuf; + ctx->src_len = src_len; + if (multi == 0) { multi = 1; } + if (ctx->algorithm->init) + ctx->algorithm->init(ctx); + + uLong output_sz = MIN(src_len * 300, 1*1024*1024*1024); + void *outbuf = malloc(output_sz * sizeof(uint8_t) * 2); + if (outbuf == NULL) { + printf("Error: 申请解压空间失败: %.3f G \n", 1.0 * (output_sz * sizeof(uint8_t) * ctx->loop_times)/(1 << 30) ); + return -1; + } + + if (prepare_tuple_buf(ctx, loaded_fragments[0].src_chunk_len * fragment_count) != 0) { + return -1; + } + + struct timeval start, stop; + gettimeofday(&start, NULL); + + size_t out_offset = 0; // 总内存中的偏移,每一小块儿使用不同的偏移。 + for (j = 0; j < ctx->loop_times; j++) { + if (j > 0) { // 为第1次之后的循环的产物复用空间 + out_offset = output_sz; + ctx->tuple_buf_offset = ctx->tuple_buf_len / 2; + } + if (g_file_chunk_size != 0) { // 分片逻辑 + for (int k = 0; k < fragment_count; k++) { + size_t this_offset = loaded_fragments[k].offset; + size_t this_src_len = loaded_fragments[k].len; + + struct compress_param *param = NULL; + + while (ctx->param_buf[ctx->param_index].done != 0) { + compress_async_polling(&ctx->param_buf[ctx->param_index]); + } + param = &ctx->param_buf[ctx->param_index]; + ctx->param_index = (ctx->param_index + 1) % ctx->inflight_num; + // 单次接口调用时延 + if (g_log_level == 1) { + param->start_time = get_ns(); + } + param->done = 2; + ctx->loop_index = j; // 原来没有这一句,为了适配 param->loop_index = j + // param->loop_index = j; + + size_t 
output_size_chunk = loaded_fragments[k].src_chunk_len; // 预估本次压缩后产物的长度 + void *dst_start = outbuf + out_offset; // 使用总内存里面的部分空间 + + param->src_buf = inbuf; + param->src_buf_offset = this_offset; + param->src_len = this_src_len; + param->dst_buf = dst_start; + param->dst_len = output_size_chunk; + param->ctx = ctx; + param->sn = ctx->sn; + param->loop_index = ctx->loop_index; + if (ctx->algorithm->prepare_param) + ctx->algorithm->prepare_param(ctx, param); + + ret = do_real_compression(ctx, param); + if(ret != 0) { + printf("Error: sn %d len=%d;offset=%lx. end=%lx.do_real_compression decomp error. ret = %d \nexit\n ", + param->sn, loaded_fragments[k].len, this_offset, this_offset + loaded_fragments[k].len, ret); + write_outputFile_2( + "./error-split-file-compressed-data.compressed", inbuf + this_offset, loaded_fragments[k].len); + return ret; + } + ctx->sn++; + + out_offset += output_size_chunk; // 偏移本次解压实际使用的空间 + } + } + } + + while (ctx->sn != ctx->finish_num) { + while (ctx->param_buf[ctx->param_index].done != 0) { + compress_async_polling(&ctx->param_buf[ctx->param_index]); + } + + ctx->param_index = (ctx->param_index + 1) % ctx->inflight_num; + } + + if (pid_child > 0) { + ret = wait_for_all_fork_done(); + + printf_perf_data(ctx, start, stop, src_len, in_filename, out_filename, multi); + printf("\nall decompress done==================== \n \n"); + } + if (g_enable_huge_pages) { + int huge_page_num = (int)(src_len * sizeof(Bytef) / HPAGE_SIZE) + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 + size_t total_size = huge_page_num * HPAGE_SIZE; + release_huge_pages(inbuf, total_size); + if(ctx->tuple_buf != NULL) { + release_huge_pages(ctx->tuple_buf, ctx->tuple_buf_len); + } + } else { + free(inbuf); + } + free(outbuf); + + return ret; +} +static void format_cpu_env(char *str) +{ + char *token = strtok(str, "-"); + int count = 0; + + while (token != NULL) { + count++; + if (count == 3) { + // 获取第3个字段并转换为整数 + g_cpu_threads_per_core = atoi(token); + break; // 一旦获取到第3个字段,跳出循环 + } 
/**
 * Compare two files byte by byte.
 *
 * @param file_un  path of the first file (the original, uncompressed data)
 * @param file_de  path of the second file (the decompressed data)
 * @return 0 when both files have identical content and length,
 *         1 when they differ in content or length,
 *        -1 when a path is NULL, a file cannot be opened, or a read error
 *           occurs
 *
 * Callers that only test for "!= 0" keep working: every failure is non-zero.
 */
int compare_files(char *file_un, char *file_de) {
    if (file_un == NULL || file_de == NULL) {
        return -1;
    }

    FILE *file1 = fopen(file_un, "rb");
    if (file1 == NULL) {
        fprintf(stderr, "compare_files: cannot open %s\n", file_un);
        return -1;
    }
    FILE *file2 = fopen(file_de, "rb");
    if (file2 == NULL) {
        fprintf(stderr, "compare_files: cannot open %s\n", file_de);
        fclose(file1);
        return -1;
    }

    int result = 0;
    for (;;) {
        // fgetc() returns int so that EOF stays distinct from the data byte
        // 0xFF; storing the result in a char would merge the two.
        int byte1 = fgetc(file1);
        int byte2 = fgetc(file2);

        if (byte1 != byte2) {
            // Different byte, or exactly one file ended: content differs.
            result = 1;
            break;
        }
        if (byte1 == EOF) {
            // Both streams ended at the same position.
            break;
        }
    }

    // An EOF caused by a read error must not be reported as "identical".
    if (ferror(file1) || ferror(file2)) {
        result = -1;
    }

    fclose(file1);
    fclose(file2);
    return result;
}
MB, 64 MB, 128 MB}; + int compress_proba_options[11] = {0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; + int muti_options[1] = {1}; + int threadNum_options[1] = {1}; + int loop_times_options[4] = {1, 1000, 2000, 5000}; + int inflight_num_options[7] = {24, 48, 64, 128, 256, 512, 1024}; + + int params_num = 8; + int option_counts[8] = {2, 13, 13, 11, 1, 1, 4, 7}; // 每个参数的选项数量 + int indices[8] = {0}; // 每个参数的当前选项索引 + + for (int i = 0; i < run_times; ++i) { + + compress = (i + 1) % 2; // 先压缩后解压 + char* input_filename = (compress == 1) ? "uncomp_data" : "comped_data"; + char* output_filename = (compress == 1) ? "comped_data" : "decomp_data"; + + // 迭代法遍历各种参数组合 + if (compress == 1) { // 每次测试压缩时读入参数,解压时维持压缩时参数 + strcpy(algorithm_name, algorithm_options[indices[0]]); + g_file_chunk_size = file_chunk_size_options[indices[1]]; + g_file_chunk_size /= 1024; // 转换成KB + RDGsize = random_data_size_options[indices[2]]; + RGDproba = compress_proba_options[indices[3]]; + multi = muti_options[indices[4]]; + threadNum = threadNum_options[indices[5]]; + loop_times = loop_times_options[indices[6]]; + inflight_num = inflight_num_options[indices[7]]; + + // 生成随机数据 + char *RDGbuffer = (char *)malloc(RDGsize); + + RDG_genBuffer(RDGbuffer, RDGsize, (double)(RGDproba / 100), RDGlitProba, RDGseed); + FILE *file_in = fopen(input_filename, "wb"); + + size_t written = fwrite(RDGbuffer, sizeof(char), RDGsize, file_in); + if (written != RDGsize) { + printf("Error: write file failed\n"); + return 0; + } + fclose(file_in); + free(RDGbuffer); + // 更新迭代索引 + int j = params_num - 1; + while(j >= 0) { + indices[j]++; + if (indices[j] < option_counts[j]) { + break; + } + indices[j] = 0; + j--; + } + if (j < 0) { + printf("seed : %d Fuzz test done, seed + 1.\n", RDGseed); + ++RDGseed; + } + } + + const char* in_filename = input_filename; + const char* out_filename = output_filename; + chunk_len *= 1024; + printf("kzip fuzz test: algorithm: %s, multi process %d, threadNum %d, data length: %lu(KB), loop times: 
%d, window_bits : %d, level : %d, chunk: %d(KB), %s, in_file : %s, out_file : %s,\n", + algorithm_name, multi, threadNum, RDGsize/1024, loop_times, window_bits, level, g_file_chunk_size, compress == 1 ? "compress" : "decompress", in_filename, out_filename); + + compression_algorithm_t *algorithm = get_algorithm(algorithm_name); + if (!algorithm) { + printf("Error: Algorithm %s not found.\n", algorithm_name); + return -1; + } + struct compress_ctx ctx; + compress_ctx_init(&ctx, compress, inflight_num, chunk_len, algorithm, 0, 1); + ctx.loop_times = loop_times; + if (!ctx.compress_or_decompress) { + ctx.loop_times = 1; + multi = 1; + } + + if (ctx.algorithm->init) + ctx.algorithm->init(&ctx); + + if (!ctx.compress_or_decompress && g_file_chunk_size > 0) { // 如果是分片解压,单独处理 + ret = start_work_decompress(&ctx, in_filename, out_filename, multi, window_bits, level); + } else { + if (threadNum > 1) { + pthread_t threads[threadNum]; + int j; + for (j = 0; j < threadNum; j++) { + struct thread_compress_args *args = malloc(sizeof(struct thread_compress_args)); + compress_ctx_init(&args->ctx, compress, inflight_num, chunk_len, algorithm, 0, 1); + args->ctx.thread_id = j; + args->ctx.loop_times = loop_times; + args->in_filename = in_filename; + args->out_filename = out_filename; + args->multi = multi; + args->window_bits = window_bits; + args->level = level; + if (pthread_create(&threads[j], NULL, start_work_thread, args) != 0) { + perror("pthread_create failed"); + exit(EXIT_FAILURE); + } + } + for (j = 0; j < threadNum; j++) { + pthread_join(threads[j], NULL); + } + } else { + ret = start_work(&ctx, in_filename, out_filename, multi, window_bits, level); + } + } + if (ret < 0) { + printf("kzip fuzz failed ! seed : %u, %s, file : %s\n", RDGseed, compress == 1 ? 
"compress" : "decompress", in_filename); + return ret; + } + if (!compress) { // 解压后比较两个文件内容是否一致 + ret = compare_files("uncomp_data", "decomp_data"); // 返回值1时表示文件不同,返回值0时表示文件相同 + if (ret != 0) { + printf("Error: find difference between uncomp_data and decomp_data! \n"); + compress_ctx_destory(&ctx); + return ret; + } + } + compress_ctx_destory(&ctx); + } + printf("kzip fuzz test done. all test succeess.\n"); + return ret; +} + +static void usage(void) +{ + printf("usage: \n"); + printf(" -A: set algorithm type(kaelz4|kaelz4_frame|kaelz4async_block|kaelz4async_frame). default is kaelz4\n"); + printf(" -d: compress or decompress\n"); + printf(" -m: multi process. default is 2. use fork() to start multi process\n"); + printf(" -t: thread num. default is 1. if thread num > 1, use pthread_create for multi compression. \n"); + printf(" -i: inflight num for calling async compression at same time. default 16\n"); + printf(" -f: input filename(-l useless if this work)\n"); + printf(" -g: show delay data in compression results or not. 
default: 0\n");
+    printf(" -o: output filename\n");
+    printf(" -n: loop times\n");
+    printf(" -s: input file split chunk size(KB)\n");
+    printf(" -P: use Huge Pages to save uncompress data \n");
+    printf(" -p: use polling mode to wait for async operation done\n");
+    printf(" -r: take crc32 checksum when data is callback.default: 0 \n");
+    printf(" -e: use how many sessions to test compression at same time.default: 1 \n");
+    printf(" example: ./kzip -A kaelz4 -m 2 -f ./kzip -o ./kzip.compressd -n 1000\n");
+    printf(" ./kzip -A kaelz4 -d -m 2 -f ./kzip.compressd -o ./kzip.origin -n 1000\n");
+}
+
+/*
+ * Copy the value of command-line option -<opt> into a fixed-size buffer.
+ *
+ * dst/dst_size: destination buffer and its capacity in bytes.
+ * src:          NUL-terminated option value (optarg).
+ * Returns 0 on success; prints an error and returns -1 when the value
+ * (including its terminator) does not fit, instead of overflowing dst.
+ */
+static int copy_option(char *dst, size_t dst_size, const char *src, char opt)
+{
+    size_t len = strlen(src);
+
+    if (len >= dst_size) {
+        printf("Error: value of -%c is too long (max %zu characters).\n", opt, dst_size - 1);
+        return -1;
+    }
+    memcpy(dst, src, len + 1);
+    return 0;
+}
+
+/*
+ * Entry point of the kzip perf tool: parses the command line, builds the
+ * compression context(s) and runs the selected algorithm either directly,
+ * as a chunked decompression, or on threadNum worker threads.
+ * Returns 0 on success and a non-zero value on error.
+ */
+int main(int argc, char **argv)
+{
+    // Initialize all algorithms
+    initialize_algorithms();
+    init_env_config();
+
+    const char *optstring = "dm:l:n:w:f:o:v:A:hg:s:c:i:t:r:P:p:e:";
+    int ret = 0;
+    int o = 0;
+    int multi = 1;
+    int level = 6;
+    uLong chunk_len = 1024;
+    int loop_times = 1000;
+    int compress = 1;
+    int window_bits = 15;
+    char input_filename[512] = {0};
+    char output_filename[512] = {0};
+    char algorithm_name[25] = "kaelz4";
+    char cpuConfigStr[20] = "4-40-1-4"; // Default 920B configuration: hyper-threading off, all 4 accelerators in use.
+    int inflight_num = 256;
+    int threadNum = 1;
+    int is_test_crc = 0; // Whether every request carries a crc32 checksum
+    int sess_nums = 1; // Number of sessions created for this run (default 1). When > 1 the sessions are kept in an array and shared by all tasks according to a policy.
+
+    while ((o = getopt(argc, argv, optstring)) != -1) {
+        switch (o) {
+        case 'A':
+            // Bounded copy: algorithm_name holds only 25 bytes
+            if (copy_option(algorithm_name, sizeof(algorithm_name), optarg, 'A') != 0) {
+                return -1;
+            }
+            break;
+        case 'c':
+            // Bounded copy: cpuConfigStr holds only 20 bytes
+            if (copy_option(cpuConfigStr, sizeof(cpuConfigStr), optarg, 'c') != 0) {
+                return -1;
+            }
+            break;
+        case 'd':
+            compress = 0;
+            break;
+        case 'e':
+            sess_nums = atoi(optarg);
+            break;
+        case 'f':
+            if (copy_option(input_filename, sizeof(input_filename), optarg, 'f') != 0) {
+                return -1;
+            }
+            break;
+        case 'g':
+            g_log_level = atoi(optarg);
+            break;
+        case 'i':
+            inflight_num = atoi(optarg);
+            if (inflight_num > 1024) {
+                inflight_num = 1024;
+            }
+            break;
+        case 'l':
+            chunk_len = atoi(optarg);
+            break;
+        case 'm':
+            multi = atoi(optarg);
+            break;
+        case 'n':
+            loop_times = atoi(optarg);
+            break;
+        case 'o':
+            if (copy_option(output_filename, sizeof(output_filename), optarg, 'o') != 0) {
+                return -1;
+            }
+            break;
+        case 'P':
+            // NOTE(review): optstring declares "P:", so -P consumes an argument that is ignored here — confirm this is intended.
+            g_enable_huge_pages = 1;
+            break;
+        case 'p':
+            g_enable_polling_mode = atoi(optarg);
+            break;
+        case 'r':
+            is_test_crc = atoi(optarg);
+            break;
+        case 's':
+            g_file_chunk_size = atoi(optarg);
+            break;
+        case 't':
+            threadNum = atoi(optarg);
+            break;
+        case 'v':
+            level = atoi(optarg);
+            break;
+        case 'w':
+            window_bits = atoi(optarg);
+            break;
+        case 'h':
+            usage();
+            return 0;
+        default:
+            // Unknown options are ignored, as before
+            break;
+        }
+    }
+
+    ret = vaild_algorithm(algorithm_name);
+    if (ret != 0) {
+        return ret;
+    }
+
+    if (argc <= 1) {
+        usage();
+        printf("\ndefault input parameter used\n");
+    }
+    format_cpu_env(cpuConfigStr);
+
+    const char* in_filename = input_filename[0] == 0 ? NULL : input_filename;
+    const char* out_filename = output_filename[0]== 0 ? NULL : output_filename;
+    chunk_len *= 1024;
+
+    printf("kzip perf parameter: algorithm: %s, multi process %d, threadNum %d, stream length: %lu(KB), loop times: %d, window_bits : %d, level : %d, chunk: %d\n",
+        algorithm_name, multi, threadNum, chunk_len/1024, loop_times, window_bits, level, g_file_chunk_size);
+
+    // Look up the algorithm selected by the user
+    compression_algorithm_t *algorithm = get_algorithm(algorithm_name);
+
+    if (!algorithm) {
+        printf("Error: Algorithm %s not found.\n", algorithm_name);
+        return -1;
+    }
+
+    struct compress_ctx *ctx = malloc(sizeof(*ctx));
+    if (ctx == NULL) {
+        perror("malloc compress_ctx failed");
+        return -1;
+    }
+
+    compress_ctx_init(ctx, compress, inflight_num, chunk_len, algorithm, is_test_crc, sess_nums);
+    ctx->loop_times = loop_times;
+
+    if (!ctx->compress_or_decompress && threadNum == 1) { // Chunked decompression is handled separately
+        ret = start_work_decompress(ctx, in_filename, out_filename, multi, window_bits, level);
+    } else {
+        if (threadNum > 1) {
+            // threadNum comes straight from -t; keep the handle array on the heap
+            // rather than in a variable-length array on the stack.
+            pthread_t *threads = calloc((size_t)threadNum, sizeof(*threads));
+            int j;
+
+            if (threads == NULL) {
+                perror("calloc threads failed");
+                exit(EXIT_FAILURE);
+            }
+            for (j = 0; j < threadNum; j++) {
+                // NOTE(review): args is handed over to start_work_thread; whether the
+                // thread releases it is not visible here — confirm ownership.
+                struct thread_compress_args *args = malloc(sizeof(*args));
+
+                if (args == NULL) {
+                    perror("malloc thread_compress_args failed");
+                    exit(EXIT_FAILURE);
+                }
+                compress_ctx_init(&args->ctx, compress, inflight_num, chunk_len, algorithm, is_test_crc, sess_nums);
+                args->ctx.thread_id = j;
+                args->ctx.loop_times = loop_times;
+                args->in_filename = in_filename;
+                args->out_filename = out_filename;
+                args->multi = multi;
+                args->window_bits = window_bits;
+                args->level = level;
+                if (pthread_create(&threads[j], NULL, start_work_thread, args) != 0) {
+                    perror("pthread_create failed");
+                    exit(EXIT_FAILURE);
+                }
+            }
+            for (j = 0; j < threadNum; j++) {
+                pthread_join(threads[j], NULL);
+            }
+            free(threads);
+        } else {
+            ret = start_work(ctx, in_filename, out_filename, multi, window_bits, level);
+        }
+    }
+
+    if (ctx->algorithm->cleanup)
+        ctx->algorithm->cleanup(ctx);
+
+    compress_ctx_destory(ctx);
+    free(ctx);
+    return ret;
+}
diff --git a/scripts/perftest/kzip/include/compress_ctx.h b/scripts/perftest/kzip/include/compress_ctx.h
new file mode 100644
index 0000000..af64c6c
--- /dev/null
+++ b/scripts/perftest/kzip/include/compress_ctx.h
@@ -0,0 +1,155 @@
+#ifndef _COMPRESS_CTX_H
+#define _COMPRESS_CTX_H
+
+#include
+#include
+#include "manage.h"
+
+struct fragment_metadata {
+    unsigned int offset; // start offset of the fragment
+    unsigned int len; // length of the fragment
+    size_t src_chunk_len;
+};
+
+struct compress_out_buf {
+    void *buf_addr;
+    unsigned int len;
+    unsigned int sn;
+    struct compress_out_buf *next;
+    void *src;
+    unsigned int src_len;
+    uint32_t ibuf_crc;
+    uint32_t obuf_crc;
+};
+
+struct compress_ctx;
+
+#ifdef CONFIG_KAELZ4
+#include
+typedef struct {
+    struct kaelz4_result result;
+    struct kaelz4_buffer_list src;
+    struct kaelz4_buffer_list dst; // regular output list handed to the hardware
+    struct kaelz4_buffer_list tuple; // output list handed to the hardware in the special lz77_only mode
+    struct kaelz4_buffer_list *dst_buf_list; // the dst list actually passed to the hardware for compression
+    struct kaelz4_buffer src_buf[1024];
+    struct kaelz4_buffer dst_buf[1024];
+    struct kaelz4_buffer tuple_buf[1024];
+} kaelz4_param;
+#endif
+
+#ifdef CONFIG_KAEZLIB
+#include
+typedef struct {
+    struct kaezip_result result;
+    struct kaezip_buffer_list src;
+    struct kaezip_buffer_list dst;
+    struct kaezip_buffer_list tuple;
+    struct kaezip_buffer_list *dst_buf_list;
+    struct kaezip_buffer src_buf[1024];
+    struct kaezip_buffer dst_buf[1024];
+    struct kaezip_buffer tuple_buf[1024];
+} kaezip_param;
+#endif
+
+
+struct __attribute__((aligned(64))) compress_param {
+    struct compress_ctx *ctx;
+    uint32_t ibuf_crc;
+    uint32_t obuf_crc;
+    unsigned int sn;
+    unsigned int loop_index;
+    unsigned int src_len; // input length of a single compression task
+    void *src_buf; // start memory address of a single compression task
+    size_t src_buf_offset; // offset of the start memory address of a single compression task
+    unsigned int dst_len; // output length of a single compression task
+    void *dst_buf;
+
+    union {
+
+#ifdef CONFIG_KAELZ4
+        kaelz4_param kaelz4_param;
+#endif
+#ifdef CONFIG_KAEZLIB
+        kaezip_param kaezip_param;
+#endif
+    };
+
+    uint64_t start_time;
+    volatile unsigned int done;
+};
+
+struct compress_session {
+    void *kae_sess;
+};
+
+struct compress_ctx {
+    struct compress_param param_buf[1024];
+    compression_algorithm_t *algorithm;
+    unsigned int param_index;
+    unsigned int loop_times;
+    unsigned int inflight_num;
+    unsigned int loop_index;
+    unsigned int sn;
+    volatile unsigned int finish_num;
+    void *src_buf;
+    unsigned long src_len;
+    unsigned long out_total_len;
+    unsigned int chunk_len;
+    int compress_or_decompress;
+    struct compress_out_buf *out_buf_list;
+    struct compress_out_buf *out_buf_tail;
+    void *page_info;
+    size_t total_size;
+    size_t meta_size;
+    void *tuple_page_info;
+    void *tuple_buf;
+    size_t tuple_buf_offset;
+    size_t tuple_buf_len;
+    int thread_id;
+    int with_crc;
+    unsigned int src_buf_num;
+    struct compress_session sess;
+    struct compress_session *sess_array; // array of sess pointers
+    int sess_count; // number of sess pointers, default 1
+
+    uint64_t *all_delays;
+    int is_polling;
+    int is_lz77_mode;
+    int is_zlib;
+    int use_tuple_buf;
+    int fork_id;
+};
+
+struct thread_compress_args {
+    struct compress_ctx ctx;
+    const char *in_filename;
+    const char *out_filename;
+    int multi;
+    int window_bits;
+    int level;
+};
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+#if defined(__AARCH64_CMODEL_SMALL__) && __AARCH64_CMODEL_SMALL__
+#define dsb(opt) \
+    { \
+        asm volatile("dsb " #opt : : : "memory"); \
+    }
+#define rmb() dsb(ld) /* read fence */
+#define wmb() dsb(st) /* write fence */
+#define mb() dsb(sy) /* rw fence */
+#else
+#define rmb() __sync_synchronize() /* read fence */
+#define wmb() __sync_synchronize() /* write fence */
+#define mb() __sync_synchronize() /* rw fence */
+#endif
+
+void *get_physical_address_wrapper(void *usr, void *vaddr, size_t sz);
+void release_huge_pages(void *addr, size_t total_size);
+void *get_huge_pages(size_t total_size);
+struct cache_page_map* init_cache_page_map(void *base_vaddr, size_t total_size);
+#endif
diff --git a/scripts/perftest/kzip/include/delayRecord.h b/scripts/perftest/kzip/include/delayRecord.h
new file mode 100644
index 0000000..a2c11d5
--- /dev/null
+++ b/scripts/perftest/kzip/include/delayRecord.h
@@ -0,0 +1,20 @@
+/*
+ * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * @Description: Recording the statistical delay data
+ * @Author: Ma Xiaofeng
+ * @Date: 2025-3-31
+ * @LastEditTime: 2025-3-31
+ */
+#ifndef DELAY_RECORD_H
+#define DELAY_RECORD_H
+
+#include <stddef.h> /* size_t */
+#include <stdint.h> /* uint64_t */
+
+#define MAX_LATENCY_COUNT 100000000
+void record_latency(uint64_t *all_delays, uint64_t latency, size_t sn);
+void get_percent_latencies(uint64_t *all_delays, double *out_latencies, const int *percentiles, int count, size_t sn);
+double get_average_latency(uint64_t *all_delays, size_t cnt);
+uint64_t get_ns(void);
+
+#endif
diff --git a/scripts/perftest/kzip/include/platform.h b/scripts/perftest/kzip/include/platform.h
new file mode 100644
index 0000000..43a171b
--- /dev/null
+++ b/scripts/perftest/kzip/include/platform.h
@@ -0,0 +1,155 @@
+/*
+    platform.h - compiler and OS detection
+    Copyright (C) 2016-2020, Przemyslaw Skibinski, Yann Collet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/ + +#ifndef PLATFORM_H_MODULE +#define PLATFORM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/* ************************************** +* Compiler Options +****************************************/ +#if defined(_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS /* Disable Visual Studio warning messages for fopen, strncpy, strerror */ +# if (_MSC_VER <= 1800) /* (1800 = Visual Studio 2013) */ +# define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before and */ +# define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */ +# endif +#endif + + +/* ************************************** +* Detect 64-bit OS +* http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros +****************************************/ +#if defined __ia64 || defined _M_IA64 /* Intel Itanium */ \ + || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ /* POWER 64-bit */ \ + || (defined __sparc && (defined __sparcv9 || defined __sparc_v9__ || defined __arch64__)) || defined __sparc64__ /* SPARC 64-bit */ \ + || defined __x86_64__s || defined _M_X64 /* x86 64-bit */ \ + || defined __arm64__ || defined __aarch64__ || defined __ARM64_ARCH_8__ /* ARM 64-bit */ \ + || (defined __mips && (__mips == 64 || __mips == 4 || __mips == 3)) /* MIPS 64-bit */ \ + || defined _LP64 || defined __LP64__ /* NetBSD, OpenBSD */ || defined __64BIT__ /* AIX */ || defined _ADDR64 /* Cray */ \ + || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8) /* gcc */ +# if !defined(__64BIT__) +# define __64BIT__ 1 +# endif +#endif + + +/* ********************************************************* +* Turn on Large Files support (>4GB) for 32-bit Linux/Unix +***********************************************************/ +#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */ +# if !defined(_FILE_OFFSET_BITS) +# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit 
type for ftello, fseeko */ +# endif +# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */ +# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */ +# endif +# if defined(_AIX) || defined(__hpux) +# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */ +# endif +#endif + + +/* ************************************************************ +* Detect POSIX version +* PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows +* PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX +* PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION +************************************************************** */ +#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \ + || defined(__midipix__) || defined(__VMS)) +# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MidnightBSD__) /* BSD distros */ \ + || defined(__HAIKU__) +# define PLATFORM_POSIX_VERSION 200112L +# else +# if defined(__linux__) || defined(__linux) +# ifndef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200809L /* use feature test macro */ +# endif +# endif +# include /* declares _POSIX_VERSION */ +# if defined(_POSIX_VERSION) /* POSIX compliant */ +# define PLATFORM_POSIX_VERSION _POSIX_VERSION +# else +# define PLATFORM_POSIX_VERSION 0 +# endif +# endif +#endif +#if !defined(PLATFORM_POSIX_VERSION) +# define PLATFORM_POSIX_VERSION -1 +#endif + + +/*-********************************************* +* Detect if isatty() and fileno() are available +*********************************************** */ +#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__) +# include /* isatty */ +# define IS_CONSOLE(stdStream) 
isatty(fileno(stdStream)) +#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__) +# include /* _isatty */ +# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +#elif defined(WIN32) || defined(_WIN32) +# include /* _isatty */ +# include /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include /* FILE */ +static __inline int IS_CONSOLE(FILE* stdStream) +{ + DWORD dummy; + return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy); +} +#else +# define IS_CONSOLE(stdStream) 0 +#endif + + +/****************************** +* OS-specific Includes +***************************** */ +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) +# include /* _O_BINARY */ +# include /* _setmode, _fileno, _get_osfhandle */ +# if !defined(__DJGPP__) +# include /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include /* FSCTL_SET_SPARSE */ +# define SET_BINARY_MODE(file) { int unused=_setmode(_fileno(file), _O_BINARY); (void)unused; } +# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); } +# else +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +# define SET_SPARSE_FILE_MODE(file) +# endif +#else +# define SET_BINARY_MODE(file) +# define SET_SPARSE_FILE_MODE(file) +#endif + + + +#if defined (__cplusplus) +} +#endif + +#endif /* PLATFORM_H_MODULE */ diff --git a/scripts/perftest/kzip/include/util.h b/scripts/perftest/kzip/include/util.h new file mode 100644 index 0000000..3192ddc --- /dev/null +++ b/scripts/perftest/kzip/include/util.h @@ -0,0 +1,697 @@ +/* + util.h - utility functions + Copyright (C) 2016-2020, Przemyslaw Skibinski, Yann Collet + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#ifndef UTIL_H_MODULE +#define UTIL_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/*-**************************************** +* Dependencies +******************************************/ +#include "platform.h" /* PLATFORM_POSIX_VERSION */ +#include /* size_t, ptrdiff_t */ +#include /* malloc */ +#include /* strlen, strncpy */ +#include /* fprintf, fileno */ +#include +#include /* stat, utime */ +#include /* stat */ +#if defined(_WIN32) +# include /* utime */ +# include /* _chmod */ +#else +# include /* chown, stat */ +# if PLATFORM_POSIX_VERSION < 200809L +# include /* utime */ +# else +# include /* AT_FDCWD */ +# include /* for utimensat */ +# endif +#endif +#include /* time */ +#include /* INT_MAX */ +#include + + + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/* ************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC, 
MacOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +# define UTIL_fseek _fseeki64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define UTIL_fseek fseeko +#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) +# define UTIL_fseek fseeko64 +#else +# define UTIL_fseek fseek +#endif + + +/*-**************************************** +* Sleep functions: Windows - Posix - others +******************************************/ +#if defined(_WIN32) +# include +# define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) +# define UTIL_sleep(s) Sleep(1000*s) +# define UTIL_sleepMilli(milli) Sleep(milli) +#elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */ +# include +# include /* setpriority */ +# include /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */ +# if defined(PRIO_PROCESS) +# define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20) +# else +# define SET_REALTIME_PRIORITY /* disabled */ +# endif +# define UTIL_sleep(s) sleep(s) +# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L) /* nanosleep requires POSIX.1-2001 */ +# define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); } +# else +# define UTIL_sleepMilli(milli) /* disabled */ +# endif +#else +# define SET_REALTIME_PRIORITY /* disabled */ +# define UTIL_sleep(s) /* disabled */ +# define UTIL_sleepMilli(milli) /* disabled */ +#endif + + +/*-**************************************** +* stat() functions +******************************************/ +#if defined(_MSC_VER) +# define UTIL_TYPE_stat __stat64 +# define UTIL_stat _stat64 +# define UTIL_fstat _fstat64 +# define UTIL_STAT_MODE_ISREG(st_mode) ((st_mode) & S_IFREG) +#elif defined(__MINGW32__) && defined 
(__MSVCRT__) +# define UTIL_TYPE_stat _stati64 +# define UTIL_stat _stati64 +# define UTIL_fstat _fstati64 +# define UTIL_STAT_MODE_ISREG(st_mode) ((st_mode) & S_IFREG) +#else +# define UTIL_TYPE_stat stat +# define UTIL_stat stat +# define UTIL_fstat fstat +# define UTIL_STAT_MODE_ISREG(st_mode) (S_ISREG(st_mode)) +#endif + + +/*-**************************************** +* fileno() function +******************************************/ +#if defined(_MSC_VER) +# define UTIL_fileno _fileno +#else +# define UTIL_fileno fileno +#endif + +/* ************************************* +* Constants +***************************************/ +#define LIST_SIZE_INCREASE (8*1024) + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(__INTEL_COMPILER) +# pragma warning(disable : 177) /* disable: message #177: function was declared but never referenced, useful with UTIL_STATIC */ +#endif +#if defined(__GNUC__) +# define UTIL_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define UTIL_STATIC static inline +#elif defined(_MSC_VER) +# define UTIL_STATIC static __inline +#else +# define UTIL_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + + +/*-**************************************** +* Allocation functions +******************************************/ +/* + * A modified version of realloc(). + * If UTIL_realloc() fails the original block is freed. +*/ +UTIL_STATIC void* UTIL_realloc(void* ptr, size_t size) +{ + void* const newptr = realloc(ptr, size); + if (newptr) return newptr; + free(ptr); + return NULL; +} + + +/*-**************************************** +* String functions +******************************************/ +/* + * A modified version of realloc(). + * If UTIL_realloc() fails the original block is freed. 
+*/ +UTIL_STATIC int UTIL_sameString(const char* a, const char* b) +{ + assert(a!=NULL && b!=NULL); /* unsupported scenario */ + if (a==NULL) return 0; + if (b==NULL) return 0; + return !strcmp(a,b); +} + + +/*-**************************************** +* Time functions +******************************************/ +#if defined(_WIN32) /* Windows */ + + typedef LARGE_INTEGER UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } + +#elif defined(__APPLE__) && defined(__MACH__) + + #include + typedef U64 UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom)) / 1000ULL; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return ((clockEnd - clockStart) * 
(U64)rate.numer) / ((U64)rate.denom); + } + +#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || (defined(__GLIBC__) && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2) ) ) + + #include + typedef struct timespec UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) + { + UTIL_time_t now; + if (clock_gettime(CLOCK_MONOTONIC, &now)) + fprintf(stderr, "ERROR: Failed to get time\n"); /* we could also exit() */ + return now; + } + UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t diff; + if (end.tv_nsec < begin.tv_nsec) { + diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; + diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; + } else { + diff.tv_sec = end.tv_sec - begin.tv_sec; + diff.tv_nsec = end.tv_nsec - begin.tv_nsec; + } + return diff; + } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 micro = 0; + micro += 1000000ULL * diff.tv_sec; + micro += diff.tv_nsec / 1000ULL; + return micro; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 nano = 0; + nano += 1000000000ULL * diff.tv_sec; + nano += diff.tv_nsec; + return nano; + } + +#else /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */ + + typedef clock_t UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +#endif + + +/* returns time span in microseconds */ +UTIL_STATIC U64 UTIL_clockSpanMicro(UTIL_time_t clockStart) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return 
UTIL_getSpanTimeMicro(clockStart, clockEnd); +} + +/* returns time span in nanoseconds */ +UTIL_STATIC U64 UTIL_clockSpanNano(UTIL_time_t clockStart) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeNano(clockStart, clockEnd); +} + +UTIL_STATIC void UTIL_waitForNextTick(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + UTIL_time_t clockEnd; + do { + clockEnd = UTIL_getTime(); + } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); +} + + + +/*-**************************************** +* File functions +******************************************/ +#if defined(_MSC_VER) + #define chmod _chmod + typedef struct __stat64 stat_t; +#else + typedef struct stat stat_t; +#endif + + +UTIL_STATIC int UTIL_isRegFile(const char* infilename); +UTIL_STATIC int UTIL_isRegFD(int fd); + + +UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf) +{ + int res = 0; + + if (!UTIL_isRegFile(filename)) + return -1; + + { +#if defined(_WIN32) || (PLATFORM_POSIX_VERSION < 200809L) + struct utimbuf timebuf; + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + res += utime(filename, &timebuf); /* set access and modification times */ +#else + struct timespec timebuf[2]; + memset(timebuf, 0, sizeof(timebuf)); + timebuf[0].tv_nsec = UTIME_NOW; + timebuf[1].tv_sec = statbuf->st_mtime; + res += utimensat(AT_FDCWD, filename, timebuf, 0); /* set access and modification times */ +#endif + } + +#if !defined(_WIN32) + res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ +#endif + + res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */ + + errno = 0; + return -res; /* number of errors is returned */ +} + + +UTIL_STATIC int UTIL_getFDStat(int fd, stat_t *statbuf) +{ + int r; +#if defined(_MSC_VER) + r = _fstat64(fd, statbuf); + if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... 
*/ +#else + r = fstat(fd, statbuf); + if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */ +#endif + return 1; +} + +UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf) +{ + int r; +#if defined(_MSC_VER) + r = _stat64(infilename, statbuf); + if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */ +#else + r = stat(infilename, statbuf); + if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */ +#endif + return 1; +} + + +UTIL_STATIC int UTIL_isRegFD(int fd) +{ + stat_t statbuf; +#ifdef _WIN32 + /* Windows runtime library always open file descriptors 0, 1 and 2 in text mode, therefore we can't use them for binary I/O */ + if(fd < 3) return 0; +#endif + return UTIL_getFDStat(fd, &statbuf); /* Only need to know whether it is a regular file */ +} + + +UTIL_STATIC int UTIL_isRegFile(const char* infilename) +{ + stat_t statbuf; + return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */ +} + + +UTIL_STATIC U32 UTIL_isDirectory(const char* infilename) +{ + int r; + stat_t statbuf; +#if defined(_MSC_VER) + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +UTIL_STATIC U64 UTIL_getOpenFileSize(FILE* file) +{ + int r; + int fd; + struct UTIL_TYPE_stat statbuf; + + fd = UTIL_fileno(file); + if (fd < 0) { + perror("fileno"); + exit(1); + } + r = UTIL_fstat(fd, &statbuf); + if (r || !UTIL_STAT_MODE_ISREG(statbuf.st_mode)) return 0; /* No good... */ + return (U64)statbuf.st_size; +} + + +UTIL_STATIC U64 UTIL_getFileSize(const char* infilename) +{ + int r; + struct UTIL_TYPE_stat statbuf; + + r = UTIL_stat(infilename, &statbuf); + if (r || !UTIL_STAT_MODE_ISREG(statbuf.st_mode)) return 0; /* No good... 
*/ + return (U64)statbuf.st_size; +} + + +UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) +{ + U64 total = 0; + unsigned n; + for (n=0; n= *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + } while (FindNextFileA(hFile, &cFile)); + + FindClose(hFile); + assert(nbFiles < INT_MAX); + return (int)nbFiles; +} + +#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ +# define UTIL_HAS_CREATEFILELIST +# include /* opendir, readdir */ +# include /* strerror, memcpy */ + +UTIL_STATIC int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd) +{ + DIR* dir; + struct dirent * entry; + size_t dirLength; + int nbFiles = 0; + + if (!(dir = opendir(dirName))) { + fprintf(stderr, "Cannot open directory '%s': %s\n", dirName, strerror(errno)); + return 0; + } + + dirLength = strlen(dirName); + errno = 0; + while ((entry = readdir(dir)) != NULL) { + char* path; + size_t fnameLength, pathLength; + if (strcmp (entry->d_name, "..") == 0 || + strcmp (entry->d_name, ".") == 0) continue; + fnameLength = strlen(entry->d_name); + path = (char*)malloc(dirLength + fnameLength + 2); + if (!path) { closedir(dir); return 0; } + memcpy(path, dirName, dirLength); + path[dirLength] = '/'; + memcpy(path+dirLength+1, entry->d_name, fnameLength); + pathLength = dirLength+1+fnameLength; + path[pathLength] = 0; + + if (UTIL_isDirectory(path)) { + nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd); /* Recursively call "UTIL_prepareFileList" with the new path. 
*/ + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } else { + if (*bufStart + *pos + pathLength >= *bufEnd) { + size_t const newListSize = (size_t)(*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */ + } + + if (errno != 0) { + fprintf(stderr, "readdir(%s) error: %s\n", dirName, strerror(errno)); + free(*bufStart); + *bufStart = NULL; + } + closedir(dir); + return nbFiles; +} + +#else + +UTIL_STATIC int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd) +{ + (void)bufStart; (void)bufEnd; (void)pos; + fprintf(stderr, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName); + return 0; +} + +#endif /* #ifdef _WIN32 */ + +/* + * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, + * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). + * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer) + * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called. 
+ */ +UTIL_STATIC const char** +UTIL_createFileList(const char** inputNames, unsigned inputNamesNb, + char** allocatedBuffer, unsigned* allocatedNamesNb) +{ + size_t pos; + unsigned i, nbFiles; + char* buf = (char*)malloc(LIST_SIZE_INCREASE); + size_t bufSize = LIST_SIZE_INCREASE; + const char** fileTable; + + if (!buf) return NULL; + + for (i=0, pos=0, nbFiles=0; i= bufSize) { + while (pos + len >= bufSize) bufSize += LIST_SIZE_INCREASE; + buf = (char*)UTIL_realloc(buf, bufSize); + if (!buf) return NULL; + } + assert(pos + len < bufSize); + memcpy(buf + pos, inputNames[i], len); + pos += len; + nbFiles++; + } else { + char* bufend = buf + bufSize; + nbFiles += (unsigned)UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend); + if (buf == NULL) return NULL; + assert(bufend > buf); + bufSize = (size_t)(bufend - buf); + } } + + if (nbFiles == 0) { free(buf); return NULL; } + + fileTable = (const char**)malloc(((size_t)nbFiles+1) * sizeof(const char*)); + if (!fileTable) { free(buf); return NULL; } + + for (i=0, pos=0; i bufSize) { + free(buf); + free((void*)fileTable); + return NULL; + } /* can this happen ? 
*/
+
+    *allocatedBuffer = buf;
+    *allocatedNamesNb = nbFiles;
+
+    return fileTable;
+}
+
+
+UTIL_STATIC void
+UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
+{
+    free(allocatedBuffer);
+    free((void*)filenameTable);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* UTIL_H_MODULE */
diff --git a/scripts/perftest/kzip/kzelz4.cnf b/scripts/perftest/kzip/kzelz4.cnf
new file mode 100644
index 0000000..2e8a22e
--- /dev/null
+++ b/scripts/perftest/kzip/kzelz4.cnf
@@ -0,0 +1,2 @@
+[LogSection]
+debug_level=debug
diff --git a/scripts/perftest/kzip/scripts/parse_perf_log.py b/scripts/perftest/kzip/scripts/parse_perf_log.py
new file mode 100644
index 0000000..0bfba2c
--- /dev/null
+++ b/scripts/perftest/kzip/scripts/parse_perf_log.py
@@ -0,0 +1,171 @@
+import re
+import pandas as pd
+import sys
+from openpyxl import load_workbook
+from openpyxl.styles import Font, Border, Side, Alignment, PatternFill
+from openpyxl.utils import get_column_letter
+
+def parse_filename(filename):
+    """Extract blocksize and inflight from the file name; each is an int, or 'N/A' when absent."""
+    blocksize_match = re.search(r'blocksize-(\d+)', filename)
+    inflight_match = re.search(r'inflight-(\d+)', filename)
+
+    blocksize = int(blocksize_match.group(1)) if blocksize_match else 'N/A'
+    inflight = int(inflight_match.group(1)) if inflight_match else 'N/A'
+
+    return blocksize, inflight
+
+def extract_log_data(log_text, filename):
+    blocksize, inflight = parse_filename(filename)
+
+    pattern = r"console\.log\('kae-threads:(\d+).*?multi:1,file:.*?/([^/]*?\.tar(\.compressed)?).*?inflightNum:\s*(\d+).*?alg:([^ ]+).*?\);t\(\'(.*?)\)"
+    matches = re.findall(pattern, log_text)
+
+    data_dict = {}
+
+    for match in matches:
+        kae_threads, full_filename, compressed_mark, inflight_num, alg, data_part = match
+        file_key = "decompress" if compressed_mark else "compress"
+
+        parts = data_part.split()
+
+        # Parse the total elapsed time (strip the 's' and convert to float)
+        time_used = float(parts[7].replace('s', ''))
+
+        # Drop records whose total elapsed time is below 1 second
+        if time_used < 1:
+            continue
+
+        if file_key not in data_dict:
+            data_dict[file_key] = []
+
+        row = [
+            blocksize, # new: blocksize taken from the file name
+            inflight, # new: inflight taken from the file name
+            alg,
+            int(kae_threads),
+            parts[0] + "kb",
+            int(inflight_num),
+            float(parts[2]),
+            parts[3],
+            time_used, # total elapsed time, already parsed above
+            float(parts[8].replace('us', '')),
+            float(parts[9].replace('us', '')),
+            float(parts[10].replace('us', '')),
+            float(parts[11].replace('us', '')),
+            float(parts[12].replace('us', '')),
+            float(parts[13].replace('us', '')),
+            float(parts[14].replace('us', '')),
+            filename
+        ]
+        data_dict[file_key].append(row)
+        if parts[0] == "16" or parts[0] == "64":
+            # Blank separator row, exactly as wide as a data row
+            data_dict[file_key].append([None] * len(row))
+
+    return data_dict
+
+def ensure_xlsx_suffix(filename):
+    if not filename.lower().endswith('.xlsx'):
+        return filename + '.xlsx'
+    return filename
+
+def style_excel(output_file):
+    wb = load_workbook(output_file)
+
+    # Define the styles
+    header_font = Font(bold=True, color="FFFFFF")
+    header_fill = PatternFill("solid", fgColor="4F81BD")
+    border = Border(left=Side(style='thin'),
+                    right=Side(style='thin'),
+                    top=Side(style='thin'),
+                    bottom=Side(style='thin'))
+    alignment = Alignment(horizontal='center', vertical='center')
+    even_fill = PatternFill("solid", fgColor="DCE6F1")
+    odd_fill = PatternFill("solid", fgColor="FFFFFF")
+
+    for sheet in wb.sheetnames:
+        ws = wb[sheet]
+
+        # Set column widths (adjusted after the two columns were added)
+        col_widths = [12, 12, 12, 15, 10, 12, 10, 12, 10, 12, 12, 12, 10, 10, 10]
+        for i, width in enumerate(col_widths, 1):
+            ws.column_dimensions[get_column_letter(i)].width = width
+
+        # Apply the styles
+        for row in ws.iter_rows():
+            for cell in row:
+                cell.border = border
+                cell.alignment = alignment
+
+                if cell.row == 1:
+                    cell.font = header_font
+                    cell.fill = header_fill
+                else:
+                    cell.fill = even_fill if cell.row % 2 == 0 else odd_fill
+
+        ws.freeze_panes = "A2"
+        ws.auto_filter.ref = ws.dimensions
+
+    wb.save(output_file)
+
+def create_excel(data_dict, output_file):
+    output_file = ensure_xlsx_suffix(output_file)
+
+    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
+        for file_type, data in data_dict.items():
+            columns = [
+                "后台blocksize", # newly added column
+                "后台inflight", # newly added column
+                "算法(alg)",
+                "压缩等级",
+                "包长大小",
+                "inflightNum",
+                "压缩率",
+                "带宽",
+                "总用时(s)", # unit changed to seconds
+                "平均时延(μs)",
+                "最大时延(μs)",
+                "最小时延(μs)",
+                "P50(μs)",
+                "P90(μs)",
+                "P99(μs)",
+                "P999(μs)",
+                "原始文件"
+            ]
+            sheet_name = f"{file_type}_数据"[:31]
+
+            df = pd.DataFrame(data, columns=columns)
+            df.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    style_excel(output_file)
+    print(f"成功生成美化Excel文件:{output_file}")
+
+def main():
+    if len(sys.argv) < 2:
+        print("错误:请指定日志文件名作为参数")
+        print("用法:python script.py <日志文件名> [输出文件名]")
+        sys.exit(1)
+
+    log_file = sys.argv[1]
+    output_file = sys.argv[2] if len(sys.argv) > 2 else sys.argv[1]
+
+    try:
+        with open(log_file, 'r', encoding='utf-8') as f:
+            log_text = f.read()
+
+        # Extract and process the data (the file name is passed in so parameters can be parsed from it)
+        data_dict = extract_log_data(log_text, log_file)
+
+        if not data_dict:
+            print("警告:未找到任何有效数据(或所有数据总用时均小于1秒)")
+            sys.exit(0)
+
+        create_excel(data_dict, output_file)
+
+    except Exception as e:
+        print(f"处理文件错误: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/perftest/kzip/scripts/parse_perf_log_by_path.py b/scripts/perftest/kzip/scripts/parse_perf_log_by_path.py
new file mode 100644
index 0000000..32a7b50
--- /dev/null
+++ b/scripts/perftest/kzip/scripts/parse_perf_log_by_path.py
@@ -0,0 +1,56 @@
+import os
+import sys
+
+from parse_perf_log import extract_log_data, create_excel
+
+def process_folder(folder_path, output_file, file_prefix):
+    # 初始化数据字典
+    data_dict = {}
+
+    # 遍历文件夹中的所有文件
+    for filename in sorted(os.listdir(folder_path)):
+        file_path = os.path.join(folder_path, filename)
+        # print(f"遍历:{file_path}")
+        # 仅处理文件,不处理子目录,且文件名符合前缀
+        if os.path.isfile(file_path) and filename.startswith(file_prefix):
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    log_text = f.read()
+            except Exception as e:
+                print(f"读取文件 {filename} 错误: {e}")
+ 
pd.ExcelWriter(output_file, engine='openpyxl') as writer: + for file_type, data in data_dict.items(): + columns = [ + "后台blocksize", # 新增列 + "后台inflight", # 新增列 + "算法(alg)", + "压缩等级", + "包长大小", + "inflightNum", + "压缩率", + "带宽", + "总用时(s)", # 单位改为秒 + "平均时延(μs)", + "最大时延(μs)", + "最小时延(μs)", + "P50(μs)", + "P90(μs)", + "P99(μs)", + "P999(μs)", + "原始文件" + ] + sheet_name = f"{file_type}_数据"[:31] + + df = pd.DataFrame(data, columns=columns) + df.to_excel(writer, sheet_name=sheet_name, index=False) + + style_excel(output_file) + print(f"成功生成美化Excel文件:{output_file}") + +def main(): + if len(sys.argv) < 2: + print("错误:请指定日志文件名作为参数") + print("用法:python script.py <日志文件名> [输出文件名]") + sys.exit(1) + + log_file = sys.argv[1] + output_file = sys.argv[2] if len(sys.argv) > 2 else sys.argv[1] + + try: + with open(log_file, 'r', encoding='utf-8') as f: + log_text = f.read() + + # 提取并处理数据(传入文件名用于提取参数) + data_dict = extract_log_data(log_text, log_file) + + if not data_dict: + print("警告:未找到任何有效数据(或所有数据总用时均小于1秒)") + sys.exit(0) + + create_excel(data_dict, output_file) + + except Exception as e: + print(f"处理文件错误: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/scripts/perftest/kzip/scripts/parse_perf_log_by_path.py b/scripts/perftest/kzip/scripts/parse_perf_log_by_path.py new file mode 100644 index 0000000..32a7b50 --- /dev/null +++ b/scripts/perftest/kzip/scripts/parse_perf_log_by_path.py @@ -0,0 +1,56 @@ +import os +import sys + +from parse_perf_log import extract_log_data, create_excel + +def process_folder(folder_path, output_file, file_prefix): + # 初始化数据字典 + data_dict = {} + + # 遍历文件夹中的所有文件 + for filename in sorted(os.listdir(folder_path)): + file_path = os.path.join(folder_path, filename) + # print(f"遍历:{file_path}") + # 仅处理文件,不处理子目录,且文件名符合前缀 + if os.path.isfile(file_path) and filename.startswith(file_prefix): + try: + with open(file_path, 'r', encoding='utf-8') as f: + log_text = f.read() + except Exception as e: + print(f"读取文件 {filename} 错误: {e}") + 
continue + + # 提取并处理数据 + file_data = extract_log_data(log_text, file_path) + + # 将提取的数据添加到data_dict中 + for suffix, data in file_data.items(): + if suffix not in data_dict: + data_dict[suffix] = [] + data_dict[suffix].extend(data) + + # 生成Excel + create_excel(data_dict, output_file) + +def main(): + if len(sys.argv) < 2: + print("错误:请指定日志文件名作为参数") + print("用法:python script.py <日志文件名> [输出文件名]") + sys.exit(1) + + # 第一个参数:要处理的文件夹。 + log_file_path = sys.argv[1] + # 第二个参数:最终要输出的文件路径。 + output_file = sys.argv[2] if len(sys.argv) > 2 else 'output.xlsx' + # 指定日志文件的前缀,仅处理符合前缀的文件。 + file_prefix = "kzip-delay.log." + + if not os.path.isdir(log_file_path): + print("错误:指定的路径不是有效的文件夹") + return + + # 处理文件夹中的所有符合条件的文件 + process_folder(log_file_path, output_file, file_prefix) + +if __name__ == "__main__": + main() diff --git a/scripts/perftest/kzip/scripts/runAffinityFunc.sh b/scripts/perftest/kzip/scripts/runAffinityFunc.sh new file mode 100644 index 0000000..c69e02c --- /dev/null +++ b/scripts/perftest/kzip/scripts/runAffinityFunc.sh @@ -0,0 +1,79 @@ +export LD_LIBRARY_PATH=/usr/local/kaelz4/lib/:/usr/local/kaezstd/lib/:/usr/local/kaezip/lib/:$LD_LIBRARY_PATH +export KAE_LZ4_WINTYPE=8 +export KAE_LZ4_COMP_TYPE=8 +export KAE_LZ4_ASYNC_THREAD_NUM=6 + +echo "亲和性测试开始" + +buildParams="kae" +sh build.sh $buildParams + +Algthm=("kaelz4async_block" "kaelz4async_frame") +Datasets=("calgary" "itemdata" "ooffice" "osdb" "samba" "webster" "xml" "x-ray") +BlockSize=("4" "8" "60" "128" "512" "1024") +Cpu_core_ranges=("0-20" "40-79" "0-79" "120-159") + +current_time=$(date +"%Y-%m-%d_%H-%M-%S") +LogFile=kaelz4-function.log.$current_time +testFilePath=../../../scripts/compressTestDataset + + +for da in "${Datasets[@]}"; do + for alg in "${Algthm[@]}"; do + for bs in "${BlockSize[@]}"; do + echo "Executing: $da $alg $bs testing" + testFile="$testFilePath/$da" + testFileComped="$testFile.compressed" + testFileOrigin="$testFile.origin" + rm -rf $testFileComped + rm -rf $testFileOrigin + rm -rf
$testFileComped.meta + rm -rf $testFileOrigin.meta + for cpu_cores in "${Cpu_core_ranges[@]}"; do + echo "Binding CPU : $cpu_cores Testing" + taskset -c "$cpu_cores" ./kzip -A $alg -m 1 -f $testFile -n 30000 -s $bs -i 16 >> $LogFile & # 压缩测试 + disown # 丢弃后台进程在被kill时的bash报错 + sleep 0.5 + pid=$! + numa0=0 # 自适应亲和性测试记录KAE线程所处的numa + numa1=0 + for thread in $(ls /proc/$pid/task); do + thread_id=$(basename $thread) + current_core=$(cat "/proc/$pid/task/$thread_id/stat" | awk '{print $39}') + + start_core=$(echo "$cpu_cores" | cut -d'-' -f1) + end_core=$(echo "$cpu_cores" | cut -d'-' -f2) + if [[ "$current_core" -ge "$start_core" && "$current_core" -le "$end_core" ]]; then # check both bounds against the core the thread is actually on + echo "$thread running in $current_core, pass" + else + echo "Error: $thread set in $cpu_cores ,runing in $current_core" + echo "$alg, Dataset : $da, BlockSize : $bs" + exit 1 + fi + # 自适应亲和性测试。计算0-79绑核时,KAE线程落在2个numa上的数量 + if [[ "$cpu_cores" == "0-79" ]]; then + # 主进程不参与计算 + if [[ "$thread" -ne "$pid" && $current_core -ge 0 && $current_core -le 39 ]]; then + ((numa0++)) + elif [[ "$thread" -ne "$pid" && $current_core -ge 40 && $current_core -le 79 ]]; then + ((numa1++)) + fi + fi + done + if [[ "$cpu_cores" == "0-79" ]]; then + echo "SUCCESS: all threads running in $cpu_cores." + if [[ "$numa0" -eq "$numa1" ]]; then + echo "SUCCESS: KAE threads average test: $numa0 in numa0, $numa1 in numa1." + else + echo "Warning: KAE threads: $numa0 in numa0, $numa1 in numa1."
+ fi + else + echo "SUCCESS: all threads running in $cpu_cores" + fi + kill -9 $pid + done + done + done +done + +echo "亲和性测试结束" \ No newline at end of file diff --git a/scripts/perftest/kzip/scripts/runDelay.sh b/scripts/perftest/kzip/scripts/runDelay.sh new file mode 100644 index 0000000..39ed714 --- /dev/null +++ b/scripts/perftest/kzip/scripts/runDelay.sh @@ -0,0 +1,187 @@ +export LD_LIBRARY_PATH=/usr/local/kaelz4/lib/:/usr/local/kaezstd/lib/:/usr/local/kaezip/lib/:$LD_LIBRARY_PATH +export KAE_LZ4_WINTYPE=8 +export KAE_LZ4_COMP_TYPE=8 +export KAE_LZ4_ASYNC_THREAD_NUM=12 +export KAE_LZ4_ASYNC_DC_THREAD_NUM=10 +export PRINT_TABLE_DATA=1 +export KZIP_QAT_USE_DEV_NUM=1 # 测试单个QAT能力 + +buildParams="kae" +Algthm=() +ThreadNum=("12" "8" "7" "6" "5" "4" "3" "2" "1") + +if [[ -d "/usr/local/kaelz4" ]]; then + Algthm+=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77" "kaelz4async_lz77_frame") +fi +if [[ -d "/usr/local/kaezip" ]]; then + Algthm+=("kaezlib_deflate" "kaezlibasync_deflate") +fi + +sh build.sh $buildParams + + +Datasets=("calgary.tar" "silesia.tar") +inflateNum=("1" "4" "8" "16" "64") # 单IO 单核测试 +BlockSize=("4" "8" "16" "32" "64") +Multi=("1") + +# 后台进程的分片大小,由执行脚本时的第2个参数控制。仅在第1个参数为with_full_compress 或 with_full_uncompress 时有效 +BackProcessBlockSize=$2 +BackProcessInflight=$3 # 后台进程的inflight大小。仅在第1个参数为with_full_compress 或 with_full_uncompress 时有效 +BackProcessBlockSize=${BackProcessBlockSize:=0} +BackProcessInflight=${BackProcessInflight:=0} + +current_time=$(date +"%Y-%m-%d_%H-%M-%S") +LogFile=kzip-delay.log.$current_time.withBackProcess.blocksize-$BackProcessBlockSize.inflight-$BackProcessInflight +testFilePath=../../../scripts/compressTestDataset + +diffFile() { + local testFile=$1 + local testFileOrigin=$2 + if [[ ! -f "$testFile" ]]; then + echo "Error: 压缩异常!未成功压缩文件" + exit 1 + fi + if [[ ! 
-f "$testFileOrigin" ]]; then + echo "Error: 解压异常!未成功解压文件" + exit 1 + fi + diffRes=$(diff $testFile $testFileOrigin) + if [[ -n "$diffRes" ]] ; then + echo "Error: 解压后数据与原始数据比对不通过!!" + else + echo "Success: 测试通过 解压数据校验通过" + fi +} + + +is_testing_with_full_compress=0; # 是否测试压缩打满时的表现 +is_testing_with_full_uncompress=0; # 是否测试解压打满时的表现 +is_testing_multi_when_inflight16=0; + +case "$1" in + "with_full_compress") + is_testing_with_full_compress=1 + echo "开启压缩打满测试" + ;; + "with_full_uncompress") + is_testing_with_full_uncompress=1 + echo "开启解压打满测试" + ;; + "with_multi") + is_testing_multi_when_inflight16=1 + echo "开启inflight=16时的多并发测试" + ;; + *) + ;; +esac + + +check_and_start_backend_process(){ + # 背景压力:压缩或解压"打满"硬件带宽。保持压缩压力和解压压力大致为 7:3 + backfile="$testFilePath/itemdata" + backfileComped="$backfile.compressed" + backfileOrigin="$backfile.origin" + + if [[ "$is_testing_with_full_compress" == "1" ]]; then + backInflightNum=$BackProcessInflight + backBlockSize=$BackProcessBlockSize + + echo "后台压缩进程启动: 压缩压力:解压压力 = $backInflightNum : $inum. 后台压力的分片大小 $backBlockSize" >> $LogFile + echo "taskset -c 0-79 ./kzip -A $alg -m 1 -p1 -g1 -f $backfile -o $backfileComped -n 60000000 -s $backBlockSize -i $backInflightNum &" >> $LogFile + taskset -c 0-79 ./kzip -A $alg -m 1 -p1 -g1 -f $backfile -o $backfileComped -n 60000000 -s $backBlockSize -i $backInflightNum & + loop1=2 + sleep 1 # 让打满带宽的进程彻底跑起来 + fi + if [[ "$is_testing_with_full_uncompress" == "1" ]]; then + backInflightNum=$BackProcessInflight + backBlockSize=$BackProcessBlockSize + echo "后台解压进程启动 压缩压力:解压压力 = $inum : $backInflightNum. 
后台压力的分片大小 $backBlockSize" >> $LogFile + echo "taskset -c 0-79 ./kzip -d -A $alg -m 1 -p1 -g1 -f $backfileComped -o $backfileOrigin -n 600000 -s $backBlockSize -i $backInflightNum &" >> $LogFile + # 准备解压数据 + taskset -c 0-79 ./kzip -A $alg -m 1 -p1 -g1 -f $backfile -o $backfileComped -n 1 -s $backBlockSize -i $backInflightNum 1>/dev/null + taskset -c 0-79 ./kzip -d -A $alg -m 1 -p1 -g1 -f $backfileComped -o $backfileOrigin -n 600000 -s $backBlockSize -i $backInflightNum & + loop2=2 + sleep 1 # 让打满带宽的进程彻底跑起来 + fi +} +check_and_stop_backend_process(){ + sleep 1 + if [[ "$is_testing_with_full_compress" == "1" || "$is_testing_with_full_uncompress" == "1" ]]; then + killall -9 kzip # 杀掉可能的后台压力进程 + sleep 3 + fi +} + +compute_confortable_looptimes(){ + loop=300 # silesia.tar 压缩循环 300;10并发解压循环3000、inflight1的时候600 + if [[ "$da" == "calgary.tar" ]]; then + loop=21000 # calgary.tar 循环 22000 次 + if [[ "$inum" == "1" ]]; then + loop=7200 # 降低单IO测试时间 + fi + fi + + if [[ "$is_testing_with_full_compress" == "1" || "$is_testing_with_full_uncompress" == "1" ]]; then + loop=$((loop / 3)) # 背后有满带宽进程,QAT下速率降低,减少测试时间。 KAE压缩解压互不影响 + fi + loop1=$loop + loop2=$loop + + if [[ "$is_testing_with_full_compress" == "1" ]]; then + loop1=2 # 压缩数据不准,尽快结束 + fi + if [[ "$is_testing_with_full_uncompress" == "1" ]]; then + loop2=2 # 解压数据不准尽快结束 + fi +} + +format_multi_arr(){ + # 追加测试 inflate为16时,多进程并发的表现 + if [[ "$is_testing_multi_when_inflight16" == "1" ]]; then + Multi=("1") + if [[ "$inum" == "16" ]]; then + Multi=("1" "2" "3" "4") + fi + fi +} + +for kaenum in "${ThreadNum[@]}"; do + export KAE_LZ4_ASYNC_THREAD_NUM=$kaenum + + for alg in "${Algthm[@]}"; do + for da in "${Datasets[@]}"; do + for inum in "${inflateNum[@]}"; do + format_multi_arr + for m in "${Multi[@]}"; do + for bs in "${BlockSize[@]}"; do + loop1=100 + loop2=100 + compute_confortable_looptimes + + echo "Executing: comp-level or kae-threads:$kaenum $da $alg $bs kb chunk. 
multi:$m, inflateNum: $inum testing" + testFile="$testFilePath/$da" + testFileComped="$testFile.compressed" + testFileOrigin="$testFile.origin" + rm -rf $testFileComped + rm -rf $testFileOrigin + rm -rf $testFileComped.meta + rm -rf $testFileOrigin.meta + echo "Executing: comp-level or kae-threads:$kaenum $da $alg $bs kb chunk. multi:$m, inflateNum: $inum testing" >> $LogFile + date >> $LogFile + + check_and_start_backend_process + + taskset -c 0-79 ./kzip -A $alg -m $m -p1 -g1 -f $testFile -o $testFileComped -n $loop1 -s $bs -i $inum >> $LogFile # 压缩测试 + date >> $LogFile + taskset -c 0-79 ./kzip -d -A $alg -m $m -p1 -g1 -f $testFileComped -o $testFileOrigin -n $loop2 -s $bs -i $inum >> $LogFile # 压缩测试 + diffFile $testFile $testFileOrigin >> $LogFile + + check_and_stop_backend_process + done + done + done + done + done +done +echo "时延测试结束" \ No newline at end of file diff --git a/scripts/perftest/kzip/scripts/runError.sh b/scripts/perftest/kzip/scripts/runError.sh new file mode 100644 index 0000000..ca29d34 --- /dev/null +++ b/scripts/perftest/kzip/scripts/runError.sh @@ -0,0 +1,113 @@ +export LD_LIBRARY_PATH=/usr/local/kaelz4/lib/:/usr/local/kaezstd/lib/:/usr/local/kaezip/lib/:$LD_LIBRARY_PATH +export KAE_LZ4_WINTYPE=8 +export KAE_LZ4_COMP_TYPE=8 + +# 异常以及可靠性测试 +# 1、KAE硬件队列资源消耗完时,自动切软算。 +# 2、K异步接口在KAE资源不可用时,自动切软算。 + +uninstall_driver() +{ + rmmod hisi_zip +} +install_driver() +{ + modprobe hisi_zip perf_mode=1 uacce_mode=2 pf_q_num=256 +} + +set_driver_less_and_runout() +{ + rmmod hisi_zip + sleep 1 + modprobe hisi_zip uacce_mode=2 pf_q_num=2 + sleep 1 + taskset -c 300-312 ./kzip -A kaelz4 -m 8 -n 400000000 -s 4 1>/dev/null 2>/dev/null & +} +reset_driver() +{ + rmmod hisi_zip + modprobe hisi_zip perf_mode=1 uacce_mode=2 pf_q_num=256 +} + +do_compress_and_decompress_and_diff() +{ + buildParams="kae" + sh build.sh $buildParams + Algthm=("kaelz4async_block" "kaelz4async_frame") + Datasets=("calgary" "itemdata" "ooffice" "osdb" "samba" "webster" "xml" "x-ray") + 
Datasets=("calgary" "itemdata") + BlockSize=("4" "8" "16" "60" "64") + + current_time=$(date +"%Y-%m-%d_%H-%M-%S") + LogFile=kaelz4-function.log.$current_time + testFilePath=../../../scripts/compressTestDataset + + diffFile() { + local testFile=$1 + local testFileOrigin=$2 + if [[ ! -f "$testFile" ]]; then + echo "Error: 压缩异常!未成功压缩文件" + fi + if [[ ! -f "$testFileOrigin" ]]; then + echo "Error: 解压异常!未成功解压文件" + fi + diffRes=$(diff $testFile $testFileOrigin) + if [[ -n "$diffRes" ]] ; then + echo "Error: 解压后数据与原始数据比对不通过!!" + else + echo "Success: 测试通过 解压数据校验通过" + fi + } + + for da in "${Datasets[@]}"; do + for alg in "${Algthm[@]}"; do + for bs in "${BlockSize[@]}"; do + echo "Executing: $da $alg $bs kb testing" + testFile="$testFilePath/$da" + testFileComped="$testFile.compressed" + testFileOrigin="$testFile.origin" + rm -rf $testFileComped + rm -rf $testFileOrigin + rm -rf $testFileComped.meta + rm -rf $testFileOrigin.meta + ./kzip -A $alg -m 2 -f $testFile -o $testFileComped -n 2 -s $bs -i 256 >> $LogFile # 压缩测试 + ./kzip -d -A $alg -m 1 -f $testFileComped -o $testFileOrigin -n 2 -s $bs -i 256 >> $LogFile # 压缩测试 + diffFile $testFile $testFileOrigin + done + done + done +} + +test_func_ok_when_run_out_of_KAE() +{ + echo "start testing async lz4 compress when hardware queue is run out>>>>>>>>>>>>>>>" + set_driver_less_and_runout + sleep 2 + echo "hardware queue left 0. start tesing when data is less than 64k>>>>>>>>>>>>>>>" + do_compress_and_decompress_and_diff 2>/dev/null + echo "all testing done>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" + sleep 2 + killall kzip + reset_driver + echo "env is reset." +} +test_func_ok_when_unavaliable_of_KAE() { + echo "start testing async lz4 compress when hardware queue is unavaliable>>>>>>>>>>>>>" + uninstall_driver + echo "hardware queue no exists. 
start tesing when data is less than 64k>>>>>>>>>>>>>>>" + do_compress_and_decompress_and_diff 2>/dev/null + echo "all testing done>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" + install_driver + echo "env is reset." +} + +main() +{ + test_func_ok_when_run_out_of_KAE + + sleep 5 + + test_func_ok_when_unavaliable_of_KAE +} + +main diff --git a/scripts/perftest/kzip/scripts/runFunc.sh b/scripts/perftest/kzip/scripts/runFunc.sh new file mode 100644 index 0000000..3483c99 --- /dev/null +++ b/scripts/perftest/kzip/scripts/runFunc.sh @@ -0,0 +1,71 @@ +export LD_LIBRARY_PATH=/usr/local/kaelz4/lib/:/usr/local/kaezstd/lib/:/usr/local/kaezip/lib/:$LD_LIBRARY_PATH +export KAE_LZ4_WINTYPE=8 +export KAE_LZ4_COMP_TYPE=8 + +buildParams="kae" +Algthm=() +if [[ -d "/usr/local/kaelz4" ]]; then + Algthm+=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77" "kaelz4async_lz77_frame") +fi +if [[ -d "/usr/local/kaezip" ]]; then + Algthm+=("kaezlib_deflate" "kaezlibasync_deflate") +fi + +sh build.sh $buildParams + +Datasets=("calgary" "itemdata" "dickens" "mozilla" "mr" "nci" "ooffice" "osdb" "reymont" "samba" "sao" "webster" "xml" "x-ray") +Datasets=("calgary" "itemdata" "ooffice" "osdb" "samba" "webster" "xml" "x-ray") +BlockSize=("4" "8" "16" "64" "128" "2090" "8192" "10244" "0") +Polling=("1" "0") + + +current_time=$(date +"%Y-%m-%d_%H-%M-%S") +LogFile=kaelz4-function.log.$current_time +testFilePath=../../../scripts/compressTestDataset +passCnt=0 +failCnt=0 + +diffFile() { + local testFile=$1 + local testFileOrigin=$2 + if [[ ! -f "$testFile" ]]; then + echo "Error: 压缩异常!未成功压缩文件" + exit 1 + fi + if [[ ! -f "$testFileOrigin" ]]; then + echo "Error: 解压异常!未成功解压文件" + exit 1 + fi + diffRes=$(diff $testFile $testFileOrigin) + if [[ -n "$diffRes" ]] ; then + echo "Error: 解压后数据与原始数据比对不通过!!" 
+ failCnt=`expr $failCnt + 1` + else + echo "Success: 测试通过 解压数据校验通过" + passCnt=`expr $passCnt + 1` + fi +} + +for da in "${Datasets[@]}"; do + for alg in "${Algthm[@]}"; do + for bs in "${BlockSize[@]}"; do + for polling in "${Polling[@]}"; do + echo "Executing: $da $alg $bs kb chunk polling mode: $polling testing" + testFile="$testFilePath/$da" + testFileComped="$testFile.compressed" + testFileOrigin="$testFile.origin" + rm -rf $testFileComped + rm -rf $testFileOrigin + rm -rf $testFileComped.meta + rm -rf $testFileOrigin.meta + ./kzip -A $alg -m 1 -f $testFile -o $testFileComped -n 2 -s $bs -i 256 -p $polling >> $LogFile # 压缩测试 + ./kzip -d -A $alg -m 1 -f $testFileComped -o $testFileOrigin -n 2 -s $bs -i 256 -p $polling >> $LogFile # 压缩测试 + diffFile $testFile $testFileOrigin + done + done + sleep 1 + done +done + +echo "test pass: $passCnt, failed: $failCnt." +echo "功能测试结束" \ No newline at end of file diff --git a/scripts/perftest/kzip/scripts/runLogfunc.sh b/scripts/perftest/kzip/scripts/runLogfunc.sh new file mode 100644 index 0000000..b910961 --- /dev/null +++ b/scripts/perftest/kzip/scripts/runLogfunc.sh @@ -0,0 +1,57 @@ +export LD_LIBRARY_PATH=/usr/local/kaelz4/lib/:/usr/local/kaezstd/lib/:/usr/local/kaezip/lib/:$LD_LIBRARY_PATH +export KAE_LZ4_WINTYPE=8 +export KAE_LZ4_COMP_TYPE=8 +export KAELZ4_CONF_ENV=/var/log/ + +buildParams="kae" +sh build.sh $buildParams + +Algthm=("kaelz4async_block" "kaelz4async_frame") +Datasets=("calgary" "itemdata" "dickens" "mozilla" "mr" "nci" "ooffice" "osdb" "reymont" "samba" "sao" "webster" "xml" "x-ray") +Datasets=("calgary" "itemdata" "ooffice" "osdb" "samba" "webster" "xml" "x-ray") +BlockSize=("4" "8" "16" "60" "64" "68" "128" "512" "1024" "2090" "10244") + +current_time=$(date +"%Y-%m-%d_%H-%M-%S") +LogFile=kaelz4-logfunction.log.$current_time +testFilePath=../../../scripts/compressTestDataset +search_content="do polling" # debug等级日志信息里,异步压缩回收硬件压缩结果标志 +FILE_PATH="/var/log/kaelz4.log" # 日志文件路径 + +echo "日志测试开始" + +rm -f 
/var/log/kaelz4.cnf +cp ./kaelz4.cnf /var/log/ + +for da in "${Datasets[@]}"; do + for alg in "${Algthm[@]}"; do + for bs in "${BlockSize[@]}"; do + echo "Executing: $da $alg $bs testing" + testFile="$testFilePath/$da" + testFileComped="$testFile.compressed" + testFileOrigin="$testFile.origin" + rm -rf $testFileComped + rm -rf $testFileOrigin + rm -rf $testFileComped.meta + rm -rf $testFileOrigin.meta + echo " " > $FILE_PATH + + ./kzip -A $alg -m 1 -f $testFile -o $testFileComped -n 2 -s $bs -i 256 >> $LogFile # 压缩测试 + + if [ ! -f "$FILE_PATH" ]; then + echo "压缩日志文件 $FILE_PATH 不存在!" + exit 1 + fi + + if grep -q "$search_content" "$FILE_PATH"; then + sleep 0.1 + else + echo "日志文件中不包含指定内容:$search_content" + exit 1 + fi + wait + done + done +done + +rm -f /var/log/kaelz4.cnf +echo "日志功能正常,日志测试结束" diff --git a/scripts/perftest/kzip/scripts/runPerf.sh b/scripts/perftest/kzip/scripts/runPerf.sh new file mode 100644 index 0000000..31a281f --- /dev/null +++ b/scripts/perftest/kzip/scripts/runPerf.sh @@ -0,0 +1,132 @@ +#!/bin/bash +export LD_LIBRARY_PATH=/usr/local/kaelz4/lib/:/usr/local/kaezstd/lib/:/usr/local/kaezip/lib/:$LD_LIBRARY_PATH +export KAE_LZ4_WINTYPE=8 +export KAE_LZ4_COMP_TYPE=8 +# export KAE_LZ4_ASYNC_THREAD_NUM=12 # default is 12 +export KAE_LZ4_ASYNC_DC_THREAD_NUM=10 + +# 使用 getopts 解析命令行参数 +while getopts "m:l:n:w:f:o:v:A:h:g:s:c:i:t:p:k:r:P:e:" opt; do + case $opt in + A) # 要测试的算法 + Alg="$OPTARG" + ;; + m) # 支持的进程数量 + multiProcess="$OPTARG" + ;; + s) # 文件分片处理. 
默认0kb + fileChunk="$OPTARG" + ;; + n) # 循环压缩次数处理。 默认100次 + loppTimes="$OPTARG" + ;; + g) + recordDelay="$OPTARG" + ;; + i) + inflightNum="$OPTARG" + ;; + t) + threadsNum="$OPTARG" + ;; + f) + testFile="$OPTARG" + ;; + k) + useKAENum="$OPTARG" + ;; + r) + isTestCrc="$OPTARG" + ;; + p) + isTestPolling="$OPTARG" + ;; + e) + sess_nums="$OPTARG" + ;; + *) + echo "Usage: all params m:l:n:w:f:o:v:A:h:s:c:" + exit 1 + ;; + esac +done + +Alg=${Alg:=kaezip} +multiProcess=${multiProcess:=1} +fileChunk=${fileChunk:=0} +loppTimes=${loppTimes:=1} +inflightNum=${inflightNum:=64} +threadsNum=${threadsNum:=1} +testFile=${testFile:="../../../scripts/compressTestDataset/calgary"} +useKAENum=${useKAENum:=2} +isTestCrc=${isTestCrc:=0} +isTestPolling=${isTestPolling:=0} +sess_nums=${sess_nums:=1} +recordDelay=${recordDelay:=0} + +kae_algs=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77" "kaelz4async_lz77_frame" "kaezlib_deflate" "kaezlibasync_deflate") + +if [[ " ${kae_algs[@]} " =~ " $Alg " ]]; then + buildParams="kae" +else + echo "暂不支持的算法,检查-A参数" +fi +sh build.sh $buildParams + +numa_nodes=$(lscpu | grep -i "numa node(s)" | awk '{print $NF}') +threads_per_core=$(lscpu | grep -i "Thread(s) per core" | awk '{print $NF}') +cpu_range=$(lscpu | grep -i "NUMA node0 CPU(s)" | awk '{print $NF}') +cpu_count_per_numa=40 +if [[ $cpu_range =~ ([0-9]+)-([0-9]+) ]]; then + start_cpu=${BASH_REMATCH[1]} + end_cpu=${BASH_REMATCH[2]} + cpu_count_per_numa=$((end_cpu - start_cpu + 1)) +fi +cpu1_range=$(lscpu | grep -i "NUMA node1 CPU(s)" | awk '{print $NF}') +if [[ $cpu1_range =~ ([0-9]+)-([0-9]+) ]]; then + cpu1_start_cpu=${BASH_REMATCH[1]} + cpu1_end_cpu=${BASH_REMATCH[2]} +fi +useEngineNumsForThisTest=$useKAENum # 本次测试使用的加速器的数量 +# 本次测试使用的CPU配置。 机器中numa总量-单个numa的cpu核心数量-超线程的单核线程倍数-本次测试使用的加速器数量 +cpuConfigStr="$numa_nodes-$cpu_count_per_numa-$threads_per_core-$useEngineNumsForThisTest" +# 获取使用前两个Numa的话,CPU的绑核范围 +bindCpu0AndCpu1="$start_cpu-$cpu1_end_cpu" +if [[ 
$useEngineNumsForThisTest == 1 ]]; then + bindCpu0AndCpu1="$start_cpu-$end_cpu" +fi + + +testFileComped="$testFile.compressed" +testFileOrigin="$testFile.origin" +rm -rf $testFileComped +rm -rf $testFileOrigin +rm -rf $testFileComped.meta +rm -rf $testFileOrigin.meta + +echo "taskset -c $bindCpu0AndCpu1 ./kzip -A $Alg -m $multiProcess -f $testFile -o $testFileComped -c $cpuConfigStr -n $loppTimes -s $fileChunk -i $inflightNum -t $threadsNum -r $isTestCrc -p $isTestPolling -e $sess_nums -g $recordDelay" +echo "taskset -c $bindCpu0AndCpu1 ./kzip -d -A $Alg -m $multiProcess -f $testFileComped -o $testFileOrigin -c $cpuConfigStr -n $loppTimes -s $fileChunk -i $inflightNum -t $threadsNum -r $isTestCrc -p $isTestPolling -e $sess_nums -g $recordDelay" + +date +# gdb --args +taskset -c $bindCpu0AndCpu1 ./kzip -A $Alg -m $multiProcess -f $testFile -o $testFileComped -c $cpuConfigStr -n $loppTimes -s $fileChunk -i $inflightNum -t $threadsNum -r $isTestCrc -p $isTestPolling -e $sess_nums -g $recordDelay +date + +# sleep 1 +#taskset -c $bindCpu0AndCpu1 gdb --args ./kzip -d -A "kaezlib_deflate" -m $multiProcess -f $testFileComped -o $testFileOrigin -c $cpuConfigStr -n $loppTimes -s $fileChunk -i $inflightNum -t $threadsNum -r $isTestCrc +taskset -c $bindCpu0AndCpu1 ./kzip -d -A $Alg -m $multiProcess -f $testFileComped -o $testFileOrigin -c $cpuConfigStr -n $loppTimes -s $fileChunk -i $inflightNum -t $threadsNum -r $isTestCrc -p $isTestPolling -e $sess_nums -g $recordDelay +date +if [[ ! -f "$testFile" ]]; then + echo "Error: 压缩异常!未成功压缩文件" + exit 1 +fi +if [[ ! -f "$testFileOrigin" ]]; then + echo "Error: 解压异常!未成功解压文件" + exit 1 +fi +diffRes=$(diff $testFile $testFileOrigin) +if [[ -n "$diffRes" ]] ; then + echo "Error: 解压后数据与原始数据比对不通过!!" 
+else + echo "Success: 解压数据校验通过" +fi -- Gitee From 0045e8c016a48095d0ab442fe9fb1a39942ef67e Mon Sep 17 00:00:00 2001 From: shen-chenyang1 Date: Mon, 1 Sep 2025 10:55:30 +0800 Subject: [PATCH 2/2] fix: rename KAExxx_compress_async_polling_in_session to KAExxx_async_polling_in_session --- KAELz4/README.md | 8 ++++---- KAELz4/include/kaelz4.h | 2 +- KAELz4/src/kaelz4_adapter.c | 2 +- KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c | 2 +- KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c | 2 +- KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c | 2 +- KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c | 2 +- KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c | 2 +- .../test/kzip/scene_test_functions/test_async_deflate.c | 4 ++-- .../test_async_dst_buf_less_deflate.c | 6 +++--- .../test_async_dst_buf_less_lz77_raw.c | 4 ++-- .../test/kzip/scene_test_functions/test_async_lz77_raw.c | 2 +- .../test/kzip/scene_test_functions/test_async_polling.c | 2 +- KAEZlib/README.md | 8 ++++---- KAEZlib/include/kaezip.h | 2 +- KAEZlib/src/kaezip_async_adapter.c | 2 +- scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c | 2 +- scripts/perftest/kzip/alg/kaezlib/deflateAsync.c | 2 +- 18 files changed, 28 insertions(+), 28 deletions(-) diff --git a/KAELz4/README.md b/KAELz4/README.md index ac99063..fc9dbea 100644 --- a/KAELz4/README.md +++ b/KAELz4/README.md @@ -110,7 +110,7 @@ int KAELZ4_compress_lz77_async_in_session(void *sess, const struct kaelz4_buffer * @param: sess : session * @param: budget : process packet num per call. 
*/ -void KAELZ4_compress_async_polling_in_session(void *sess, int budget); +void KAELZ4_async_polling_in_session(void *sess, int budget); ``` #### 3.1.6、对lz77_raw数据进行格式转换 ``` @@ -487,7 +487,7 @@ static int test_lz77_raw_polling(int contentChecksumFlag, int blockChecksumFlag, } while (g_has_done != 1) { - KAELZ4_compress_async_polling_in_session(sess, 1); + KAELZ4_async_polling_in_session(sess, 1); usleep(100); } KAELZ4_destroy_async_compress_session(sess); @@ -590,7 +590,7 @@ int KAELZ4_compress_frame_async_in_session(void *sess, const struct kaelz4_buffe * @param: sess : session * @param: budget : process packet num per call. */ -void KAELZ4_compress_async_polling_in_session(void *sess, int budget); +void KAELZ4_async_polling_in_session(void *sess, int budget); ``` #### 3.2.6、清理session会话 ``` @@ -763,7 +763,7 @@ static int test_frame_polling(int contentChecksumFlag, int blockChecksumFlag, in } while (g_has_done != 1) { - KAELZ4_compress_async_polling_in_session(sess, 1); + KAELZ4_async_polling_in_session(sess, 1); usleep(100); } KAELZ4_destroy_async_compress_session(sess); diff --git a/KAELz4/include/kaelz4.h b/KAELz4/include/kaelz4.h index 47b1e14..f2fc662 100644 --- a/KAELz4/include/kaelz4.h +++ b/KAELz4/include/kaelz4.h @@ -208,7 +208,7 @@ int KAELZ4_compress_async_in_session(void *sess, const struct kaelz4_buffer_list * @param: sess : session * @param: budget : process packet num per call. 
*/ -void KAELZ4_compress_async_polling_in_session(void *sess, int budget); +void KAELZ4_async_polling_in_session(void *sess, int budget); /** * @brief: frame compress async api * @param: sess : session diff --git a/KAELz4/src/kaelz4_adapter.c b/KAELz4/src/kaelz4_adapter.c index 786326d..b5a5042 100644 --- a/KAELz4/src/kaelz4_adapter.c +++ b/KAELz4/src/kaelz4_adapter.c @@ -782,7 +782,7 @@ int KAELZ4_compress_lz77_async_in_session(void *sess, const struct kaelz4_buffer } -void KAELZ4_compress_async_polling_in_session(void *sess, int budget) +void KAELZ4_async_polling_in_session(void *sess, int budget) { struct kaelz4_async_ctrl *ctrl = NULL; lz4_task_queue *task_queue = NULL; diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c index cf2f1b3..4a8e9bc 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c @@ -46,7 +46,7 @@ static void lz4_async_block_cleanup(struct compress_ctx *ctx) compression_algorithm_t lz4async_block_algorithm = { .name = "kaelz4async_block", .async_compress = lz4async_block_compress, - .poll = KAELZ4_compress_async_polling_in_session, + .poll = KAELZ4_async_polling_in_session, .bound = lz4_bound, .async_decompress = lz4async_block_decompress, .init = lz4_async_block_init, diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c index 9ec445e..4306798 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c @@ -65,7 +65,7 @@ static void lz4_async_frame_cleanup(struct compress_ctx *ctx) compression_algorithm_t lz4_async_frame_algorithm = { .name = "kaelz4async_frame", .bound = lz4_frame_bound, - .poll = KAELZ4_compress_async_polling_in_session, + .poll = KAELZ4_async_polling_in_session, .async_compress = lz4_async_frame_compress, .async_decompress = lz4_async_frame_decompress, .init = lz4_frame_init, diff --git 
a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c index 2cae34e..b9404a7 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c @@ -41,7 +41,7 @@ static void lz4_async_block_cleanup(struct compress_ctx *ctx) compression_algorithm_t lz4async_lz77_algorithm = { .name = "kaelz4async_lz77", .async_compress = lz4async_block_compress, - .poll = KAELZ4_compress_async_polling_in_session, + .poll = KAELZ4_async_polling_in_session, .bound = lz4_bound, .async_decompress = lz4async_block_decompress, .init = lz4_async_block_init, diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c index 4dc0331..dfcde4a 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c @@ -40,7 +40,7 @@ static void lz4_async_block_cleanup(struct compress_ctx *ctx) compression_algorithm_t lz4async_lz77_frame_algorithm = { .name = "kaelz4async_lz77_frame", .async_compress = lz4async_block_compress, - .poll = KAELZ4_compress_async_polling_in_session, + .poll = KAELZ4_async_polling_in_session, .bound = lz4_bound, .async_decompress = lz4async_block_decompress, .init = lz4_async_block_init, diff --git a/KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c b/KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c index 253931d..829ac16 100644 --- a/KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c +++ b/KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c @@ -83,7 +83,7 @@ static void zlib_async_deflate_cleanup(struct compress_ctx *ctx) compression_algorithm_t zlibasync_block_algorithm = { .name = "kaezlibasync_deflate", .async_compress = zlibasync_deflate_compress, - .poll = KAEZIP_compress_async_polling_in_session, + .poll = KAEZIP_async_polling_in_session, .bound = zlib_bound, .async_decompress = zlibasync_deflate_decompress, .decompress = zlib_decompress, diff --git 
a/KAELz4/test/kzip/scene_test_functions/test_async_deflate.c b/KAELz4/test/kzip/scene_test_functions/test_async_deflate.c index 2d776c2..ab94227 100644 --- a/KAELz4/test/kzip/scene_test_functions/test_async_deflate.c +++ b/KAELz4/test/kzip/scene_test_functions/test_async_deflate.c @@ -264,7 +264,7 @@ static int decompressAsync(struct my_custom_data *mydata) return -1; } while (g_has_done != 1) { - KAEZIP_compress_async_polling_in_session(desess, 1); + KAEZIP_async_polling_in_session(desess, 1); usleep(100); } KAEZIP_destroy_async_decompress_session(desess); @@ -376,7 +376,7 @@ static int test_main() } while (g_has_done != 1) { - KAEZIP_compress_async_polling_in_session(sess, 1); + KAEZIP_async_polling_in_session(sess, 1); usleep(100); } KAEZIP_destroy_async_compress_session(sess); diff --git a/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_deflate.c b/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_deflate.c index 4b70cdb..7034cc0 100644 --- a/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_deflate.c +++ b/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_deflate.c @@ -264,7 +264,7 @@ static int decompressAsync(struct my_custom_data *mydata) return -1; } while (g_has_done != 1) { - KAEZIP_compress_async_polling_in_session(desess, 1); + KAEZIP_async_polling_in_session(desess, 1); usleep(100); } KAEZIP_destroy_async_decompress_session(desess); @@ -374,7 +374,7 @@ static int retry_compression(struct my_custom_data *my_data) return -1; } while (g_has_done != 1) { - KAEZIP_compress_async_polling_in_session(sess, 1); + KAEZIP_async_polling_in_session(sess, 1); usleep(100); } KAEZIP_destroy_async_compress_session(sess); @@ -429,7 +429,7 @@ static int test_main() } while (g_has_done != 1) { - KAEZIP_compress_async_polling_in_session(sess, 1); + KAEZIP_async_polling_in_session(sess, 1); usleep(100); } KAEZIP_destroy_async_compress_session(sess); diff --git 
a/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_lz77_raw.c b/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_lz77_raw.c index 4e44d2c..210e61a 100644 --- a/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_lz77_raw.c +++ b/KAELz4/test/kzip/scene_test_functions/test_async_dst_buf_less_lz77_raw.c @@ -333,7 +333,7 @@ static int retry_compression(struct my_custom_data *my_data) } while (g_has_done == 0) { - KAELZ4_compress_async_polling_in_session(sess, 1); + KAELZ4_async_polling_in_session(sess, 1); usleep(100); } KAELZ4_destroy_async_compress_session(sess); @@ -415,7 +415,7 @@ static int test_lz77_raw_polling(int contentChecksumFlag, int blockChecksumFlag, } while (g_has_done == 0) { - KAELZ4_compress_async_polling_in_session(sess, 1); + KAELZ4_async_polling_in_session(sess, 1); usleep(100); } KAELZ4_destroy_async_compress_session(sess); diff --git a/KAELz4/test/kzip/scene_test_functions/test_async_lz77_raw.c b/KAELz4/test/kzip/scene_test_functions/test_async_lz77_raw.c index 06a92e2..5282be0 100644 --- a/KAELz4/test/kzip/scene_test_functions/test_async_lz77_raw.c +++ b/KAELz4/test/kzip/scene_test_functions/test_async_lz77_raw.c @@ -356,7 +356,7 @@ static int test_lz77_raw_polling(int contentChecksumFlag, int blockChecksumFlag, } while (g_has_done != 1) { - KAELZ4_compress_async_polling_in_session(sess, 1); + KAELZ4_async_polling_in_session(sess, 1); usleep(100); } KAELZ4_destroy_async_compress_session(sess); diff --git a/KAELz4/test/kzip/scene_test_functions/test_async_polling.c b/KAELz4/test/kzip/scene_test_functions/test_async_polling.c index 29243fb..3b95ed0 100644 --- a/KAELz4/test/kzip/scene_test_functions/test_async_polling.c +++ b/KAELz4/test/kzip/scene_test_functions/test_async_polling.c @@ -179,7 +179,7 @@ static int test_frame_polling(int contentChecksumFlag, int blockChecksumFlag, in } while (g_has_done != 1) { - KAELZ4_compress_async_polling_in_session(sess, 1); + KAELZ4_async_polling_in_session(sess, 1); 
usleep(100); } KAELZ4_destroy_async_compress_session(sess); diff --git a/KAEZlib/README.md b/KAEZlib/README.md index 0551789..5edd734 100644 --- a/KAEZlib/README.md +++ b/KAEZlib/README.md @@ -20,7 +20,7 @@ export LD_LIBRARY_PATH=/usr/local/kaezip/lib:$LD_LIBRARY_PATH |---------------------------------------|--------------------------| | `KAEZIP_create_async_compress_session`| 创建异步压缩任务session | | `KAEZIP_compress_async_in_session` | 提交异步压缩任务 | -| `KAEZIP_compress_async_polling_in_session` | polling查询异步压缩/解压任务的结果 | +| `KAEZIP_async_polling_in_session` | polling查询异步压缩/解压任务的结果 | | `KAEZIP_destroy_async_compress_session` | 销毁压缩任务session | | `KAEZIP_create_async_decompress_session`| 创建异步解压任务session | | `KAEZIP_decompress_async_in_session` | 提交异步解压任务 | @@ -59,7 +59,7 @@ int KAEZIP_compress_async_in_session(void *sess, const struct kaezip_buffer_list * @param: sess : session * @param: budget : process packet num per call. */ - void KAEZIP_compress_async_polling_in_session(void *sess, int budget); + void KAEZIP_async_polling_in_session(void *sess, int budget); /** @@ -367,7 +367,7 @@ static int decompressAsync(struct my_custom_data *mydata) return -1; } while (g_has_done != 1) { - KAEZIP_compress_async_polling_in_session(desess, 1); + KAEZIP_async_polling_in_session(desess, 1); usleep(100); } KAEZIP_destroy_async_decompress_session(desess); @@ -479,7 +479,7 @@ static int test_main() } while (g_has_done != 1) { - KAEZIP_compress_async_polling_in_session(sess, 1); + KAEZIP_async_polling_in_session(sess, 1); usleep(100); } KAEZIP_destroy_async_compress_session(sess); diff --git a/KAEZlib/include/kaezip.h b/KAEZlib/include/kaezip.h index a0b813d..b273a56 100644 --- a/KAEZlib/include/kaezip.h +++ b/KAEZlib/include/kaezip.h @@ -116,7 +116,7 @@ int KAEZIP_compress_async_in_session(void *sess, const struct kaezip_buffer_list * @param: sess : session * @param: budget : process packet num per call. 
*/ - void KAEZIP_compress_async_polling_in_session(void *sess, int budget); + void KAEZIP_async_polling_in_session(void *sess, int budget); /** * @brief: Initialize Task Queues and Threads on the KAE Side. diff --git a/KAEZlib/src/kaezip_async_adapter.c b/KAEZlib/src/kaezip_async_adapter.c index edcefca..8457376 100644 --- a/KAEZlib/src/kaezip_async_adapter.c +++ b/KAEZlib/src/kaezip_async_adapter.c @@ -188,7 +188,7 @@ int KAEZIP_compress_async_in_session(void *sess, const struct kaezip_buffer_list return kaezip_async_do_comp_in_session(sess, src, dst, callback, result, KAEZIP_ASYNC_BLOCK, WCRYPTO_DEFLATE); } -void KAEZIP_compress_async_polling_in_session(void *sess, int budget) +void KAEZIP_async_polling_in_session(void *sess, int budget) { struct kaezip_async_ctrl *ctrl = NULL; kaezip_task_queue *task_queue = NULL; diff --git a/scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c index dbb4109..23f312b 100644 --- a/scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c +++ b/scripts/perftest/kzip/alg/kaelz4/lz4AsyncBlock.c @@ -211,7 +211,7 @@ void lz4_prepre_out_buf(struct compress_ctx *ctx, struct compress_out_buf *out_b void lz4_async_polling(struct compress_session *sess, int budget) { - KAELZ4_compress_async_polling_in_session(sess->kae_sess, budget); + KAELZ4_async_polling_in_session(sess->kae_sess, budget); } // LZ4 算法实例 diff --git a/scripts/perftest/kzip/alg/kaezlib/deflateAsync.c b/scripts/perftest/kzip/alg/kaezlib/deflateAsync.c index 37794ea..03733b5 100644 --- a/scripts/perftest/kzip/alg/kaezlib/deflateAsync.c +++ b/scripts/perftest/kzip/alg/kaezlib/deflateAsync.c @@ -201,7 +201,7 @@ void zlib_prepre_out_buf(struct compress_ctx *ctx, struct compress_out_buf *out_ static void zlib_polling(struct compress_session *sess, int budget) { - KAEZIP_compress_async_polling_in_session(sess->kae_sess, budget); + KAEZIP_async_polling_in_session(sess->kae_sess, budget); } // Zlib 算法实例 -- Gitee