From 69e76332796316a65b4f3d2da0b11427fe90cba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Tue, 1 Jul 2025 07:06:55 +0000 Subject: [PATCH 1/3] feat: add lz77 raw data format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAELz4/include/kaelz4.h | 9 ++ KAELz4/src/kaelz4_adapter.c | 141 ++++++++++--------- KAELz4/src/kaelz4_adapter.h | 6 +- KAELz4/src/utils/kaelz4_common.h | 1 + KAELz4/src/v1/kaelz4_comp.h | 26 +++- KAELz4/src/v1/kaelz4_ctx.c | 230 +++++++++++++++++++++---------- KAELz4/src/v1/kaelz4_ctx.h | 25 +++- KAELz4/src/v1/wd_queue_memory.c | 6 +- 8 files changed, 285 insertions(+), 159 deletions(-) diff --git a/KAELz4/include/kaelz4.h b/KAELz4/include/kaelz4.h index 4b758b6..729647c 100644 --- a/KAELz4/include/kaelz4.h +++ b/KAELz4/include/kaelz4.h @@ -71,6 +71,8 @@ typedef struct { #define KAE_LZ4_SET_FAIL 6 #define KAE_LZ4_HW_TIMEOUT_FAIL 7 +#define KAE_LZ77_SEQ_DATA_SIZE_PER_64K (128UL * 1024UL) + #define VERSION_STRUCT_LEN 100 typedef struct { char productName[VERSION_STRUCT_LEN]; @@ -133,4 +135,11 @@ int KAELZ4_decompress_async(const struct kaelz4_buffer_list *src, struct kaelz4_ lz4_async_callback callback, struct kaelz4_result *result); int KAELZ4F_decompressFrame_async(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback callback, struct kaelz4_result *result, const void *options_ptr); +size_t KAELZ4_compress_get_tuple_buf_len(size_t src_len); +int KAELZ4_compress_lz77_async_in_session(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, + lz4_async_callback callback, struct kaelz4_result *result); +int KAELZ4_rebuild_lz77_to_block(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *tuple_buf, struct kaelz4_buffer_list *dst, + struct kaelz4_result *result); +int KAELZ4_rebuild_lz77_to_frame(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *tuple_buf, struct kaelz4_buffer_list *dst, + struct kaelz4_result *result, const void *preferences_ptr); #endif \ No newline at end of file diff --git a/KAELz4/src/kaelz4_adapter.c b/KAELz4/src/kaelz4_adapter.c index bdecbc1..7852081 100644 --- a/KAELz4/src/kaelz4_adapter.c +++ b/KAELz4/src/kaelz4_adapter.c @@ -304,6 +304,7 @@ static void *compress_thread_func(void *arg) exit_thread: kaelz4_async_deinit(); + task_queue->stop = 0; return NULL; } @@ -394,6 +395,7 @@ static void *decompress_thread_func(void *arg) exit_thread: LZ4F_freeDecompressionContext(dctx); kaelz4_async_deinit(); + task_queue->stop = 0; return NULL; } @@ -461,13 +463,18 @@ static int kaelz4_task_queue_init(lz4_task_queue *task_queue, int index, task_qu static void kaelz4_task_queue_free(lz4_task_queue *task_queue) { - pthread_mutex_lock(&task_queue->mutex); - task_queue->stop = 1; - pthread_cond_signal(&task_queue->cond); - pthread_mutex_unlock(&task_queue->mutex); - if (!task_queue->is_polling) + if (!task_queue->is_polling) { + pthread_mutex_lock(&task_queue->mutex); + task_queue->stop = 1; + pthread_cond_signal(&task_queue->cond); + pthread_mutex_unlock(&task_queue->mutex); + while (task_queue->stop) { + pthread_cond_signal(&task_queue->cond); + } + pthread_join(task_queue->worker_thread, NULL); + } pthread_mutex_destroy(&task_queue->mutex); @@ -549,18 +556,14 @@ void *KAELZ4_create_async_compress_session(iova_map_fn usr_map) return NULL; sess->usr_map = usr_map; -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) ret = 
kaelz4_task_queue_init(&sess->task_queue, 0, NULL); if (ret != 0) { free(sess); return NULL; } -#endif ret = kaelz4_async_instances_init(&sess->ctrl, usr_map); if (ret != 0) { -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) kaelz4_task_queue_free(&sess->task_queue); -#endif free(sess); return NULL; } @@ -572,9 +575,7 @@ void KAELZ4_destroy_async_compress_session(void *sess) { if (sess) { kaelz4_async_instances_deinit(((kaelz4_session *)sess)->ctrl); -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) kaelz4_task_queue_free(&((kaelz4_session *)sess)->task_queue); -#endif free(sess); } } @@ -695,88 +696,77 @@ static int kaelz4_check_param_valid(const struct kaelz4_buffer_list *src, struct } result->dst_len = dst->buf[0].buf_len; - return 0; + return KAE_LZ4_SUCC; } -int KAELZ4_compress_async_in_session(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, - lz4_async_callback callback, struct kaelz4_result *result) +static int kaelz4_async_do_comp_in_session(kaelz4_session *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, + lz4_async_callback callback, struct kaelz4_result *result, + enum kae_lz4_async_data_format data_format, const LZ4F_preferences_t* preferences_ptr) { - if (unlikely(sess == NULL || kaelz4_check_param_valid(src, dst, callback, result) != 0)) { - return KAE_LZ4_INVAL_PARA; - } - -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) - lz4_task_queue *task_queue = &((kaelz4_session *)sess)->task_queue; -#endif + lz4_task_queue *task_queue = &sess->task_queue; lz4_async_task_t task = {0}; task.src = src; task.dst = dst; task.callback = callback; task.result = result; + task.data_format = data_format; - if (result->src_size > SMALL_BLOCK_SIZE) { - task.data_format = KAELZ4_ASYNC_BLOCK; + if (preferences_ptr != NULL) { + task.preferences = *(const LZ4F_preferences_t *)preferences_ptr; } -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) - if (task_queue->pi != task_queue->ci && !kaelz4_async_is_thread_do_comp_full(((kaelz4_session *)sess)->ctrl)) { - kaelz4_dequeue_process(((kaelz4_session *)sess)->ctrl, task_queue, ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET); + if (task_queue->pi != task_queue->ci && !kaelz4_async_is_thread_do_comp_full(sess->ctrl)) { + kaelz4_dequeue_process(sess->ctrl, task_queue, ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET); } - if (task_queue->pi != task_queue->ci || kaelz4_async_is_thread_do_comp_full(((kaelz4_session *)sess)->ctrl)) { + if (task_queue->pi != task_queue->ci || kaelz4_async_is_thread_do_comp_full(sess->ctrl)) { return kaelz4_enqueue(task_queue, &task); } else { -#endif - return kaelz4_compress_async(((kaelz4_session *)sess)->ctrl, task.src, task.dst, task.callback, task.result, + return kaelz4_compress_async(sess->ctrl, task.src, task.dst, task.callback, task.result, task.data_format, &task.preferences); -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) } -#endif +} + + +int KAELZ4_compress_async_in_session(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, + lz4_async_callback callback, struct kaelz4_result *result) +{ + if (unlikely(sess == NULL || kaelz4_check_param_valid(src, dst, callback, result) != KAE_LZ4_SUCC)) { + return KAE_LZ4_INVAL_PARA; + } + if (result->src_size <= SMALL_BLOCK_SIZE) { + return 
kaelz4_async_do_comp_in_session(sess, src, dst, callback, result, KAELZ4_ASYNC_SMALL_BLOCK, NULL); + } + + return kaelz4_async_do_comp_in_session(sess, src, dst, callback, result, KAELZ4_ASYNC_BLOCK, NULL); } int KAELZ4_compress_frame_async_in_session(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback callback, struct kaelz4_result *result, const void *preferences_ptr) { - if (unlikely(sess == NULL || kaelz4_check_param_valid(src, dst, callback, result) != 0)) { + if (unlikely(sess == NULL || kaelz4_check_param_valid(src, dst, callback, result) != KAE_LZ4_SUCC)) { return KAE_LZ4_INVAL_PARA; } -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) - lz4_task_queue *task_queue = &((kaelz4_session *)sess)->task_queue; -#endif - lz4_async_task_t task = {0}; - task.src = src; - task.dst = dst; - task.callback = callback; - task.result = result; - task.data_format = KAELZ4_ASYNC_FRAME; - if (preferences_ptr != NULL) { - task.preferences = *(const LZ4F_preferences_t *)preferences_ptr; - } + return kaelz4_async_do_comp_in_session(sess, src, dst, callback, result, KAELZ4_ASYNC_FRAME, preferences_ptr); +} -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) - if (task_queue->pi != task_queue->ci && !kaelz4_async_is_thread_do_comp_full(((kaelz4_session *)sess)->ctrl)) { - kaelz4_dequeue_process(((kaelz4_session *)sess)->ctrl, task_queue, ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET); +int KAELZ4_compress_lz77_async_in_session(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, + lz4_async_callback callback, struct kaelz4_result *result) +{ + if (unlikely(sess == NULL || kaelz4_check_param_valid(src, dst, callback, result) != KAE_LZ4_SUCC)) { + return KAE_LZ4_INVAL_PARA; } - if (task_queue->pi != task_queue->ci || kaelz4_async_is_thread_do_comp_full(((kaelz4_session *)sess)->ctrl)) { - return kaelz4_enqueue(task_queue, &task); - } else { -#endif - return kaelz4_compress_async(((kaelz4_session *)sess)->ctrl, task.src, task.dst, task.callback, task.result, - task.data_format, &task.preferences); -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) - } -#endif + return kaelz4_async_do_comp_in_session(sess, src, dst, callback, result, KAELZ4_ASYNC_LZ77_RAW, NULL); } + void KAELZ4_compress_async_polling_in_session(void *sess, int budget) { struct kaelz4_async_ctrl *ctrl = NULL; -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) lz4_task_queue *task_queue = NULL; -#endif int ret = 1; int cnt = 0; @@ -784,17 +774,13 @@ void KAELZ4_compress_async_polling_in_session(void *sess, int budget) return; ctrl = ((kaelz4_session *)sess)->ctrl; -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) task_queue = &((kaelz4_session *)sess)->task_queue; -#endif while (ret > 0 && cnt < budget) { ret = kaelz4_async_compress_polling(ctrl, ASYNC_POLLING_DEFAULT_BUDGET); -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) if (!kaelz4_async_is_thread_do_comp_full(ctrl)) { kaelz4_dequeue_process(ctrl, task_queue, ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET); } -#endif cnt += ret; } } @@ -802,7 +788,7 @@ void KAELZ4_compress_async_polling_in_session(void *sess, int budget) int KAELZ4_compress_async(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback callback, 
struct kaelz4_result *result) { - if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != 0)) { + if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != KAE_LZ4_SUCC)) { return KAE_LZ4_INVAL_PARA; } @@ -816,7 +802,7 @@ int KAELZ4_compress_async(const struct kaelz4_buffer_list *src, struct kaelz4_bu int KAELZ4F_compressFrame_async(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback callback, struct kaelz4_result *result, const void *preferences_ptr) { - if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != 0)) { + if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != KAE_LZ4_SUCC)) { return KAE_LZ4_INVAL_PARA; } @@ -826,7 +812,7 @@ int KAELZ4F_compressFrame_async(const struct kaelz4_buffer_list *src, struct kae int KAELZ4_decompress_async(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback callback, struct kaelz4_result *result) { - if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != 0)) { + if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != KAE_LZ4_SUCC)) { return KAE_LZ4_INVAL_PARA; } @@ -836,9 +822,32 @@ int KAELZ4_decompress_async(const struct kaelz4_buffer_list *src, struct kaelz4_ int KAELZ4F_decompressFrame_async(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback callback, struct kaelz4_result *result, const void *options_ptr) { - if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != 0)) { + if (unlikely(kaelz4_check_param_valid(src, dst, callback, result) != KAE_LZ4_SUCC)) { return KAE_LZ4_INVAL_PARA; } return kaelz4_async_do_decomp(src, dst, callback, result, KAELZ4_ASYNC_FRAME, options_ptr); } + +size_t KAELZ4_compress_get_tuple_buf_len(size_t src_len) +{ + size_t freg_cnt = (src_len > 0) ? 
((src_len - 1) / SMALL_BLOCK_SIZE + 1) : 0; + + return freg_cnt * KAE_LZ77_SEQ_DATA_SIZE_PER_64K; +} + +int KAELZ4_rebuild_lz77_to_block(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *tuple_buf, struct kaelz4_buffer_list *dst, + struct kaelz4_result *result) +{ + if (result->src_size <= SMALL_BLOCK_SIZE) { + return kaelz4_triples_rebuild_impl(src, tuple_buf, dst, result, KAELZ4_ASYNC_SMALL_BLOCK, NULL); + } + + return kaelz4_triples_rebuild_impl(src, tuple_buf, dst, result, KAELZ4_ASYNC_BLOCK, NULL); +} + +int KAELZ4_rebuild_lz77_to_frame(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *tuple_buf, struct kaelz4_buffer_list *dst, + struct kaelz4_result *result, const void *preferences_ptr) +{ + return kaelz4_triples_rebuild_impl(src, tuple_buf, dst, result, KAELZ4_ASYNC_FRAME, preferences_ptr); +} diff --git a/KAELz4/src/kaelz4_adapter.h b/KAELz4/src/kaelz4_adapter.h index 1e3223c..5f8896a 100644 --- a/KAELz4/src/kaelz4_adapter.h +++ b/KAELz4/src/kaelz4_adapter.h @@ -26,7 +26,6 @@ enum { #define SMALL_BLOCK_SIZE (64 * 1024) #define ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET 3 #define ASYNC_POLLING_DEFAULT_BUDGET 1 -#define KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE TRUE // 测试时延时打开次编译宏才能统计准确 typedef struct { const struct kaelz4_buffer_list *src; @@ -65,9 +64,7 @@ typedef struct { struct kaelz4_async_ctrl; typedef struct { -#if defined(KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE) && (KAELZ4_ASYNC_POLLING_ENQUEUE_ENABLE == TRUE) lz4_task_queue task_queue; -#endif iova_map_fn usr_map; struct kaelz4_async_ctrl *ctrl; } kaelz4_session; @@ -96,4 +93,7 @@ struct kaelz4_async_ctrl *kaelz4_async_init(volatile int *stop, sw_compress_fn s void kaelz4_async_deinit(void); int kaelz4_async_instances_init(struct kaelz4_async_ctrl **ctrl, iova_map_fn usr_map); void kaelz4_async_instances_deinit(struct kaelz4_async_ctrl *ctrl); + +int kaelz4_triples_rebuild_impl(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *tuple_buf, struct kaelz4_buffer_list *dst, + struct kaelz4_result *result, enum kae_lz4_async_data_format data_format, const LZ4F_preferences_t *ptr); #endif \ No newline at end of file diff --git a/KAELz4/src/utils/kaelz4_common.h b/KAELz4/src/utils/kaelz4_common.h index 4d82830..aba66b0 100644 --- a/KAELz4/src/utils/kaelz4_common.h +++ b/KAELz4/src/utils/kaelz4_common.h @@ -17,6 +17,7 @@ enum kae_lz4_async_data_format { KAELZ4_ASYNC_SMALL_BLOCK = 0, KAELZ4_ASYNC_BLOCK, KAELZ4_ASYNC_FRAME, + KAELZ4_ASYNC_LZ77_RAW, KAELZ4_ASYNC_BUTT, }; diff --git a/KAELz4/src/v1/kaelz4_comp.h b/KAELz4/src/v1/kaelz4_comp.h index 280cd8a..992b815 100644 --- a/KAELz4/src/v1/kaelz4_comp.h +++ b/KAELz4/src/v1/kaelz4_comp.h @@ -30,6 +30,8 @@ #define REQ_BUFFER_MAX 60 // uadk支持最大的sgl buf数量 +#define MAX_NUM_IN_COMP 4 // 每个线程最多允许同时进行的压缩任务数 + #if !defined(LZ4_memcpy) # if defined(__GNUC__) && (__GNUC__ >= 4) # define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) @@ -49,8 +51,16 @@ typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_una struct kaelz4_compress_ctx; struct kaelz4_async_req; +struct kaelz4_priv_save_info { + void *prev_last_lit_ptr; // 用户输入数据>64K需要分块、返回BLOCK格式、现有保序返回切块压缩结果的约束下,记录前一个分块的last literal信息 + size_t prev_last_lit_len; + unsigned int prev_last_lit_buf_index; // 用户输入数据>64K需要分块、返回BLOCK格式、现有保序返回切块压缩结果的约束下,记录前一个分块的last literal信息 + const struct kaelz4_buffer_list *src; + LZ4F_preferences_t preferences; +}; + typedef int (*kaelz4_post_process_handle_t)(struct kaelz4_async_req *req, const struct wd_buf_list *source, - void *dest); + void *dest, 
struct kaelz4_priv_save_info *save_info); struct kaelz4_compress_ctx { size_t srcSize; @@ -58,24 +68,27 @@ struct kaelz4_compress_ctx { size_t dst_len; const struct kaelz4_buffer_list *src; struct kaelz4_buffer_list *dst; - void *prev_last_lit_ptr; // 用户输入数据>64K需要分块、返回BLOCK格式、现有保序返回切块压缩结果的约束下,记录前一个分块的last literal信息 - size_t prev_last_lit_len; - unsigned int prev_last_lit_buf_index; // 用户输入数据>64K需要分块、返回BLOCK格式、现有保序返回切块压缩结果的约束下,记录前一个分块的last literal信息 - unsigned int recv_cnt; + struct kaelz4_priv_save_info save_info; lz4_async_callback callback; struct kaelz4_result *result; enum kae_lz4_async_data_format data_format; - LZ4F_preferences_t preferences; kaelz4_post_process_handle_t kaelz4_post_process_handle; struct kaelz4_async_req *req_list; struct kaelz4_compress_ctx *next; int status; }; +struct kaelz4_seq_result { + unsigned int seq_num; + unsigned char seq_start[]; +}; + struct kaelz4_async_req { LZ4_CCtx zc; struct wd_buf_list src; + struct wd_buf_list dst; struct wd_buf buffers[REQ_BUFFER_MAX]; + struct wd_buf dst_buffers[REQ_BUFFER_MAX]; size_t src_size; U32 idx; U32 special_flag; @@ -86,7 +99,6 @@ struct kaelz4_async_req { struct kaelz4_async_req *next; }; -#define MAX_NUM_IN_COMP 2 // 每个线程最多允许同时进行的压缩任务数 struct kaelz4_async_ctrl { struct kaelz4_compress_ctx *ctx_head; struct kaelz4_compress_ctx *tail; diff --git a/KAELz4/src/v1/kaelz4_ctx.c b/KAELz4/src/v1/kaelz4_ctx.c index c9a6835..dae25cb 100644 --- a/KAELz4/src/v1/kaelz4_ctx.c +++ b/KAELz4/src/v1/kaelz4_ctx.c @@ -15,9 +15,6 @@ static pthread_mutex_t g_kaelz4_deflate_pool_init_mutex = PTHREAD_MUTEX_INITIALI static pthread_mutex_t g_kaelz4_inflate_pool_init_mutex = PTHREAD_MUTEX_INITIALIZER; static KAE_QUEUE_POOL_HEAD_S* kaelz4_get_qp(int algtype); -static kaelz4_ctx_t* kaelz4_new_ctx(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_type, int comp_optype, int is_sgl); -static int kaelz4_create_wd_ctx(kaelz4_ctx_t *kz_ctx, int alg_comp_type, int comp_optype); -static int kaelz4_driver_do_comp_impl(kaelz4_ctx_t *kz_ctx); static void kaelz4_free_kz_ctx(void* kz_ctx) { @@ -26,31 +23,26 @@ static void kaelz4_free_kz_ctx(void* kz_ctx) return; } - if (!kaelz4_ctx->is_sgl) { - if (kaelz4_ctx->op_data.in && kaelz4_ctx->setup.br.usr) { - kaelz4_ctx->setup.br.free(kaelz4_ctx->setup.br.usr, (void *)kaelz4_ctx->op_data.in); + if (!kaelz4_ctx->q_node->is_sgl) { + if (kaelz4_ctx->op_data.in && kaelz4_ctx->setup->br.usr) { + kaelz4_ctx->setup->br.free(kaelz4_ctx->setup->br.usr, (void *)kaelz4_ctx->op_data.in); kaelz4_ctx->op_data.in = NULL; } - if (kaelz4_ctx->op_data.out && kaelz4_ctx->setup.br.usr) { - kaelz4_ctx->setup.br.free(kaelz4_ctx->setup.br.usr, (void *)kaelz4_ctx->op_data.out); + if (kaelz4_ctx->op_data.out && kaelz4_ctx->setup->br.usr) { + kaelz4_ctx->setup->br.free(kaelz4_ctx->setup->br.usr, (void *)kaelz4_ctx->op_data.out); kaelz4_ctx->op_data.out = NULL; } } else { - if (kaelz4_ctx->output.literal && kaelz4_ctx->setup.br.usr) { - kaelz4_ctx->setup.br.free(kaelz4_ctx->setup.br.usr, (void *)kaelz4_ctx->output.literal); + if (kaelz4_ctx->output.literal && kaelz4_ctx->setup->br.usr) { + kaelz4_ctx->setup->br.free(kaelz4_ctx->setup->br.usr, (void *)kaelz4_ctx->output.literal); kaelz4_ctx->output.literal = NULL; } - if (kaelz4_ctx->output.sequence && kaelz4_ctx->setup.br.usr) { - kaelz4_ctx->setup.br.free(kaelz4_ctx->setup.br.usr, (void *)kaelz4_ctx->output.sequence); + if (kaelz4_ctx->dst_sgl_kernel && kaelz4_ctx->setup->br.usr) { + kaelz4_ctx->setup->br.free(kaelz4_ctx->setup->br.usr, (void *)kaelz4_ctx->dst_sgl_kernel); 
kaelz4_ctx->output.sequence = NULL; } } - if (kaelz4_ctx->wd_ctx != NULL) { - wcrypto_del_comp_ctx(kaelz4_ctx->wd_ctx); - kaelz4_ctx->wd_ctx = NULL; - } - kae_free(kaelz4_ctx); return; @@ -120,7 +112,8 @@ static void kaelz4_ctx_callback(const void *msg, void *tag) return; } -static kaelz4_ctx_t* kaelz4_new_ctx(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_type, int comp_optype, int is_sgl) +static kaelz4_ctx_t* kaelz4_new_ctx(struct kaelz4_instance *instance, + int alg_comp_type, int comp_optype, int is_sgl) { kaelz4_ctx_t *kz_ctx = NULL; kz_ctx = (kaelz4_ctx_t *)kae_malloc(sizeof(kaelz4_ctx_t)); @@ -130,44 +123,36 @@ static kaelz4_ctx_t* kaelz4_new_ctx(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_ } memset(kz_ctx, 0, sizeof(kaelz4_ctx_t)); - kz_ctx->setup.comp_lv = kaelz4_get_comp_lv(); - kz_ctx->setup.win_size = kaelz4_get_win_size(); - kz_ctx->setup.br.usr = q_node->kae_queue_mem_pool; - kz_ctx->setup.cb = kaelz4_ctx_callback; - kz_ctx->is_sgl = is_sgl; + kz_ctx->setup = &instance->setup; + kz_ctx->comp_alg_type = alg_comp_type; + kz_ctx->comp_type = comp_optype; + kz_ctx->q_node = instance->q_node; + kz_ctx->wd_ctx = instance->wd_ctx; if (is_sgl) { - kz_ctx->setup.br.alloc = kaelz4_wd_alloc_sgl; - kz_ctx->setup.br.free = kaelz4_wd_free_sgl; - kz_ctx->setup.br.iova_map = kaelz4_dma_map_sgl; - kz_ctx->setup.br.iova_unmap = kaelz4_dma_unmap_sgl; - kz_ctx->op_data.in = (void *)kz_ctx->sgl; + kz_ctx->op_data.in = (void *)kz_ctx->src_sgl_buf; kz_ctx->output.lit_sz = COMP_BLOCK_SIZE; kz_ctx->output.seq_sz = COMP_BLOCK_SIZE; - kz_ctx->output.literal = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); + kz_ctx->output.literal = kz_ctx->setup->br.alloc(kz_ctx->setup->br.usr, COMP_BLOCK_SIZE); if (kz_ctx->output.literal == NULL) { US_ERR("alloc opdata output.literal buf failed"); goto err; } - kz_ctx->output.sequence = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); - if (kz_ctx->output.sequence == NULL) { + kz_ctx->dst_sgl_kernel = kz_ctx->setup->br.alloc(kz_ctx->setup->br.usr, COMP_BLOCK_SIZE); + if (kz_ctx->dst_sgl_kernel == NULL) { US_ERR("alloc opdata output.sequence buf failed"); goto err; } kz_ctx->op_data.out = (void *)&kz_ctx->output; } else { - kz_ctx->setup.br.alloc = kaelz4_wd_alloc_blk; - kz_ctx->setup.br.free = kaelz4_wd_free_blk; - kz_ctx->setup.br.iova_map = kaelz4_dma_map_blk; - kz_ctx->setup.br.iova_unmap = kaelz4_dma_unmap_blk; - kz_ctx->op_data.in = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); + kz_ctx->op_data.in = kz_ctx->setup->br.alloc(kz_ctx->setup->br.usr, COMP_BLOCK_SIZE); if (kz_ctx->op_data.in == NULL) { US_ERR("alloc opdata in buf failed"); goto err; } - kz_ctx->op_data.out = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); + kz_ctx->op_data.out = kz_ctx->setup->br.alloc(kz_ctx->setup->br.usr, COMP_BLOCK_SIZE); if (kz_ctx->op_data.out == NULL) { US_ERR("alloc opdata out buf failed"); goto err; @@ -175,13 +160,6 @@ static kaelz4_ctx_t* kaelz4_new_ctx(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_ } kz_ctx->op_data.priv = &kz_ctx->lz4_data; - kz_ctx->q_node = q_node; - q_node->priv_ctx = kz_ctx; - - if (kaelz4_create_wd_ctx(kz_ctx, alg_comp_type, comp_optype) == KAEZIP_FAILED) { - US_ERR("create wd ctx fail!"); - goto err; - } return kz_ctx; @@ -191,67 +169,147 @@ err: return NULL; } -static int kaelz4_create_wd_ctx(kaelz4_ctx_t *kz_ctx, int alg_comp_type, int comp_optype) +static int kaelz4_create_wd_ctx(struct kaelz4_instance *instance, int alg_comp_type, int comp_optype) { - if (kz_ctx->wd_ctx != NULL) { + if 
(instance->wd_ctx != NULL) { US_WARN("wd ctx is in used by other comp"); return KAEZIP_FAILED; } - struct wd_queue *q = kz_ctx->q_node->kae_wd_queue; + struct wd_queue *q = instance->q_node->kae_wd_queue; - kz_ctx->setup.alg_type = (enum wcrypto_comp_alg_type)alg_comp_type; - kz_ctx->setup.op_type = (enum wcrypto_comp_optype)comp_optype; - kz_ctx->setup.stream_mode = (enum wcrypto_comp_state)WCRYPTO_COMP_STATELESS; - if (kz_ctx->is_sgl) - kz_ctx->setup.data_fmt = WD_SGL_BUF; + instance->setup.alg_type = (enum wcrypto_comp_alg_type)alg_comp_type; + instance->setup.op_type = (enum wcrypto_comp_optype)comp_optype; + instance->setup.stream_mode = (enum wcrypto_comp_state)WCRYPTO_COMP_STATELESS; + if (instance->q_node->is_sgl) + instance->setup.data_fmt = WD_SGL_BUF; - kz_ctx->wd_ctx = wcrypto_create_comp_ctx(q, &kz_ctx->setup); - if (kz_ctx->wd_ctx == NULL) { + instance->wd_ctx = wcrypto_create_comp_ctx(q, &instance->setup); + if (instance->wd_ctx == NULL) { US_ERR("wd create kae comp ctx fail!"); return KAEZIP_FAILED; } - kz_ctx->comp_alg_type = alg_comp_type; - kz_ctx->comp_type = comp_optype; - return KAEZIP_SUCCESS; } +static struct kaelz4_instance *kaelz4_new_instance(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_type, int comp_optype, int is_sgl) +{ + struct kaelz4_instance *instance = (struct kaelz4_instance *)kae_malloc(sizeof(struct kaelz4_instance)); + + if (instance == NULL) { + US_ERR("failed to alloc kaelz4 instance"); + return NULL; + } + + memset(instance, 0, sizeof(struct kaelz4_instance)); + + instance->q_node = q_node; + instance->total_num = MAX_KAE_CTX_DEPTH; + instance->setup.comp_lv = kaelz4_get_comp_lv(); + instance->setup.win_size = kaelz4_get_win_size(); + instance->setup.br.usr = q_node->kae_queue_mem_pool; + instance->setup.cb = kaelz4_ctx_callback; + + if (is_sgl) { + instance->setup.br.alloc = kaelz4_wd_alloc_sgl; + instance->setup.br.free = kaelz4_wd_free_sgl; + instance->setup.br.iova_map = kaelz4_dma_map_sgl; + instance->setup.br.iova_unmap = kaelz4_dma_unmap_sgl; + } else { + instance->setup.br.alloc = kaelz4_wd_alloc_blk; + instance->setup.br.free = kaelz4_wd_free_blk; + instance->setup.br.iova_map = kaelz4_dma_map_blk; + instance->setup.br.iova_unmap = kaelz4_dma_unmap_blk; + } + + if (kaelz4_create_wd_ctx(instance, alg_comp_type, comp_optype) == KAEZIP_FAILED) { + US_ERR("create wd ctx fail!"); + kae_free(instance); + return NULL; + } + return instance; +} + +void kaelz4_free_instance(void *arg) +{ + struct kaelz4_instance *instance = arg; + + for (int i = 0; i < instance->total_num; i++) { + if (instance->kz_ctx[i]) { + kaelz4_free_kz_ctx(instance->kz_ctx[i]); + instance->kz_ctx[i] = NULL; + } + } + + if (instance->wd_ctx != NULL) { + wcrypto_del_comp_ctx(instance->wd_ctx); // scy: TBM + instance->wd_ctx = NULL; + } + + kae_free(instance); +} + +__thread struct kaelz4_instance *g_cur_instance; kaelz4_ctx_t* kaelz4_get_ctx(int alg_comp_type, int comp_optype, int is_sgl) { KAE_QUEUE_DATA_NODE_S *q_node = NULL; kaelz4_ctx_t *kz_ctx = NULL; - KAE_QUEUE_POOL_HEAD_S* qp = kaelz4_get_qp(comp_optype); + if(unlikely(!qp)) { US_ERR("failed to get hardware queue pool"); return NULL; } - q_node = kaelz4_get_node_from_pool(qp, alg_comp_type, comp_optype, is_sgl); - if (q_node == NULL) { - kaelz4_queue_pool_check_and_release(qp, kaelz4_free_kz_ctx); + if (g_cur_instance == NULL) { q_node = kaelz4_get_node_from_pool(qp, alg_comp_type, comp_optype, is_sgl); - if (q_node == NULL) { - US_ERR("failed to get hardware queue"); - return NULL; + 
kaelz4_queue_pool_check_and_release(qp, kaelz4_free_instance); + q_node = kaelz4_get_node_from_pool(qp, alg_comp_type, comp_optype, is_sgl); + + if (q_node == NULL) { + kae_free(g_cur_instance); + g_cur_instance = NULL; + US_ERR("failed to get hardware queue"); + return NULL; + } + } + + if (q_node->priv_ctx == NULL) { + g_cur_instance = kaelz4_new_instance(q_node, alg_comp_type, comp_optype, is_sgl); + if (g_cur_instance == NULL) { + US_ERR("create instance fail!"); + (void)kaelz4_put_node_to_pool(qp, q_node, kaelz4_free_instance); + return NULL; + } + q_node->priv_ctx = g_cur_instance; + } else { + g_cur_instance = q_node->priv_ctx; } + } else { + q_node = g_cur_instance->q_node; } - kz_ctx = (kaelz4_ctx_t *)q_node->priv_ctx; + kz_ctx = g_cur_instance->kz_ctx[g_cur_instance->cur_idx]; if (kz_ctx == NULL) { - kz_ctx = kaelz4_new_ctx(q_node, alg_comp_type, comp_optype, is_sgl); + kz_ctx = kaelz4_new_ctx(g_cur_instance, alg_comp_type, comp_optype, is_sgl); if (kz_ctx == NULL) { - US_ERR("kaezip new engine ctx fail!"); - (void)kaelz4_put_node_to_pool(qp, q_node, kaelz4_free_kz_ctx); + if (g_cur_instance->cur_idx == 0) { + (void)kaelz4_put_node_to_pool(qp, q_node, kaelz4_free_instance); + } + g_cur_instance = NULL; return NULL; } + g_cur_instance->kz_ctx[g_cur_instance->cur_idx] = kz_ctx; } - kz_ctx->q_node = q_node; kaelz4_init_ctx(kz_ctx); + kz_ctx->index = g_cur_instance->cur_idx; + g_cur_instance->cur_idx++; + if (g_cur_instance->cur_idx == g_cur_instance->total_num) { + g_cur_instance = NULL; + } return kz_ctx; } @@ -289,9 +347,18 @@ void kaelz4_put_ctx(kaelz4_ctx_t* kz_ctx) } if (kz_ctx->q_node != NULL) { + struct kaelz4_instance *instance = (struct kaelz4_instance *)kz_ctx->q_node->priv_ctx; + temp = kz_ctx->q_node; - kz_ctx->q_node = NULL; - (void)kaelz4_put_node_to_pool(kaelz4_get_qp(kz_ctx->comp_type), temp, kaelz4_free_kz_ctx); + instance->free_num++; + if (instance->free_num == instance->cur_idx) { + (void)kaelz4_put_node_to_pool(kaelz4_get_qp(kz_ctx->comp_type), temp, kaelz4_free_instance); + instance->cur_idx = 0; + instance->free_num = 0; + if (instance == g_cur_instance) { + g_cur_instance = NULL; + } + } } kz_ctx = NULL; @@ -306,7 +373,18 @@ void kaelz4_free_ctx(kaelz4_ctx_t* kz_ctx) return; } - kaelz4_free_wd_queue_memory(kz_ctx->q_node, kaelz4_free_kz_ctx); + struct kaelz4_instance *instance = (struct kaelz4_instance *)kz_ctx->q_node->priv_ctx; + + instance->kz_ctx[kz_ctx->index] = NULL; + kaelz4_free_kz_ctx(kz_ctx); + + instance->free_num++; + if (instance->free_num == instance->cur_idx) { + kaelz4_free_wd_queue_memory(kz_ctx->q_node, kaelz4_free_instance); + if (instance == g_cur_instance) { + g_cur_instance = NULL; + } + } } static int kaelz4_driver_do_comp_impl(kaelz4_ctx_t* kz_ctx) @@ -461,7 +539,7 @@ static KAE_QUEUE_POOL_HEAD_S* kaelz4_get_qp(int algtype) pthread_mutex_unlock(&g_kaelz4_deflate_pool_init_mutex); return g_kaelz4_deflate_qp; } - kaelz4_queue_pool_destroy(g_kaelz4_deflate_qp, kaelz4_free_kz_ctx); + kaelz4_queue_pool_destroy(g_kaelz4_deflate_qp, kaelz4_free_instance); g_kaelz4_deflate_qp = kaelz4_init_queue_pool(algtype); pthread_mutex_unlock(&g_kaelz4_deflate_pool_init_mutex); @@ -475,7 +553,7 @@ static KAE_QUEUE_POOL_HEAD_S* kaelz4_get_qp(int algtype) pthread_mutex_unlock(&g_kaelz4_inflate_pool_init_mutex); return g_kaelz4_inflate_qp; } - kaelz4_queue_pool_destroy(g_kaelz4_inflate_qp, kaelz4_free_kz_ctx); + kaelz4_queue_pool_destroy(g_kaelz4_inflate_qp, kaelz4_free_instance); g_kaelz4_inflate_qp = kaelz4_init_queue_pool(algtype); 
pthread_mutex_unlock(&g_kaelz4_inflate_pool_init_mutex); @@ -488,11 +566,11 @@ static KAE_QUEUE_POOL_HEAD_S* kaelz4_get_qp(int algtype) void kaelz4_free_all_qps(void) { pthread_mutex_lock(&g_kaelz4_deflate_pool_init_mutex); - kaelz4_queue_pool_destroy(g_kaelz4_deflate_qp, kaelz4_free_kz_ctx); + kaelz4_queue_pool_destroy(g_kaelz4_deflate_qp, kaelz4_free_instance); g_kaelz4_deflate_qp = NULL; pthread_mutex_unlock(&g_kaelz4_deflate_pool_init_mutex); pthread_mutex_lock(&g_kaelz4_inflate_pool_init_mutex); - kaelz4_queue_pool_destroy(g_kaelz4_inflate_qp, kaelz4_free_kz_ctx); + kaelz4_queue_pool_destroy(g_kaelz4_inflate_qp, kaelz4_free_instance); g_kaelz4_inflate_qp = NULL; pthread_mutex_unlock(&g_kaelz4_inflate_pool_init_mutex); } diff --git a/KAELz4/src/v1/kaelz4_ctx.h b/KAELz4/src/v1/kaelz4_ctx.h index 2dd2ff2..e1b8cbe 100644 --- a/KAELz4/src/v1/kaelz4_ctx.h +++ b/KAELz4/src/v1/kaelz4_ctx.h @@ -11,6 +11,8 @@ #include "wd_queue_memory.h" #include "uadk/v1/wd_comp.h" +#define MAX_KAE_CTX_DEPTH 2 + enum kaelz4_comp_status { KAEZIP_COMP_INIT = 0, KAEZIP_COMP_DOING, @@ -49,20 +51,35 @@ struct kaelz4_ctx { int comp_type; // WCRYPTO_DEFLATE / WCRYPTO_INFLATE unsigned int do_comp_len; // a compress proccess cost len int status; // enum kaelz4_comp_status - int is_sgl; + unsigned int index; struct wcrypto_end_block end_block; - KAE_QUEUE_DATA_NODE_S* q_node; - struct wcrypto_comp_ctx_setup setup; + KAE_QUEUE_DATA_NODE_S *q_node; + struct wcrypto_comp_ctx_setup *setup; struct wcrypto_comp_op_data op_data; struct wcrypto_lz77_zstd_format lz4_data; void* wd_ctx; struct wcrypto_zstd_out output; wd_map usr_map; - unsigned char sgl[32 + (32 * 60)]; // 32: sizeof(struct wd_sgl) + sizeof(struct wd_sge) * 60 + unsigned char src_sgl_buf[32 + (32 * 60)]; // 32: sizeof(struct wd_sgl) + sizeof(struct wd_sge) * 60 + unsigned char dst_sgl_buf[32 + (32 * 60)]; // 32: sizeof(struct wd_sgl) + sizeof(struct wd_sge) * 60 + void *src_sgl; + void *dst_sgl_usr; + void *dst_sgl_kernel; void (*callback)(int status, void *param); void* param; }; + +struct kaelz4_instance { + KAE_QUEUE_DATA_NODE_S *q_node; + void *wd_ctx; + struct kaelz4_ctx *kz_ctx[MAX_KAE_CTX_DEPTH]; + struct wcrypto_comp_ctx_setup setup; + unsigned int total_num; + unsigned int cur_idx; + unsigned int free_num; +}; + typedef struct kaelz4_ctx kaelz4_ctx_t; kaelz4_ctx_t* kaelz4_get_ctx(int alg_comp_type, int comp_optype, int is_sgl); diff --git a/KAELz4/src/v1/wd_queue_memory.c b/KAELz4/src/v1/wd_queue_memory.c index 4e059ca..aa2c9e7 100644 --- a/KAELz4/src/v1/wd_queue_memory.c +++ b/KAELz4/src/v1/wd_queue_memory.c @@ -75,8 +75,8 @@ void* kaelz4_create_sgl_mempool(struct wd_queue *q) setup.align_size = 64; setup.sge_num_in_sgl = 1; setup.buf_num_in_sgl = setup.sge_num_in_sgl; - setup.sgl_num = 4; - setup.buf_num = setup.buf_num_in_sgl * setup.sgl_num + setup.sgl_num + 2; + setup.sgl_num = MAX_KAE_CTX_DEPTH * 2; // SGL模式下,每个SGL output需要两段buf分别给seq和lit + setup.buf_num = setup.buf_num_in_sgl * setup.sgl_num + setup.sgl_num * 2 + 2; void *mempool = wd_sglpool_create(q, &setup); return mempool; @@ -85,7 +85,7 @@ void* kaelz4_create_sgl_mempool(struct wd_queue *q) void* kaelz4_create_alg_wd_queue_mempool(struct wd_queue *q) { unsigned int block_size = COMP_BLOCK_SIZE; - unsigned int block_num = COMP_BLOCK_NUM; + unsigned int block_num = COMP_BLOCK_NUM * MAX_KAE_CTX_DEPTH; struct wd_blkpool_setup setup; memset(&setup, 0, sizeof(setup)); -- Gitee From d9e047305959f501a5184ba5cfde180680a0afe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Tue, 1 
Jul 2025 07:07:04 +0000 Subject: [PATCH 2/3] feat: add lz77 raw data format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAELz4/src/v1/kaelz4_comp.c | 230 +++++++++++++++++++++++++++--------- 1 file changed, 174 insertions(+), 56 deletions(-) diff --git a/KAELz4/src/v1/kaelz4_comp.c b/KAELz4/src/v1/kaelz4_comp.c index 46e8b25..882a95f 100644 --- a/KAELz4/src/v1/kaelz4_comp.c +++ b/KAELz4/src/v1/kaelz4_comp.c @@ -29,7 +29,7 @@ static int kaelz4_data_parsing(LZ4_CCtx* zc, kaelz4_ctx_t* config) return KAE_LZ4_INVAL_PARA; } - if (config->is_sgl) { + if (config->q_node->is_sgl) { zc->seqStore.litStart = wd_get_first_sge_buf(config->lz4_data.literals_start); } else { zc->seqStore.litStart = config->lz4_data.literals_start; @@ -37,7 +37,7 @@ static int kaelz4_data_parsing(LZ4_CCtx* zc, kaelz4_ctx_t* config) zc->seqStore.lit = zc->seqStore.litStart; zc->seqStore.lit += config->lz4_data.lit_num; - if (config->is_sgl) { + if (config->q_node->is_sgl) { zc->seqStore.sequencesStart = wd_get_first_sge_buf(config->lz4_data.sequences_start); } else { zc->seqStore.sequencesStart = config->lz4_data.sequences_start; @@ -275,14 +275,14 @@ static void kaelz4_compress_async_callback(struct kaelz4_compress_ctx *compress_ struct kaelz4_result *result = compress_ctx->result; result->status = status; result->dst_len = compress_ctx->dst_len; - if (result->ibuf_crc != NULL && status == KAE_LZ4_SUCC) { + if (result->ibuf_crc != NULL && status == KAE_LZ4_SUCC && compress_ctx->data_format != KAELZ4_ASYNC_LZ77_RAW) { for (int i = 0; i < compress_ctx->src->buf_num; i++) { *result->ibuf_crc = KAELZ4CRC32(*result->ibuf_crc, compress_ctx->src->buf[i].data, compress_ctx->src->buf[i].buf_len); } } - if (result->obuf_crc != NULL && status == KAE_LZ4_SUCC) { + if (result->obuf_crc != NULL && status == KAE_LZ4_SUCC && compress_ctx->data_format != KAELZ4_ASYNC_LZ77_RAW) { *result->obuf_crc = KAELZ4CRC32(*result->obuf_crc, compress_ctx->dst->buf[0].data, compress_ctx->dst_len); } @@ -295,7 +295,7 @@ static void kaelz4_compress_async_callback(struct kaelz4_compress_ctx *compress_ } static int kaelz4_triples_rebuild(struct kaelz4_async_req *req, const struct wd_buf_list *source, - void *dest) + void *dest, struct kaelz4_priv_save_info *save_info) { unsigned int cur_buf_idx = 0; const BYTE* ip = (const BYTE*) source->buf[0].data; @@ -410,7 +410,7 @@ static int kaelz4_triples_rebuild(struct kaelz4_async_req *req, const struct wd_ // 1、对于非last subblock:first new seq生成时继承prev subblock的last literal;cur subblock的尾部last literal信息更新至ctx中 // 2、对于last subblock:first new seq生成时继承prev subblock的last literal;cur subblock的尾部last literal生成last seq格式 static int kaelz4_triples_rebuild_64Kblock(struct kaelz4_async_req *req, const struct wd_buf_list *source, - void *dest) + void *dest, struct kaelz4_priv_save_info *save_info) { unsigned int cur_buf_idx = 0; const BYTE* ip = (const BYTE*) source->buf[0].data; @@ -451,7 +451,7 @@ static int kaelz4_triples_rebuild_64Kblock(struct kaelz4_async_req *req, const s } mlBase -= 1; - litLength += tempLiteralLength + req->compress_ctx->prev_last_lit_len; + litLength += tempLiteralLength + save_info->prev_last_lit_len; tempLiteralLength = 0; token = op++; @@ -468,16 +468,16 @@ static int kaelz4_triples_rebuild_64Kblock(struct kaelz4_async_req *req, const s } // 满足生成first new sequence条件,继承prev subblock的last literal - if (req->compress_ctx->prev_last_lit_ptr != NULL) { - struct kaelz4_buffer *last_buf = 
&req->compress_ctx->src->buf[req->compress_ctx->prev_last_lit_buf_index]; - size_t last_buf_remain = last_buf->data + last_buf->buf_len - req->compress_ctx->prev_last_lit_ptr; - kaelz4_wild_copy16_from_buffers(req->compress_ctx->src, &req->compress_ctx->prev_last_lit_buf_index, - (const BYTE **)&req->compress_ctx->prev_last_lit_ptr, &last_buf_remain, - op, req->compress_ctx->prev_last_lit_len); - op += req->compress_ctx->prev_last_lit_len; - litLength -= req->compress_ctx->prev_last_lit_len; - req->compress_ctx->prev_last_lit_ptr = NULL; - req->compress_ctx->prev_last_lit_len = 0; + if (save_info->prev_last_lit_ptr != NULL) { + struct kaelz4_buffer *last_buf = &save_info->src->buf[save_info->prev_last_lit_buf_index]; + size_t last_buf_remain = last_buf->data + last_buf->buf_len - save_info->prev_last_lit_ptr; + kaelz4_wild_copy16_from_buffers(save_info->src, &save_info->prev_last_lit_buf_index, + (const BYTE **)&save_info->prev_last_lit_ptr, &last_buf_remain, + op, save_info->prev_last_lit_len); + op += save_info->prev_last_lit_len; + litLength -= save_info->prev_last_lit_len; + save_info->prev_last_lit_ptr = NULL; + save_info->prev_last_lit_len = 0; } U32 tmp_len = litLength + mlBase + 4; if (likely(tmp_len < ip_buf_remain)) { @@ -582,16 +582,16 @@ static int kaelz4_triples_rebuild_64Kblock(struct kaelz4_async_req *req, const s // 非last subblock,刷新ctx中的prev subblock literal 信息 if (req->last != 1) { // 本分块不满足rebuild new seq:1)subblock完全不可压,全是literal;2)三元组全是matchlen=0。本分块作为literal留给后续分块的first new seq继承,且存在跨多个分块情况 - if (unlikely(req->compress_ctx->prev_last_lit_ptr != NULL)) { - req->compress_ctx->prev_last_lit_len += tempLiteralLength; + if (unlikely(save_info->prev_last_lit_ptr != NULL)) { + save_info->prev_last_lit_len += tempLiteralLength; } else { // 本分块满足rebuild new seq:记录尾部的literal部分信息,留给后续分块的first new seq继承 - req->compress_ctx->prev_last_lit_ptr = (void *)ip; - req->compress_ctx->prev_last_lit_buf_index = req->buf_start_index + cur_buf_idx; - req->compress_ctx->prev_last_lit_len = tempLiteralLength; + save_info->prev_last_lit_ptr = (void *)ip; + save_info->prev_last_lit_buf_index = req->buf_start_index + cur_buf_idx; + save_info->prev_last_lit_len = tempLiteralLength; } } else { // last subblock,生成last seq // 继承prev subblock的last literal - tempLiteralLength += req->compress_ctx->prev_last_lit_len; + tempLiteralLength += save_info->prev_last_lit_len; token = op++; if (tempLiteralLength >= RUN_MASK) { @@ -606,16 +606,16 @@ static int kaelz4_triples_rebuild_64Kblock(struct kaelz4_async_req *req, const s *token = (BYTE)(tempLiteralLength << ML_BITS); } - if (req->compress_ctx->prev_last_lit_ptr != NULL) { - struct kaelz4_buffer *last_buf = &req->compress_ctx->src->buf[req->compress_ctx->prev_last_lit_buf_index]; - size_t last_buf_remain = last_buf->data + last_buf->buf_len - req->compress_ctx->prev_last_lit_ptr; - kaelz4_wild_copy16_from_buffers(req->compress_ctx->src, &req->compress_ctx->prev_last_lit_buf_index, - (const BYTE **)&req->compress_ctx->prev_last_lit_ptr, &last_buf_remain, - op, req->compress_ctx->prev_last_lit_len); - op += req->compress_ctx->prev_last_lit_len; - tempLiteralLength -= req->compress_ctx->prev_last_lit_len; - req->compress_ctx->prev_last_lit_ptr = NULL; - req->compress_ctx->prev_last_lit_len = 0; + if (save_info->prev_last_lit_ptr != NULL) { + struct kaelz4_buffer *last_buf = &save_info->src->buf[save_info->prev_last_lit_buf_index]; + size_t last_buf_remain = last_buf->data + last_buf->buf_len - save_info->prev_last_lit_ptr; + 
kaelz4_wild_copy16_from_buffers(save_info->src, &save_info->prev_last_lit_buf_index, + (const BYTE **)&save_info->prev_last_lit_ptr, &last_buf_remain, + op, save_info->prev_last_lit_len); + op += save_info->prev_last_lit_len; + tempLiteralLength -= save_info->prev_last_lit_len; + save_info->prev_last_lit_ptr = NULL; + save_info->prev_last_lit_len = 0; } wd_copy_from_buffers(source, &cur_buf_idx, &ip, &ip_buf_remain, op, tempLiteralLength); op += tempLiteralLength; @@ -633,8 +633,16 @@ static void kaelz4_async_compress_cb(int status, void *param) kaelz4_ctx_t* kaelz4_ctx = (kaelz4_ctx_t*)zc->kaeConfig; struct wcrypto_comp_op_data *op_data = &kaelz4_ctx->op_data; - if (kaelz4_ctx->is_sgl) - wd_destory_sgl(kaelz4_ctx->q_node->kae_wd_queue, kaelz4_ctx->q_node->kae_queue_mem_pool, (void *)kaelz4_ctx->sgl); + if (kaelz4_ctx->q_node->is_sgl) { + if (kaelz4_ctx->src_sgl != NULL) { + wd_destory_sgl(kaelz4_ctx->q_node->kae_wd_queue, kaelz4_ctx->q_node->kae_queue_mem_pool, kaelz4_ctx->src_sgl); + kaelz4_ctx->src_sgl = NULL; + } + if (kaelz4_ctx->dst_sgl_usr != NULL) { + wd_destory_sgl(kaelz4_ctx->q_node->kae_wd_queue, kaelz4_ctx->q_node->kae_queue_mem_pool, kaelz4_ctx->dst_sgl_usr); + kaelz4_ctx->dst_sgl_usr = NULL; + } + } if (status != 0) { US_ERR("kaelz4_async_compress_cb status %d !\n", status); @@ -663,13 +671,26 @@ static void kaelz4_async_compress_cb(int status, void *param) req->done = 1; } -static void kaelz4_fill_sgl_buffer(kaelz4_ctx_t *kz_ctx, const struct wd_buf_list *src) +static void kaelz4_fill_sgl_buffer(kaelz4_ctx_t *kz_ctx, const struct wd_buf_list *src, struct wd_buf_list *dst) { struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data; op_data->in_len = 0; - wd_build_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, (void *)kz_ctx->sgl, src, + kz_ctx->src_sgl = kz_ctx->src_sgl_buf; + wd_build_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->src_sgl, src, (wd_map)kz_ctx->usr_map); + + if (dst->buf_num) { + kz_ctx->dst_sgl_usr = kz_ctx->dst_sgl_buf; + wd_build_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->dst_sgl_usr, dst, + (wd_map)kz_ctx->usr_map); + kz_ctx->output.sequence = kz_ctx->dst_sgl_usr; + kz_ctx->output.seq_sz = dst->buf[0].buf_len; + + } else { + kz_ctx->output.sequence = kz_ctx->dst_sgl_kernel; + kz_ctx->output.seq_sz = COMP_BLOCK_SIZE; + } op_data->in_len += kz_ctx->do_comp_len; op_data->avail_out = KAEZIP_STREAM_CHUNK_OUT; op_data->flush = kz_ctx->flush; @@ -694,7 +715,7 @@ static void kaelz4_fill_flat_buffer(kaelz4_ctx_t *kz_ctx, const struct wd_buf_li op_data->stream_pos = WCRYPTO_COMP_STREAM_NEW; } -static int kaelz4_compress_async_impl(LZ4_CCtx* zc, const struct wd_buf_list *src, size_t srcSize, void *usr_data) +static int kaelz4_compress_async_impl(LZ4_CCtx* zc, const struct wd_buf_list *src, struct wd_buf_list *dst, size_t srcSize, void *usr_data) { kaelz4_ctx_t* kaelz4_ctx = (kaelz4_ctx_t*)zc->kaeConfig; if (kaelz4_ctx == NULL || src == NULL || srcSize == 0) { @@ -715,8 +736,8 @@ static int kaelz4_compress_async_impl(LZ4_CCtx* zc, const struct wd_buf_list *sr kaelz4_ctx->callback = kaelz4_async_compress_cb; kaelz4_ctx->param = usr_data; - if (kaelz4_ctx->is_sgl) - kaelz4_fill_sgl_buffer(kaelz4_ctx, src); + if (kaelz4_ctx->q_node->is_sgl) + kaelz4_fill_sgl_buffer(kaelz4_ctx, src, dst); else kaelz4_fill_flat_buffer(kaelz4_ctx, src); @@ -725,6 +746,17 @@ static int kaelz4_compress_async_impl(LZ4_CCtx* zc, const struct wd_buf_list *sr static void kaelz4_find_and_free_kz_ctx(struct 
kaelz4_async_ctrl *ctrl, kaelz4_ctx_t *kz_ctx) { + if (kz_ctx->q_node->is_sgl) { + if (kz_ctx->src_sgl) { + wd_destory_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->src_sgl); + kz_ctx->src_sgl = NULL; + } + if (kz_ctx->dst_sgl_usr) { + wd_destory_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->dst_sgl_usr); + kz_ctx->dst_sgl_usr = NULL; + } + } + for (int i = 0; i < MAX_NUM_IN_COMP; i++) { if (ctrl->kz_ctx[i] == kz_ctx) { kaelz4_free_ctx(ctrl->kz_ctx[i]); @@ -856,14 +888,14 @@ static int KAELZ4BlockFooterGen(unsigned char *dstPtr, uint32_t compressed_len) } static int kaelz4_async_frame_padding(struct kaelz4_async_req *req, const struct wd_buf_list *source, - void *dst_tmp) + void *dst_tmp, struct kaelz4_priv_save_info *save_info) { int ret = 0; int padding_len = 0; void *dst_after_frameheader = dst_tmp; - LZ4F_frameInfo_t frameinfo_ptr = req->compress_ctx->preferences.frameInfo; + LZ4F_frameInfo_t frameinfo_ptr = save_info->preferences.frameInfo; - if (req->compress_ctx->src->buf_num != 1) { + if (save_info->src->buf_num != 1) { frameinfo_ptr.contentChecksumFlag = LZ4F_noContentChecksum; } // 如果是第一个block块,添加frame头部 @@ -876,7 +908,7 @@ static int kaelz4_async_frame_padding(struct kaelz4_async_req *req, const struct dst_tmp += 4; // 直接往后偏移4个字节(KAELZ4BlockHeaderGen 的返回值), 预留block头的空间 // 写入真实 block 数据 - ret = kaelz4_triples_rebuild(req, source, dst_tmp); + ret = kaelz4_triples_rebuild(req, source, dst_tmp, save_info); if (ret < 0) { return ret; } @@ -910,8 +942,8 @@ static int kaelz4_async_frame_padding(struct kaelz4_async_req *req, const struct // 如果是最后一个block块,添加frame尾部 if (req->last == 1) { int contentChecksum = frameinfo_ptr.contentChecksumFlag; - int len4 = KAELZ4FooterGen(dst_tmp, (unsigned char *)req->compress_ctx->src->buf[0].data, - req->compress_ctx->srcSize, contentChecksum); + int len4 = KAELZ4FooterGen(dst_tmp, (unsigned char *)save_info->src->buf[0].data, + save_info->src->buf[0].buf_len, contentChecksum); padding_len += len4; } @@ -919,6 +951,20 @@ static int kaelz4_async_frame_padding(struct kaelz4_async_req *req, const struct return ret; } +static int kaelz4_async_lz77_post_handle(struct kaelz4_async_req *req, const struct wd_buf_list *source, + void *dst_tmp, struct kaelz4_priv_save_info *save_info) +{ + struct kaelz4_seq_result *req_result = dst_tmp; + + if (req->special_flag) { + req_result->seq_num = 0; + } else { + req_result->seq_num = req->zc.seqnum; + } + + return KAE_LZ77_SEQ_DATA_SIZE_PER_64K; +} + int kaelz4_async_is_thread_do_comp_full(struct kaelz4_async_ctrl *ctrl) { return ctrl->cur_num_in_comp < MAX_NUM_IN_COMP ? 
FALSE : TRUE; @@ -1011,7 +1057,7 @@ static int kaelz4_async_sw_compress(struct kaelz4_async_ctrl *ctrl, struct kaelz compress_ctx->status = KAE_LZ4_SUCC; if (compress_ctx->data_format == KAELZ4_ASYNC_FRAME && ctrl->sw_compress_frame != NULL) { ret = ctrl->sw_compress_frame(compress_ctx->dst->buf[0].data, compress_ctx->dstCapacity, compress_ctx->src->buf[0].data, - compress_ctx->srcSize, &compress_ctx->preferences); + compress_ctx->srcSize, &compress_ctx->save_info.preferences); } else if (compress_ctx->data_format <= KAELZ4_ASYNC_BLOCK && ctrl->sw_compress != NULL) { ret = ctrl->sw_compress(compress_ctx->src->buf[0].data, compress_ctx->dst->buf[0].data, compress_ctx->srcSize, compress_ctx->dstCapacity); @@ -1040,7 +1086,8 @@ int kaelz4_async_compress_polling(struct kaelz4_async_ctrl *ctrl, int budget) if (likely(compress_ctx->status == KAE_LZ4_SUCC)) { ret = compress_ctx->kaelz4_post_process_handle(req, &req->src, - compress_ctx->dst->buf[0].data + compress_ctx->dst_len); + compress_ctx->dst->buf[0].data + compress_ctx->dst_len, + &compress_ctx->save_info); if (ret < 0) { US_ERR("kaelz4_post_process_handle err. ret=%d\n", ret); } @@ -1173,7 +1220,7 @@ static int kaelz4_send_async_compress(struct kaelz4_async_ctrl *ctrl, struct kae return ret; } size_t compress_size = req->src_size - MFLIMIT; - ret = kaelz4_compress_async_impl(&req->zc, &req->src, compress_size, (void *)req); + ret = kaelz4_compress_async_impl(&req->zc, &req->src, &req->dst, compress_size, (void *)req); if (unlikely(ret != KAE_LZ4_SUCC)) { kaelz4_find_and_free_kz_ctx(ctrl, (kaelz4_ctx_t *)req->zc.kaeConfig); ctrl->ctx_index = (ctrl->ctx_index + MAX_NUM_IN_COMP - 1) % MAX_NUM_IN_COMP; @@ -1185,8 +1232,22 @@ static int kaelz4_send_async_compress(struct kaelz4_async_ctrl *ctrl, struct kae return ret; } -static void kaelz4_fill_hw_req_buf_list(struct kaelz4_async_req *req, const struct kaelz4_buffer_list *src, - unsigned int *index, size_t *offset, size_t rem_len) +static void kaelz4_fill_hw_req_dst_buf_list(struct kaelz4_async_req *req, const struct kaelz4_buffer_list *dst, + enum kae_lz4_async_data_format data_format) +{ + req->dst.buf = req->dst_buffers; + req->dst.buf_num = 0; + req->dst.usr_data = dst->usr_data; + if (data_format == KAELZ4_ASYNC_LZ77_RAW) { + req->dst.buf_num = 1; + struct kaelz4_seq_result *seq_result = dst->buf[0].data + req->idx * KAE_LZ77_SEQ_DATA_SIZE_PER_64K; + req->dst.buf[0].data = seq_result->seq_start; + req->dst.buf[0].buf_len = KAE_LZ77_SEQ_DATA_SIZE_PER_64K - sizeof(seq_result->seq_num); + } +} + +static void kaelz4_fill_hw_req_src_buf_list(struct kaelz4_async_req *req, const struct kaelz4_buffer_list *src, + unsigned int *index, size_t *offset, size_t rem_len) { size_t req_size; @@ -1266,7 +1327,8 @@ static int kaelz4_async_compress_process(struct kaelz4_async_ctrl *ctrl, void *a req->done = 0; req->compress_ctx = compress_ctx; req->next = NULL; - kaelz4_fill_hw_req_buf_list(req, compress_ctx->src, &buf_index, &buf_offset, remainingLength); + kaelz4_fill_hw_req_src_buf_list(req, compress_ctx->src, &buf_index, &buf_offset, remainingLength); + kaelz4_fill_hw_req_dst_buf_list(req, compress_ctx->dst, compress_ctx->data_format); remainingLength -= req->src_size; // 最后一块实际下发给芯片的长度是 src_size - MFLIMIT if (remainingLength == 0) { @@ -1309,6 +1371,7 @@ const kaelz4_post_process_handle_t g_post_process_handle[KAELZ4_ASYNC_BUTT] = { [KAELZ4_ASYNC_SMALL_BLOCK] = kaelz4_triples_rebuild, [KAELZ4_ASYNC_BLOCK] = kaelz4_triples_rebuild_64Kblock, [KAELZ4_ASYNC_FRAME] = kaelz4_async_frame_padding, + 
[KAELZ4_ASYNC_LZ77_RAW] = kaelz4_async_lz77_post_handle, }; int kaelz4_compress_async(struct kaelz4_async_ctrl *ctrl, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, @@ -1325,18 +1388,18 @@ int kaelz4_compress_async(struct kaelz4_async_ctrl *ctrl, const struct kaelz4_bu compress_ctx->dstCapacity = result->dst_len; compress_ctx->src = src; compress_ctx->srcSize = result->src_size; - compress_ctx->recv_cnt = 0; compress_ctx->callback = callback; compress_ctx->result = result; compress_ctx->data_format = data_format; - compress_ctx->preferences = *ptr; compress_ctx->kaelz4_post_process_handle = g_post_process_handle[data_format]; compress_ctx->dst_len = 0; compress_ctx->next = NULL; compress_ctx->status = KAE_LZ4_SUCC; compress_ctx->req_list = NULL; - compress_ctx->prev_last_lit_ptr = NULL; - compress_ctx->prev_last_lit_len = 0; + compress_ctx->save_info.preferences = *ptr; + compress_ctx->save_info.prev_last_lit_ptr = NULL; + compress_ctx->save_info.prev_last_lit_len = 0; + compress_ctx->save_info.src = src; if (ctrl->ctx_head) { ctrl->tail->next = compress_ctx; @@ -1367,3 +1430,58 @@ err_callback: callback(result); return KAE_LZ4_ALLOC_FAIL; } + +int kaelz4_triples_rebuild_impl(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *tuple_buf, struct kaelz4_buffer_list *dst, + struct kaelz4_result *result, enum kae_lz4_async_data_format data_format, const LZ4F_preferences_t *ptr) +{ + size_t remainingLength = result->src_size; // 该值用于保存剩余的待压缩数据长度 + unsigned int buf_index = 0; + size_t buf_offset = 0; + size_t dst_len = 0; + int idx = 0; + struct kaelz4_seq_result *seq_result = tuple_buf->buf[0].data; + struct kaelz4_priv_save_info save_info = {0}; + int ret; + + save_info.src = src; + if (ptr) + save_info.preferences = *ptr; + + while (remainingLength) { + struct kaelz4_async_req req; + + req.special_flag = 0; + req.last = 0; + req.idx = idx; + kaelz4_fill_hw_req_src_buf_list(&req, src, &buf_index, &buf_offset, remainingLength); + remainingLength -= req.src_size; + // 最后一块实际下发给芯片的长度是 src_size - MFLIMIT + if (remainingLength == 0) { + req.last = 1; + } + req.zc.seqStore.sequencesStart = (seqDef *)seq_result->seq_start; + req.zc.seqnum = seq_result->seq_num; + ret = g_post_process_handle[data_format](&req, &req.src, dst->buf[0].data + dst_len, &save_info); + if (ret <= 0) { + result->status = KAE_LZ4_COMP_FAIL; + return KAE_LZ4_COMP_FAIL; + } + + idx++; + dst_len += ret; + seq_result = (void *)seq_result + KAE_LZ77_SEQ_DATA_SIZE_PER_64K; + } + + if (result->ibuf_crc != NULL) { + for (int i = 0; i < src->buf_num; i++) { + *result->ibuf_crc = KAELZ4CRC32(*result->ibuf_crc, src->buf[i].data, src->buf[i].buf_len); + } + } + + if (result->obuf_crc != NULL) { + *result->obuf_crc = KAELZ4CRC32(*result->obuf_crc, dst->buf[0].data, dst_len); + } + + result->dst_len = dst_len; + return KAE_LZ4_SUCC; +} -- Gitee From 4f1e5e506b3ac8a57553ad074613278b5998be2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Tue, 1 Jul 2025 07:11:58 +0000 Subject: [PATCH 3/3] test: add lz77 raw test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- .../test/kzip/alg/KAELz4Async/lz4AsyncLz77.c | 41 ++++++++++ KAELz4/test/kzip/alg/manage.c | 1 + KAELz4/test/kzip/alg/manage.h | 1 + KAELz4/test/kzip/compress_ctx.h | 7 ++ KAELz4/test/kzip/main.c | 75 ++++++++++++++++++- KAELz4/test/kzip/runFunc.sh | 2 +- 6 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 
KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c new file mode 100644 index 0000000..ccb263c --- /dev/null +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c @@ -0,0 +1,41 @@ +#include "../manage.h" +#include +#include + +// LZ4 压缩实现 +static int lz4async_block_compress(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +{ + return KAELZ4_compress_lz77_async_in_session(sess, src, dst, cb, result); +} + +// LZ4 解压实现 +static int lz4async_block_decompress(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +{ + int ret = LZ4_decompress_async(src, dst, cb, result); + return ret; +} + +static int lz4_bound(int src_len) { + return LZ4_compressBound(src_len); +} +// LZ4 初始化 +static int lz4_async_block_init() { + printf("Initializing LZ4...\n"); + return 0; +} + +// LZ4 算法实例 +compression_algorithm_t lz4async_lz77_algorithm = { + .name = "kaelz4async_lz77", + .async_compress = lz4async_block_compress, + .poll = KAELZ4_compress_async_polling_in_session, + .bound = lz4_bound, + .async_decompress = lz4async_block_decompress, + .init = lz4_async_block_init +}; + +// 注册 LZ4 算法 +void register_lz4async_lz77_algorithm(void) +{ + register_algorithm(&lz4async_lz77_algorithm); +} \ No newline at end of file diff --git a/KAELz4/test/kzip/alg/manage.c b/KAELz4/test/kzip/alg/manage.c index bcb4e51..537a507 100644 --- a/KAELz4/test/kzip/alg/manage.c +++ b/KAELz4/test/kzip/alg/manage.c @@ -48,4 +48,5 @@ void initialize_algorithms(void) { register_lz4_frame_algorithm(); register_lz4async_block_algorithm(); register_lz4async_frame_algorithm(); + register_lz4async_lz77_algorithm(); } \ No newline at end of file diff --git a/KAELz4/test/kzip/alg/manage.h b/KAELz4/test/kzip/alg/manage.h index 36131d0..a8569a5 100644 --- a/KAELz4/test/kzip/alg/manage.h +++ b/KAELz4/test/kzip/alg/manage.h @@ -51,4 +51,5 @@ void register_lz4_algorithm(void); void register_lz4_frame_algorithm(void); void register_lz4async_block_algorithm(void); void register_lz4async_frame_algorithm(void); +void register_lz4async_lz77_algorithm(void); #endif diff --git a/KAELz4/test/kzip/compress_ctx.h b/KAELz4/test/kzip/compress_ctx.h index 8b5a2fb..0b54cf6 100644 --- a/KAELz4/test/kzip/compress_ctx.h +++ b/KAELz4/test/kzip/compress_ctx.h @@ -36,10 +36,12 @@ struct __attribute__((aligned(64))) compress_param { unsigned int dst_len; struct kaelz4_buffer_list src; struct kaelz4_buffer_list dst; + struct kaelz4_buffer_list tuple; uint64_t start_time; volatile unsigned int done; struct kaelz4_buffer src_buf[128]; struct kaelz4_buffer dst_buf[128]; + struct kaelz4_buffer tuple_buf[128]; }; struct compress_ctx { @@ -59,11 +61,16 @@ struct compress_ctx { struct compress_out_buf *out_buf_list; struct compress_out_buf *out_buf_tail; void *page_info; + void *tuple_page_info; + void *tuple_buf; + size_t tuple_buf_offset; + size_t tuple_buf_len; int thread_id; int with_crc; unsigned int src_buf_num; void *sess; uint64_t *all_delays; + int is_lz77_mode; }; diff --git a/KAELz4/test/kzip/main.c b/KAELz4/test/kzip/main.c index 53b1c35..39d7e43 100644 --- a/KAELz4/test/kzip/main.c +++ b/KAELz4/test/kzip/main.c @@ -143,7 +143,7 @@ void *get_huge_pages(size_t total_size) if (addr == MAP_FAILED) { fprintf(stderr, "申请内存大页失败。\n"); fprintf(stderr, "系统可能没有足够的大页可用。\n"); - fprintf(stderr, "请尝试分配更多大页: sudo sysctl 
vm.nr_hugepages=1000\n"); + fprintf(stderr, "请尝试分配更多大页: sudo sysctl vm.nr_hugepages=10000\n"); exit(EXIT_FAILURE); } @@ -434,6 +434,12 @@ static void compress_async_callback(struct kaelz4_result *result) } struct compress_param *param = (struct compress_param *)result->user_data; + if (param->ctx->is_lz77_mode) { + if (KAELZ4_rebuild_lz77_to_block(¶m->src, ¶m->tuple, ¶m->dst, result) != 0) { + printf("[user]KAELZ4_rebuild_lz77_to_block : %d\n", result->status); + } + } + param->dst_len = result->dst_len; if ((param->ctx->algorithm->async_compress != NULL && param->ctx->compress_or_decompress != 0) || ((param->ctx->algorithm->async_decompress != NULL && param->ctx->compress_or_decompress == 0))) { @@ -447,6 +453,7 @@ static void compress_async_callback(struct kaelz4_result *result) record_latency(param->ctx->all_delays, timeonce, param->sn); } } + param->done = 1; return; } @@ -494,6 +501,17 @@ static void compress_ctx_init(struct compress_ctx *ctx, int compress_or_decompre ctx->all_delays = (uint64_t *)malloc(sizeof(uint64_t) * MAX_LATENCY_COUNT); memset(ctx->param_buf, 0, ctx->inflight_num * sizeof(struct compress_param)); ctx->param_index = 0; + ctx->is_lz77_mode = 0; + + if (strcmp(algorithm->name, "kaelz4async_lz77") == 0 && ctx->compress_or_decompress != 0) { + if (g_file_chunk_size == 0 || g_file_chunk_size * 1024 > HPAGE_SIZE) { + // TBM: 当前chunk_size超过2M kzip不支持lz77模式,因为大页内存不连续 + ctx->algorithm = get_algorithm("kaelz4async_block"); + return; + } + ctx->is_lz77_mode = 1; + g_enable_polling_mode = 1; + } } static void compress_ctx_destory(struct compress_ctx *ctx) @@ -811,8 +829,22 @@ static int do_comp_with_split_file( param->result.user_data = param; param->result.src_size = chunk_len_this_loop; param->result.dst_len = output_sz_tmp; - ret = do_real_compression( - ctx, ¶m->src, (unsigned int *)&chunk_len_this_loop, ¶m->dst, (unsigned int *)&output_sz_tmp, ¶m->result); + if (!ctx->is_lz77_mode) { + ret = do_real_compression( + ctx, ¶m->src, (unsigned int *)&chunk_len_this_loop, ¶m->dst, (unsigned int *)&output_sz_tmp, ¶m->result); + } else { + param->tuple.buf_num = 1; + param->tuple.buf = param->tuple_buf; + param->tuple.buf[0].data = ctx->tuple_buf + ctx->tuple_buf_offset; + param->tuple.buf[0].buf_len = KAELZ4_compress_get_tuple_buf_len(chunk_len_this_loop); + param->tuple.usr_data = ctx->tuple_page_info; + ctx->tuple_buf_offset += param->tuple.buf[0].buf_len; + if (ctx->tuple_buf_offset > ctx->tuple_buf_len) { + printf("ctx->tuple_buf_offset[0x%lx] > ctx->tuple_buf_len[0x%lx]\n", ctx->tuple_buf_offset, ctx->tuple_buf_len); + return -1; + } + ret = do_real_compression(ctx, ¶m->src, (unsigned int *)&chunk_len_this_loop, ¶m->tuple, (unsigned int *)&output_sz_tmp, ¶m->result); + } if (ret != 0) { printf("Error: do_real_compression error. 
ret = %d \nexit\n", ret); return ret; @@ -831,6 +863,34 @@ static int do_comp_with_split_file( return ret; } +static int prepare_tuple_buf(struct compress_ctx *ctx, size_t src_len) +{ + size_t tuple_buf_len = KAELZ4_compress_get_tuple_buf_len(g_file_chunk_size * 1024) * (src_len / (g_file_chunk_size * 1024) + 1) * 2; + size_t huge_page_num = tuple_buf_len * sizeof(Bytef) / HPAGE_SIZE + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 + size_t total_size = huge_page_num * HPAGE_SIZE; + ctx->tuple_buf = get_huge_pages(total_size); + printf("申请的tuple buf大页虚拟地址: %p len: 0x%lx\n", ctx->tuple_buf, total_size); + + if (ctx->tuple_buf == NULL) { + return -1; + } + + memset(ctx->tuple_buf, 0, total_size); + + struct cache_page_map* cache = init_cache_page_map(ctx->tuple_buf, total_size); + if (cache == NULL) { + printf("init_cache_page_map failed\n"); + return -1; + } + uint64_t phys_addr = get_physical_address_cache_page_map(cache, ctx->tuple_buf); + + printf("tuple buf大页物理地址: 0x%" PRIx64 "\n", phys_addr); + ctx->tuple_page_info = cache; + ctx->tuple_buf_offset = 0; + ctx->tuple_buf_len = tuple_buf_len; + return 0; +} + static int start_work(struct compress_ctx *ctx, const char* in_filename, const char* out_filename, int multi, int window_bits, int level) { @@ -891,6 +951,12 @@ static int start_work(struct compress_ctx *ctx, const char* in_filename, const c return -1; } + if (ctx->is_lz77_mode) { + if (prepare_tuple_buf(ctx, src_len) != 0) { + return -1; + } + } + struct timeval start, stop; gettimeofday(&start, NULL); unsigned long out_offset = 0; // 用于选择 outbuf 填充数据的偏移值。 @@ -899,6 +965,7 @@ static int start_work(struct compress_ctx *ctx, const char* in_filename, const c ctx->loop_index = j; if (j > 0) { // 为第1次之后的循环的产物复用空间 out_offset = output_sz; + ctx->tuple_buf_offset = ctx->tuple_buf_len / 2; } if (g_file_chunk_size != 0) { // 分片逻辑 ret = do_comp_with_split_file(ctx, inbuf, src_len, outbuf, output_sz, &out_offset); @@ -947,7 +1014,7 @@ static void *start_work_thread(void *arg) int multi = args->multi; int window_bits = args->window_bits; int level = args->level; - if (ctx->compress_or_decompress) + if (ctx->compress_or_decompress || g_file_chunk_size == 0) start_work(ctx, in_filename, out_filename, multi, window_bits, level); else start_work_decompress(ctx, in_filename, out_filename, multi, window_bits, level); diff --git a/KAELz4/test/kzip/runFunc.sh b/KAELz4/test/kzip/runFunc.sh index d8ceea4..bc04d57 100644 --- a/KAELz4/test/kzip/runFunc.sh +++ b/KAELz4/test/kzip/runFunc.sh @@ -5,7 +5,7 @@ export KAE_LZ4_COMP_TYPE=8 sh build.sh kaelz4 -Algthm=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame") +Algthm=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77") Datasets=("calgary" "itemdata" "dickens" "mozilla" "mr" "nci" "ooffice" "osdb" "reymont" "samba" "sao" "webster" "xml" "x-ray") Datasets=("calgary" "itemdata" "ooffice" "osdb" "samba" "webster" "xml" "x-ray") BlockSize=("0" "4" "8" "16" "60" "64" "68" "128" "512" "1024" "2090" "10244") -- Gitee
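
Below is a minimal sketch of how the two-stage LZ77-raw path added by this series could be driven for a single flat buffer: KAELZ4_compress_lz77_async_in_session() lets the accelerator emit raw LZ77 tuples into a caller-provided buffer sized with KAELZ4_compress_get_tuple_buf_len(), and KAELZ4_rebuild_lz77_to_block() later stitches those tuples into a standard LZ4 block on the CPU. Only the function names, the kaelz4_buffer_list / kaelz4_result field names and the callback shape are taken from kaelz4.h and the PATCH 3/3 test harness; the iova map callback, the malloc'd tuple buffer, the polling budget and the error handling are illustrative assumptions, not the library's documented contract.

#include <stdio.h>
#include <stdlib.h>
#include "kaelz4.h"

static volatile int g_lz77_done;

/* Stage-1 completion: the tuple buffer now holds raw LZ77 tuples, not LZ4 data yet. */
static void lz77_compress_done(struct kaelz4_result *result)
{
    if (result->status != KAE_LZ4_SUCC)
        fprintf(stderr, "lz77 raw compress failed: %d\n", result->status);
    g_lz77_done = 1;
}

/* Assumed to be supplied by the caller: an iova_map_fn for DMA-able memory,
 * the role the huge-page map plays in the PATCH 3/3 test harness. */
extern iova_map_fn usr_iova_map;

int lz77_raw_compress_to_block(void *src, size_t src_len, void *dst, size_t dst_cap)
{
    void *sess = KAELZ4_create_async_compress_session(usr_iova_map);
    if (sess == NULL)
        return -1;

    /* One tuple region of KAE_LZ77_SEQ_DATA_SIZE_PER_64K per 64 KiB sub-block. */
    size_t tuple_len = KAELZ4_compress_get_tuple_buf_len(src_len);
    void *tuples = malloc(tuple_len);   /* illustrative only; the test harness uses pinned
                                           huge-page memory the hardware can DMA into */
    if (tuples == NULL) {
        KAELZ4_destroy_async_compress_session(sess);
        return -1;
    }

    struct kaelz4_buffer src_buf   = { .data = src,    .buf_len = src_len   };
    struct kaelz4_buffer tuple_buf = { .data = tuples, .buf_len = tuple_len };
    struct kaelz4_buffer dst_buf   = { .data = dst,    .buf_len = dst_cap   };
    struct kaelz4_buffer_list src_list   = { .buf_num = 1, .buf = &src_buf   };
    struct kaelz4_buffer_list tuple_list = { .buf_num = 1, .buf = &tuple_buf };
    struct kaelz4_buffer_list dst_list   = { .buf_num = 1, .buf = &dst_buf   };

    struct kaelz4_result result = { 0 };
    result.src_size = src_len;
    result.dst_len  = tuple_len;

    /* Stage 1: hardware emits raw LZ77 tuples into tuple_list. */
    g_lz77_done = 0;
    int ret = KAELZ4_compress_lz77_async_in_session(sess, &src_list, &tuple_list,
                                                    lz77_compress_done, &result);
    if (ret != KAE_LZ4_SUCC)
        goto out;

    /* Drive completions until the callback fires (budget value is illustrative). */
    while (!g_lz77_done)
        KAELZ4_compress_async_polling_in_session(sess, 8);

    if (result.status != KAE_LZ4_SUCC) {
        ret = result.status;
        goto out;
    }

    /* Stage 2: rebuild the tuples into a standard LZ4 block on the CPU. */
    result.src_size = src_len;
    ret = KAELZ4_rebuild_lz77_to_block(&src_list, &tuple_list, &dst_list, &result);

out:
    free(tuples);
    KAELZ4_destroy_async_compress_session(sess);
    return (ret == KAE_LZ4_SUCC) ? (int)result.dst_len : -1;
}

KAELZ4_rebuild_lz77_to_frame() follows the same pattern but takes an additional LZ4F_preferences_t pointer for frame parameters. Note that the kzip test tool in PATCH 3/3 falls back to the plain kaelz4async_block path when the configured chunk size exceeds one 2 MiB huge page, since its tuple buffer must be physically contiguous; that restriction belongs to the test harness, not necessarily to the library API.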