From ae27971669a3d49be844b1fa58dac6b2b4d1be70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E6=B5=B7=E5=B9=B3?= Date: Tue, 28 Oct 2025 20:05:07 +0800 Subject: [PATCH 1/7] open source rectification --- .../kernel_impl/faster_gelu_custom.h | 10 +++---- .../normalize/kernel_impl/normalize_custom.h | 20 +++++++------- .../welford_finalize_custom_tiling.h | 2 +- .../kernel_impl/welford_finalize_custom.h | 14 +++++----- .../welford_update_custom_tiling.h | 4 +-- .../kernel_impl/welford_update_custom.h | 14 +++++----- .../op_host/welford_update_custom_tiling.h | 4 +-- examples/reduce/sum/main.cpp | 2 +- .../reduce/sum/op_host/sum_custom_tiling.cpp | 16 ++++++------ .../reduce/sum/op_host/sum_custom_tiling.h | 2 +- .../reduce/sum/op_kernel/sum_custom_impl.h | 10 +++---- examples/sort/topk/kernel_impl/topk_custom.h | 16 ++++++------ .../topk_custom_tiling.cpp | 2 +- .../kernel_impl/init_global_memory_custom.h | 2 +- lib/quantization/ascend_antiquant.h | 24 ++++++++--------- .../groupnorm/test_operator_groupnorm.cpp | 9 ++++--- .../test_operator_welfordfinalize.cpp | 6 ++--- .../reduce_all/test_operator_reduce_all.cpp | 2 +- .../reduce_any/test_operator_reduce_any.cpp | 2 +- .../reduce_max/test_operator_reduce_max.cpp | 2 +- .../reduce_mean/test_operator_reduce_mean.cpp | 2 +- .../reduce_min/test_operator_reduce_min.cpp | 2 +- .../reduce_prod/test_operator_reduce_prod.cpp | 2 +- .../reduce_sum/test_operator_reduce_sum.cpp | 2 +- tests/reduce/sum/test_operator_sum.cpp | 16 +++++++----- .../test_operator_confusion_transpose.cpp | 26 +++++++++---------- 26 files changed, 109 insertions(+), 104 deletions(-) diff --git a/examples/activation/fastergelu/kernel_impl/faster_gelu_custom.h b/examples/activation/fastergelu/kernel_impl/faster_gelu_custom.h index ab84dacd..32602ded 100644 --- a/examples/activation/fastergelu/kernel_impl/faster_gelu_custom.h +++ b/examples/activation/fastergelu/kernel_impl/faster_gelu_custom.h @@ -21,12 +21,12 @@ struct VecTiling { template class KernelFasterGelu { public: __aicore__ inline KernelFasterGelu() {} - __aicore__ inline void Init(GM_ADDR src_gm, GM_ADDR dst_gm, uint32_t inputSize) + __aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstGm, uint32_t inputSize) { dataSize = inputSize; - srcGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ srcType*>(src_gm), dataSize); - dstGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ srcType*>(dst_gm), dataSize); + srcGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ srcType*>(srcGm), dataSize); + dstGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ srcType*>(dstGm), dataSize); pipe.InitBuffer(inQueueX, 1, dataSize * sizeof(srcType)); pipe.InitBuffer(outQueue, 1, dataSize * sizeof(srcType)); @@ -50,8 +50,8 @@ private: AscendC::LocalTensor dstLocal = outQueue.AllocTensor(); AscendC::LocalTensor srcLocal = inQueueX.DeQue(); AscendC::FasterGelu(dstLocal, srcLocal, dataSize); - // AscendC::FasterGelu(dstLocal, srcLocal, dataSize); //开启高精度模式 - // AscendC::FasterGelu(dstLocal, srcLocal, dataSize); //开启高性能模式 + // AscendC::FasterGelu(dstLocal, srcLocal, dataSize); //Enable high precision mode + // AscendC::FasterGelu(dstLocal, srcLocal, dataSize); //Enable high performance mode outQueue.EnQue(dstLocal); inQueueX.FreeTensor(srcLocal); } diff --git a/examples/normalization/normalize/kernel_impl/normalize_custom.h b/examples/normalization/normalize/kernel_impl/normalize_custom.h index 89b1b960..5de70bb8 100644 --- a/examples/normalization/normalize/kernel_impl/normalize_custom.h +++ b/examples/normalization/normalize/kernel_impl/normalize_custom.h @@ 
-28,21 +28,21 @@ template class KernelNormalize { public: __aicore__ inline KernelNormalize() {} - __aicore__ inline void Init(GM_ADDR inputX_gm, GM_ADDR inputMean_gm, GM_ADDR inputVar_gm, GM_ADDR gamma_gm, - GM_ADDR beta_gm, GM_ADDR output_gm, GM_ADDR outputRstd_gm, NormalizeTiling tilingData) { + __aicore__ inline void Init(GM_ADDR inputXGm, GM_ADDR inputMeanGm, GM_ADDR inputVarGm, GM_ADDR gammaGm, + GM_ADDR betaGm, GM_ADDR outputGm, GM_ADDR outputRstdGm, NormalizeTiling tilingData) { aLength = tilingData.aLength; rLength = tilingData.rLength; rLengthWithPadding = tilingData.rLengthWithPadding; tmpLocalBytes = tilingData.tmpLocalSize; uint32_t totalLength = aLength * rLengthWithPadding; - inputX_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(inputX_gm), totalLength); // [A, R] - inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputMean_gm), aLength); // [A] - inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputVar_gm), aLength); // [A] - inputGamma_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(gamma_gm), rLengthWithPadding); // [R] - inputBeta_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(beta_gm), rLengthWithPadding); // [R] - - output_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(output_gm), totalLength); - outputRstd_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(outputRstd_gm), aLength); + inputX_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(inputXGm), totalLength); // [A, R] + inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputMeanGm), aLength); // [A] + inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputVarGm), aLength); // [A] + inputGamma_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(gammaGm), rLengthWithPadding); // [R] + inputBeta_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(betaGm), rLengthWithPadding); // [R] + + output_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(outputGm), totalLength); + outputRstd_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(outputRstdGm), aLength); pipe.InitBuffer(inQueueX, 1, sizeof(T) * totalLength); pipe.InitBuffer(inQueueMean, 1, sizeof(float) * aLength); diff --git a/examples/normalization/welford_finalize/host_tiling/welford_finalize_custom_tiling.h b/examples/normalization/welford_finalize/host_tiling/welford_finalize_custom_tiling.h index 13ac35cf..6a91d8ca 100644 --- a/examples/normalization/welford_finalize/host_tiling/welford_finalize_custom_tiling.h +++ b/examples/normalization/welford_finalize/host_tiling/welford_finalize_custom_tiling.h @@ -38,7 +38,7 @@ void ComputeTiling(uint32_t rnLength, uint32_t abLength, uint32_t head, uint32_t ge::Shape srcShape(shapeVec); uint32_t maxsize = 0; uint32_t minsize = 0; - uint32_t dtypesize = 4; // float类型 + uint32_t dtypesize = 4; // float type tiling.set_rnLength(rnLength); tiling.set_abLength(abLength); diff --git a/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h b/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h index f0b48af6..889905ea 100644 --- a/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h +++ b/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h @@ -32,8 +32,8 @@ class KernelWelfordFinalize { public: __aicore__ inline KernelWelfordFinalize() {} - __aicore__ inline void Init(GM_ADDR inputMean_gm, GM_ADDR inputVariance_gm, GM_ADDR counts_gm, GM_ADDR outputMean_gm, - GM_ADDR outputVariance_gm, VecTiling tilingData) + 
__aicore__ inline void Init(GM_ADDR inputMeanGm, GM_ADDR inputVarianceGm, GM_ADDR countsGm, GM_ADDR outputMeanGm, + GM_ADDR outputVarianceGm, VecTiling tilingData) { this->rnLength = tilingData.rnLength; this->abLength = tilingData.abLength; @@ -51,11 +51,11 @@ public: this->rRec = 1.0f / rLength; this->outLength = OUT_SIZE; - inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputMean_gm), abLength); - inputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputVariance_gm), abLength); - inputcounts_global.SetGlobalBuffer(reinterpret_cast<__gm__ int32_t *>(counts_gm), abLength); - outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputMean_gm), outLength); - outputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputVariance_gm), outLength); + inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputMeanGm), abLength); + inputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputVarianceGm), abLength); + inputcounts_global.SetGlobalBuffer(reinterpret_cast<__gm__ int32_t *>(countsGm), abLength); + outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputMeanGm), outLength); + outputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputVarianceGm), outLength); pipe.InitBuffer(inQueueMean, 1, abLength * sizeof(dataType)); pipe.InitBuffer(inQueueVariance, 1, abLength * sizeof(dataType)); diff --git a/examples/normalization/welford_update/host_tiling/welford_update_custom_tiling.h b/examples/normalization/welford_update/host_tiling/welford_update_custom_tiling.h index ef4335a3..b23571d0 100644 --- a/examples/normalization/welford_update/host_tiling/welford_update_custom_tiling.h +++ b/examples/normalization/welford_update/host_tiling/welford_update_custom_tiling.h @@ -36,8 +36,8 @@ void ComputeTiling(bool inplace, uint32_t nLength, uint32_t rLength, uint32_t ab ge::Shape srcShape(shapeVec); uint32_t maxsize = 0; uint32_t minsize = 0; - uint32_t dtypesizeT = 2; // half类型 - uint32_t dtypesizeU = 4; // float类型 + uint32_t dtypesizeT = 2; // half type + uint32_t dtypesizeU = 4; // float type tiling.set_inplace(inplace); tiling.set_nLength(nLength); diff --git a/examples/normalization/welford_update/kernel_impl/welford_update_custom.h b/examples/normalization/welford_update/kernel_impl/welford_update_custom.h index e91dee4e..154a9b14 100644 --- a/examples/normalization/welford_update/kernel_impl/welford_update_custom.h +++ b/examples/normalization/welford_update/kernel_impl/welford_update_custom.h @@ -30,8 +30,8 @@ template (inputX_gm), bshLength); - inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputMean_gm), bshLength); - inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputVar_gm), bshLength); + inputX_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(inputXGm), bshLength); + inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputMeanGm), bshLength); + inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputVarGm), bshLength); - outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputMean_gm), bshLength); - outputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputVar_gm), bshLength); + outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputMeanGm), bshLength); + outputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputVarGm), bshLength); pipe.InitBuffer(inQueueX, 1, sizeof(T) * bshLength); pipe.InitBuffer(inQueueMean, 1, sizeof(U) * bshLength); 
diff --git a/examples/normalization/welford_update/kernel_launch_method_by_framework/op_host/welford_update_custom_tiling.h b/examples/normalization/welford_update/kernel_launch_method_by_framework/op_host/welford_update_custom_tiling.h index ef4335a3..b23571d0 100644 --- a/examples/normalization/welford_update/kernel_launch_method_by_framework/op_host/welford_update_custom_tiling.h +++ b/examples/normalization/welford_update/kernel_launch_method_by_framework/op_host/welford_update_custom_tiling.h @@ -36,8 +36,8 @@ void ComputeTiling(bool inplace, uint32_t nLength, uint32_t rLength, uint32_t ab ge::Shape srcShape(shapeVec); uint32_t maxsize = 0; uint32_t minsize = 0; - uint32_t dtypesizeT = 2; // half类型 - uint32_t dtypesizeU = 4; // float类型 + uint32_t dtypesizeT = 2; // half type + uint32_t dtypesizeU = 4; // float type tiling.set_inplace(inplace); tiling.set_nLength(nLength); diff --git a/examples/reduce/sum/main.cpp b/examples/reduce/sum/main.cpp index c3ec5710..4d7da545 100644 --- a/examples/reduce/sum/main.cpp +++ b/examples/reduce/sum/main.cpp @@ -26,7 +26,7 @@ constexpr uint32_t M = 7; // outter constexpr uint32_t N = 2023; // inner_actual } -extern void GenerateTilingData(uint8_t *tilingBuf, const uint32_t M, const uint32_t N); +extern void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t n); static bool CompareResult(const void *outputData, uint32_t outSize) { void *goldenData; diff --git a/examples/reduce/sum/op_host/sum_custom_tiling.cpp b/examples/reduce/sum/op_host/sum_custom_tiling.cpp index ebfc06e6..5e3cbb4c 100644 --- a/examples/reduce/sum/op_host/sum_custom_tiling.cpp +++ b/examples/reduce/sum/op_host/sum_custom_tiling.cpp @@ -16,25 +16,25 @@ namespace { constexpr uint32_t PADDING_BYTE = 32U; } -void GenerateTilingData(uint8_t *tilingBuf, const uint32_t M, const uint32_t N) { +void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t n) { uint32_t minValue = 0; uint32_t maxValue = 0; - AscendC::GetSumMaxMinTmpSize(N, sizeof(uint32_t), false, maxValue, minValue); + AscendC::GetSumMaxMinTmpSize(n, sizeof(uint32_t), false, maxValue, minValue); SumCustomTilingData *tiling = reinterpret_cast(tilingBuf); - auto paddingFunc = [](const uint32_t n, const uint32_t typeSize) -> uint32_t { + auto paddingFunc = [](const uint32_t n1, const uint32_t typeSize) -> uint32_t { if (typeSize == 0) { return 0; } - return (n * typeSize + PADDING_BYTE - 1U) / PADDING_BYTE * PADDING_BYTE / typeSize; + return (n1 * typeSize + PADDING_BYTE - 1U) / PADDING_BYTE * PADDING_BYTE / typeSize; }; - tiling->outter = M; - tiling->inner = paddingFunc(N, sizeof(uint32_t)); - tiling->n = N; + tiling->outter = m; + tiling->inner = paddingFunc(n, sizeof(uint32_t)); + tiling->n = n; tiling->tmpBufSize = minValue; - tiling->out_inner = paddingFunc(M, sizeof(uint32_t)); + tiling->out_inner = paddingFunc(m, sizeof(uint32_t)); } \ No newline at end of file diff --git a/examples/reduce/sum/op_host/sum_custom_tiling.h b/examples/reduce/sum/op_host/sum_custom_tiling.h index e3947535..2df3f1ed 100644 --- a/examples/reduce/sum/op_host/sum_custom_tiling.h +++ b/examples/reduce/sum/op_host/sum_custom_tiling.h @@ -18,7 +18,7 @@ struct SumCustomTilingData { uint32_t outter; uint32_t n; uint32_t tmpBufSize; - uint32_t out_inner; + uint32_t outInner; }; #endif // EXAMPLES_REDUCE_SUM_COSTOM_TILING_H \ No newline at end of file diff --git a/examples/reduce/sum/op_kernel/sum_custom_impl.h b/examples/reduce/sum/op_kernel/sum_custom_impl.h index 71f8bef1..0f3a97c5 100644 --- 
a/examples/reduce/sum/op_kernel/sum_custom_impl.h +++ b/examples/reduce/sum/op_kernel/sum_custom_impl.h @@ -25,7 +25,7 @@ public: outter = tilingData.outter; n = tilingData.n; tmpBufSize = tilingData.tmpBufSize; - out_inner = tilingData.out_inner; + outInner = tilingData.outInner; params.inner = inner; params.outter = outter; @@ -36,7 +36,7 @@ public: pipe = pipeIn; pipe->InitBuffer(inQueue, 1, inner * outter * sizeof(T)); - pipe->InitBuffer(outQueue, 1, out_inner * sizeof(T)); + pipe->InitBuffer(outQueue, 1, outInner * sizeof(T)); pipe->InitBuffer(tmpBuf, tmpBufSize * sizeof(uint8_t)); } __aicore__ inline void Process() { @@ -57,7 +57,7 @@ private: AscendC::LocalTensor sharedTmpBuffer = tmpBuf.AllocTensor(); T scalar(0); - AscendC::Duplicate(yLocal, scalar, out_inner); + AscendC::Duplicate(yLocal, scalar, outInner); AscendC::Sum(yLocal, xLocal, sharedTmpBuffer, params); outQueue.EnQue(yLocal); @@ -66,7 +66,7 @@ private: } __aicore__ inline void CopyOut() { AscendC::LocalTensor yLocal = outQueue.DeQue(); - AscendC::DataCopy(yGm, yLocal, out_inner); + AscendC::DataCopy(yGm, yLocal, outInner); outQueue.FreeTensor(yLocal); } @@ -82,7 +82,7 @@ private: uint32_t outter = 0; uint32_t n = 0; uint32_t tmpBufSize = 0; - uint32_t out_inner = 0; + uint32_t outInner = 0; AscendC::SumParams params; }; } diff --git a/examples/sort/topk/kernel_impl/topk_custom.h b/examples/sort/topk/kernel_impl/topk_custom.h index ad24c567..7b0a1883 100644 --- a/examples/sort/topk/kernel_impl/topk_custom.h +++ b/examples/sort/topk/kernel_impl/topk_custom.h @@ -47,17 +47,17 @@ public: tmplocalBytes = tilingData.minsize; topKTilingData = tilingData.topKTilingData; k = tilingData.k; - // 计算k_pad + // calculate kPad if (sizeof(T) == sizeof(float)) { - k_pad = (k + K_FLOAT - 1) / K_FLOAT * K_FLOAT; + kPad = (k + K_FLOAT - 1) / K_FLOAT * K_FLOAT; } else { - k_pad = (k + K_HALF - 1) / K_HALF * K_HALF; + kPad = (k + K_HALF - 1) / K_HALF * K_HALF; } - kpad_index = (k + K_FLOAT) / K_FLOAT * K_FLOAT; + kPadIndex = (k + K_FLOAT) / K_FLOAT * K_FLOAT; isLargest = tilingData.isLargest; inDataSize = inner * outter; - outValueDataSize = k_pad * outter; - outIndexDataSize = kpad_index * outter; + outValueDataSize = kPad * outter; + outIndexDataSize = kPadIndex * outter; inputdexDataSize = inner; if (topkMode == true) { @@ -189,8 +189,8 @@ private: uint32_t outValueDataSize = 0; uint32_t outIndexDataSize = 0; uint32_t k; - uint32_t k_pad; - uint32_t kpad_index; + uint32_t kPad; + uint32_t kPadIndex; bool isLargest = true; TopkTiling topKTilingData; uint32_t outter; diff --git a/examples/sort/topk/kernel_launch_method_by_direct/topk_custom_tiling.cpp b/examples/sort/topk/kernel_launch_method_by_direct/topk_custom_tiling.cpp index cd5af26c..c2239d97 100644 --- a/examples/sort/topk/kernel_launch_method_by_direct/topk_custom_tiling.cpp +++ b/examples/sort/topk/kernel_launch_method_by_direct/topk_custom_tiling.cpp @@ -29,7 +29,7 @@ uint8_t* GenerateTiling(uint32_t k, uint32_t outter, uint32_t inner, uint32_t n, uint32_t maxsize = 0; uint32_t minsize = 0; - uint32_t dtypesize = 4; // float类型 + uint32_t dtypesize = 4; // float type platform_ascendc::PlatformAscendC* ascendcPlatform; if (socVersion != nullptr) { diff --git a/examples/utils/init_global_memory/kernel_impl/init_global_memory_custom.h b/examples/utils/init_global_memory/kernel_impl/init_global_memory_custom.h index 21ca183c..bb57df5f 100644 --- a/examples/utils/init_global_memory/kernel_impl/init_global_memory_custom.h +++
b/examples/utils/init_global_memory/kernel_impl/init_global_memory_custom.h @@ -30,7 +30,7 @@ public: // init zGm value AscendC::InitGlobalMemory(zGm, INIT_SIZE, (float)(AscendC::GetBlockIdx())); - //需要插MTE2等MTE3的同步 + // a sync where MTE2 waits for MTE3 is required AscendC::TEventID eventIdMTE3ToMTE2 = GetTPipePtr()->FetchEventID(AscendC::HardEvent::MTE3_MTE2); AscendC::SetFlag(eventIdMTE3ToMTE2); AscendC::WaitFlag(eventIdMTE3ToMTE2); diff --git a/lib/quantization/ascend_antiquant.h b/lib/quantization/ascend_antiquant.h index 0275ae1e..c1c338f9 100644 --- a/lib/quantization/ascend_antiquant.h +++ b/lib/quantization/ascend_antiquant.h @@ -33,12 +33,12 @@ namespace AscendC { template __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, const LocalTensor &src, const LocalTensor &offset, const LocalTensor &scale, - const LocalTensor &sharedTmpBuffer, const uint32_t K, const AntiQuantShapeInfo& shapeInfo = {}) + const LocalTensor &sharedTmpBuffer, const uint32_t k, const AntiQuantShapeInfo& shapeInfo = {}) { if ASCEND_IS_AIC { return; } - AscendAntiQuantImpl(dst, src, offset, scale, sharedTmpBuffer, K, + AscendAntiQuantImpl(dst, src, offset, scale, sharedTmpBuffer, k, shapeInfo); } @@ -54,13 +54,13 @@ __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, c */ template __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, const LocalTensor &src, - const LocalTensor &scale, const LocalTensor &sharedTmpBuffer, const uint32_t K, + const LocalTensor &scale, const LocalTensor &sharedTmpBuffer, const uint32_t k, const AntiQuantShapeInfo& shapeInfo = {}) { if ASCEND_IS_AIC { return; } - AscendAntiQuantImpl(dst, src, scale, sharedTmpBuffer, K, shapeInfo); + AscendAntiQuantImpl(dst, src, scale, sharedTmpBuffer, k, shapeInfo); } /* ! @@ -75,13 +75,13 @@ __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, c */ template __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, const LocalTensor &src, - const LocalTensor &offset, const LocalTensor &scale, const uint32_t K, + const LocalTensor &offset, const LocalTensor &scale, const uint32_t k, const AntiQuantShapeInfo& shapeInfo = {}) { if ASCEND_IS_AIC { return; } - AscendAntiQuantImpl(dst, src, offset, scale, K, shapeInfo); + AscendAntiQuantImpl(dst, src, offset, scale, k, shapeInfo); } /* ! @@ -98,12 +98,12 @@ __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, c template __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, const LocalTensor &src, const OutputDataType offset, const OutputDataType scale, const LocalTensor &sharedTmpBuffer, - const uint32_t K, const AntiQuantShapeInfo& shapeInfo = {}) + const uint32_t k, const AntiQuantShapeInfo& shapeInfo = {}) { if ASCEND_IS_AIC { return; } - AscendAntiQuantImpl(dst, src, offset, scale, sharedTmpBuffer, K, + AscendAntiQuantImpl(dst, src, offset, scale, sharedTmpBuffer, k, shapeInfo); } @@ -119,13 +119,13 @@ __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, c */ template __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, const LocalTensor &src, - const OutputDataType scale, const LocalTensor &sharedTmpBuffer, const uint32_t K, + const OutputDataType scale, const LocalTensor &sharedTmpBuffer, const uint32_t k, const AntiQuantShapeInfo& shapeInfo = {}) { if ASCEND_IS_AIC { return; } - AscendAntiQuantImpl(dst, src, scale, sharedTmpBuffer, K, shapeInfo); + AscendAntiQuantImpl(dst, src, scale, sharedTmpBuffer, k, shapeInfo); } /* !
@@ -140,12 +140,12 @@ __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, c */ template __aicore__ inline void AscendAntiQuant(const LocalTensor &dst, const LocalTensor &src, - const OutputDataType offset, const OutputDataType scale, const uint32_t K, const AntiQuantShapeInfo& shapeInfo = {}) + const OutputDataType offset, const OutputDataType scale, const uint32_t k, const AntiQuantShapeInfo& shapeInfo = {}) { if ASCEND_IS_AIC { return; } - AscendAntiQuantImpl(dst, src, offset, scale, K, shapeInfo); + AscendAntiQuantImpl(dst, src, offset, scale, k, shapeInfo); } #pragma end_pipe } // namespace AscendC diff --git a/tests/normalization/groupnorm/test_operator_groupnorm.cpp b/tests/normalization/groupnorm/test_operator_groupnorm.cpp index 7dd522b9..4fa37322 100644 --- a/tests/normalization/groupnorm/test_operator_groupnorm.cpp +++ b/tests/normalization/groupnorm/test_operator_groupnorm.cpp @@ -70,11 +70,12 @@ __aicore__ inline void GetGroupNormNDTillingInfo(const ShapeInfo& inputShapeInfo tiling.tmpBufSize = stackBufferSize / ONE_BLK_SIZE * ONE_BLK_SIZE / B32_BYTE_SIZE; tiling.oneTmpSize = (tiling.tmpBufSize - meanVarTotalSize) / tiling.numberOfTmpBuf; - // 为了使 MeanVarTensor 可以直接使用 Add 而不需使用 GetValue, 需保证每个迭代至少有8的整数倍组 group + // to enable MeanVarTensor to use Add directly without needing GetValue, + // it is necessary to ensure that the number of groups in each iteration is a multiple of 8 tiling.bsCurLength = tiling.oneTmpSize / (GROUPNORM_MIN_BSCURLENGHT_IN_ITERATION * tiling.d * tiling.hwAlignSize) * GROUPNORM_MIN_BSCURLENGHT_IN_ITERATION; - // 判断是否满足 smallShape 计算 + // determine whether the condition for smallShape is met uint32_t k = GROUPNORM_REDUCESUM_MAX_REPEAT_SMALLSHAPE; while ((tiling.dhwAlignSize / (ONE_BLK_SIZE / B32_BYTE_SIZE)) % k != 0) { k--; @@ -82,7 +83,9 @@ __aicore__ inline void GetGroupNormNDTillingInfo(const ShapeInfo& inputShapeInfo tiling.smallShape = (tiling.hwAlignSize <= GROUPNORM_REDUCESUM_MAX_FLOAT_NUM) && (tiling.hwAlignSize * tiling.d <= GROUPNORM_REDUCESUM_MAX_FLOAT_NUM * k); - // ReduceSum0级接口带来的约束, 根据DHW计算2次 ReduceSum 的 mask/repeat, 以及 DHW/bsCurLength 取值范围 + // the constraints introduced by the ReduceSum level-0 interface: + // calculate the mask/repeat for the 2 ReduceSum operations based on DHW, + // as well as the value range of DHW/bsCurLength if (tiling.smallShape) { uint32_t mask1{GROUPNORM_MAX_MASK_VAL}; if (tiling.dhwAlignSize > GROUPNORM_MAX_MASK_VAL) { diff --git a/tests/normalization/welfordfinalize/test_operator_welfordfinalize.cpp b/tests/normalization/welfordfinalize/test_operator_welfordfinalize.cpp index c5b08ec1..c937fbfa 100644 --- a/tests/normalization/welfordfinalize/test_operator_welfordfinalize.cpp +++ b/tests/normalization/welfordfinalize/test_operator_welfordfinalize.cpp @@ -210,9 +210,9 @@ protected: {} }; -// 1、有尾块; -// 2、有counts; -// 3、 有buffer约束; +// 1. with tail block; +// 2. there is counts; +// 3.
there is buffer constraint INSTANTIATE_TEST_CASE_P(TEST_PACKAGE_WelfordFinalize, WelfordFinalizeTestSuite, ::testing::Values( WelfordFinalizeTestParams { 4, 32, 4, 32, 4, 0, kernel_WelfordFinalize_test }, // !1 + !2 + !3 diff --git a/tests/reduce/reduce_all/test_operator_reduce_all.cpp b/tests/reduce/reduce_all/test_operator_reduce_all.cpp index 4cf97671..bd570304 100644 --- a/tests/reduce/reduce_all/test_operator_reduce_all.cpp +++ b/tests/reduce/reduce_all/test_operator_reduce_all.cpp @@ -180,7 +180,7 @@ TEST_P(ReduceAllTestsuite, ReduceAllOpTestCase) auto last = param.last; constexpr uint32_t BLK_SIZE = 32; auto padLast = (last * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; - uint8_t srcGm[first * padLast] = {0}; // 外部保证inner是32B对齐 + uint8_t srcGm[first * padLast] = {0}; // external guarantee inner is 32B aligned uint32_t dstLen = param.isAr ? first : last; auto padDst = (dstLen * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; uint8_t dstGm[padDst] = {0}; diff --git a/tests/reduce/reduce_any/test_operator_reduce_any.cpp b/tests/reduce/reduce_any/test_operator_reduce_any.cpp index bf4335fc..9a09028e 100644 --- a/tests/reduce/reduce_any/test_operator_reduce_any.cpp +++ b/tests/reduce/reduce_any/test_operator_reduce_any.cpp @@ -179,7 +179,7 @@ TEST_P(ReduceAnyTestsuite, ReduceAnyOpTestCase) auto last = param.last; constexpr uint32_t BLK_SIZE = 32; auto padLast = (last * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; - uint8_t srcGm[first * padLast] = {0}; // 外部保证inner是32B对齐 + uint8_t srcGm[first * padLast] = {0}; // external guarantee inner is 32B aligned uint32_t dstLen = param.isAr ? first : last; auto padDst = (dstLen * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; uint8_t dstGm[padDst] = {0}; diff --git a/tests/reduce/reduce_max/test_operator_reduce_max.cpp b/tests/reduce/reduce_max/test_operator_reduce_max.cpp index 25891741..8d8ae90c 100644 --- a/tests/reduce/reduce_max/test_operator_reduce_max.cpp +++ b/tests/reduce/reduce_max/test_operator_reduce_max.cpp @@ -172,7 +172,7 @@ TEST_P(MaxTestsuite, MaxOpTestCase) auto last = param.last; constexpr uint32_t BLK_SIZE = 32; auto padLast = (last * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; - uint8_t srcGm[first * padLast] = {0}; // 外部保证inner是32B对齐 + uint8_t srcGm[first * padLast] = {0}; // external guarantee inner is 32B aligned uint32_t dstLen = param.isAr ? first : last; auto padDst = (dstLen * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; uint8_t dstGm[padDst] = {0}; diff --git a/tests/reduce/reduce_mean/test_operator_reduce_mean.cpp b/tests/reduce/reduce_mean/test_operator_reduce_mean.cpp index 5d7ad967..b0dca21c 100644 --- a/tests/reduce/reduce_mean/test_operator_reduce_mean.cpp +++ b/tests/reduce/reduce_mean/test_operator_reduce_mean.cpp @@ -159,7 +159,7 @@ TEST_P(ReduceMeanTestsuite, ReduceMeanOpTestCase) auto last = param.last; constexpr uint32_t BLK_SIZE = 32; auto padLast = (last * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; - uint8_t srcGm[first * padLast] = {0}; // 外部保证inner是32B对齐 + uint8_t srcGm[first * padLast] = {0}; // external guarantee inner is 32B aligned uint32_t dstLen = param.isAr ? 
first : last; auto padDst = (dstLen * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; uint8_t dstGm[padDst] = {0}; diff --git a/tests/reduce/reduce_min/test_operator_reduce_min.cpp b/tests/reduce/reduce_min/test_operator_reduce_min.cpp index 6d0787df..7137cd17 100644 --- a/tests/reduce/reduce_min/test_operator_reduce_min.cpp +++ b/tests/reduce/reduce_min/test_operator_reduce_min.cpp @@ -172,7 +172,7 @@ TEST_P(MinTestsuite, MinOpTestCase) auto last = param.last; constexpr uint32_t BLK_SIZE = 32; auto padLast = (last * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; - uint8_t srcGm[first * padLast] = {0}; // 外部保证inner是32B对齐 + uint8_t srcGm[first * padLast] = {0}; // external guarantee inner is 32B aligned uint32_t dstLen = param.isAr ? first : last; auto padDst = (dstLen * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; uint8_t dstGm[padDst] = {0}; diff --git a/tests/reduce/reduce_prod/test_operator_reduce_prod.cpp b/tests/reduce/reduce_prod/test_operator_reduce_prod.cpp index d27778c9..80079e0a 100644 --- a/tests/reduce/reduce_prod/test_operator_reduce_prod.cpp +++ b/tests/reduce/reduce_prod/test_operator_reduce_prod.cpp @@ -136,7 +136,7 @@ TEST_P(ProdTestsuite, ProdOpTestCase) auto last = param.last; constexpr uint32_t BLK_SIZE = 32; auto padLast = (last * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; - uint8_t srcGm[first * padLast] = {0}; // 外部保证inner是32B对齐 + uint8_t srcGm[first * padLast] = {0}; // external guarantee inner is 32B aligned uint32_t dstLen = param.isAr ? first : last; auto padDst = (dstLen * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; uint8_t dstGm[padDst] = {0}; diff --git a/tests/reduce/reduce_sum/test_operator_reduce_sum.cpp b/tests/reduce/reduce_sum/test_operator_reduce_sum.cpp index 2b0927cd..e0e418ee 100644 --- a/tests/reduce/reduce_sum/test_operator_reduce_sum.cpp +++ b/tests/reduce/reduce_sum/test_operator_reduce_sum.cpp @@ -163,7 +163,7 @@ TEST_P(ReduceSumTestsuite, ReduceSumOpTestCase) auto last = param.last; constexpr uint32_t BLK_SIZE = 32; auto padLast = (last * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; - uint8_t srcGm[first * padLast] = {0}; // 外部保证inner是32B对齐 + uint8_t srcGm[first * padLast] = {0}; // external guarantee inner is 32B aligned uint32_t dstLen = param.isAr ? 
first : last; auto padDst = (dstLen * param.typeSize + BLK_SIZE - 1) / BLK_SIZE * BLK_SIZE; uint8_t dstGm[padDst] = {0}; diff --git a/tests/reduce/sum/test_operator_sum.cpp b/tests/reduce/sum/test_operator_sum.cpp index 015bcf70..5a9dd78c 100644 --- a/tests/reduce/sum/test_operator_sum.cpp +++ b/tests/reduce/sum/test_operator_sum.cpp @@ -31,10 +31,10 @@ public: src1Global.SetGlobalBuffer((__gm__ T*)src0Gm); dstGlobal.SetGlobalBuffer((__gm__ T*)dstGm); pipe.InitBuffer(inQueueSrc1, 1, 8 * 160 * sizeof(T)); - pipe.InitBuffer(outQueueDst, 1, ONE_BLK_SIZE); // 8个数整体对齐 - int32_t repeatTimes = (160 + elementNumPerRep - 1) / elementNumPerRep; // workSize = repeatTimes向上取整 + pipe.InitBuffer(outQueueDst, 1, ONE_BLK_SIZE); // align the 8 numbers as a whole + int32_t repeatTimes = (160 + elementNumPerRep - 1) / elementNumPerRep; // workSize = repeatTimes rounded up int32_t finalWorkSize = (repeatTimes + elementNumPerBlk - 1) / elementNumPerBlk * elementNumPerBlk * sizeof(T); - pipe.InitBuffer(workQueue, 1, finalWorkSize); // 向上取整 + pipe.InitBuffer(workQueue, 1, finalWorkSize); // round up } __aicore__ inline void Process() { @@ -59,7 +59,7 @@ private: LocalTensor workLocal = workQueue.AllocTensor(); LocalTensor dstLocal = outQueueDst.AllocTensor(); - SumParams params {8, 160, 152}; // n是自己填的 + SumParams params {8, 160, 152}; Sum(dstLocal, srcLocal1, workLocal, params); outQueueDst.EnQue(dstLocal); @@ -75,11 +75,13 @@ private: private: TPipe pipe; - TQue inQueueSrc1; // 用于申请临时tensor + // used for allocating a temporary tensor + TQue inQueueSrc1; TQue workQueue; TQue outQueueDst; - GlobalTensor src1Global, dstGlobal; // 用于关联Gm + // used for associating GM + GlobalTensor src1Global, dstGlobal; }; } // namespace AscendC @@ -117,7 +119,7 @@ INSTANTIATE_TEST_CASE_P(TEST_OPEARATION_SUM, SumTestsuite, TEST_P(SumTestsuite, SumOpTestCase) { auto param = GetParam(); - uint8_t src0Gm[8 * 160 * param.typeSize]; // 外部保证inner是32B对齐 + uint8_t src0Gm[8 * 160 * param.typeSize]; // external guarantee inner is 32B aligned uint32_t dstLen = (8 * param.typeSize + ONE_BLK_SIZE - 1) / ONE_BLK_SIZE * ONE_BLK_SIZE; uint8_t dstGm[dstLen]; param.cal_func(dstGm, src0Gm); diff --git a/tests/transpose/confusion_transpose/test_operator_confusion_transpose.cpp b/tests/transpose/confusion_transpose/test_operator_confusion_transpose.cpp index c0624cc1..fe6b6b58 100644 --- a/tests/transpose/confusion_transpose/test_operator_confusion_transpose.cpp +++ b/tests/transpose/confusion_transpose/test_operator_confusion_transpose.cpp @@ -19,9 +19,9 @@ using namespace std; using namespace AscendC; -// 场景1 +// scene 1 namespace AscendC { -// 场景1、2: srcShape[B, A1, A2, A3] +// scene 1, 2: srcShape[B, A1, A2, A3] __aicore__ inline void GetConfusionTranspose0213TilingInfo(const ShapeInfo srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, ConfusionTransposeTiling& tiling) { @@ -32,7 +32,7 @@ __aicore__ inline void GetConfusionTranspose0213TilingInfo(const ShapeInfo srcSh uint32_t widthTiling = (srcShape.originalShape[3] + BLOCK_CUBE - 1) / BLOCK_CUBE; uint32_t alignA3 = widthTiling * BLOCK_CUBE; - // stackBuffer向 [16,16]对齐 + // stackBuffer is aligned to [16,16] uint32_t newPopSize = (stackBufferSize / CUBE_MAX_SIZE) * CUBE_MAX_SIZE; // element uint32_t newPopH = newPopSize / BLOCK_CUBE; uint32_t needSize = alignA2 * BLOCK_CUBE; @@ -62,7 +62,7 @@ __aicore__ inline void GetConfusionTranspose0213TilingInfo(const ShapeInfo srcSh tiling.param15 = mainOffset; } -// 场景3:srcShape[B, N, S, H/N] +// scene 3:srcShape[B, N, S, H/N] __aicore__ inline void
GetConfusionTranspose2NZ012NTilingInfo(const ShapeInfo srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, ConfusionTransposeTiling& tiling) { @@ -103,7 +103,7 @@ __aicore__ inline void GetConfusionTranspose2NZ012NTilingInfo(const ShapeInfo sr tiling.param16 = srcBatchOffset; } -// 场景4:srcShape[B, N, S, H/N] +// scene 4:srcShape[B, N, S, H/N] __aicore__ inline void GetConfusionTranspose2ND012NTilingInfo(const ShapeInfo srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, ConfusionTransposeTiling& tiling) { @@ -146,7 +146,7 @@ __aicore__ inline void GetConfusionTranspose2ND012NTilingInfo(const ShapeInfo sr tiling.param17 = blockNum; } -// 场景5、6:srcShape[B, N, S, H/N] +// scene 5, 6:srcShape[B, N, S, H/N] __aicore__ inline void GetConfusionTranspose012TilingInfo(const ShapeInfo srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, ConfusionTransposeTiling& tiling) { @@ -183,7 +183,7 @@ __aicore__ inline void GetConfusionTranspose012TilingInfo(const ShapeInfo srcSha tiling.param14 = blockNum; } -// 场景7:srcShape[height, width] +// scene 7:srcShape[height, width] __aicore__ inline void GetConfusionTransposeOnlyTilingInfo(const ShapeInfo srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, ConfusionTransposeTiling& tiling) { @@ -335,7 +335,7 @@ TEST_P(ConfusionTransposeFirstTestsuite, ConfusionTransposeFirstTestCase) } } -// 场景2 +// scene 2 namespace AscendC { template class KernelConfusionTransposeSecond { @@ -458,7 +458,7 @@ TEST_P(ConfusionTransposeSecondTestsuite, ConfusionTransposeSecondTestCase) } } -// 场景3 +// scene 3 namespace AscendC { template class KernelConfusionTransposeThird { @@ -604,7 +604,7 @@ TEST_P(ConfusionTransposeThirdTestsuite, ConfusionTransposeThirdTestCase) } } -// 场景4 +// scene 4 namespace AscendC { template class KernelConfusionTransposeFourth { @@ -751,7 +751,7 @@ TEST_P(ConfusionTransposeFourthTestsuite, ConfusionTransposeFourthTestCase) } -// 场景5 +// scene 5 namespace AscendC { template class KernelConfusionTransposeFifth { @@ -897,7 +897,7 @@ TEST_P(ConfusionTransposeFifthTestsuite, ConfusionTransposeFifthTestCase) } -// 场景6 +// scene 6 namespace AscendC { template class KernelConfusionTransposeSixth { @@ -1042,7 +1042,7 @@ TEST_P(ConfusionTransposeSixthTestsuite, ConfusionTransposeSixthTestCase) } } -// 场景7 +// scene 7 namespace AscendC { template class KernelConfusionTransposeSeventh { -- Gitee From edab8174dcb94d963c203391093e5810e9d06ad4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E6=B5=B7=E5=B9=B3?= Date: Wed, 29 Oct 2025 11:05:24 +0800 Subject: [PATCH 2/7] update var naming --- .../normalize/kernel_impl/normalize_custom.h | 16 ++++++++-------- .../kernel_impl/welford_finalize_custom.h | 14 +++++++------- .../kernel_impl/welford_update_custom.h | 14 +++++++------- examples/reduce/sum/op_host/sum_custom_tiling.h | 2 +- examples/reduce/sum/op_kernel/sum_custom_impl.h | 10 +++++----- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/examples/normalization/normalize/kernel_impl/normalize_custom.h b/examples/normalization/normalize/kernel_impl/normalize_custom.h index 5de70bb8..60ab5f52 100644 --- a/examples/normalization/normalize/kernel_impl/normalize_custom.h +++ b/examples/normalization/normalize/kernel_impl/normalize_custom.h @@ -28,21 +28,21 @@ template class KernelNormalize { public: __aicore__ inline KernelNormalize() {} - __aicore__ inline void Init(GM_ADDR inputXGm, GM_ADDR inputMeanGm, GM_ADDR inputVarGm, GM_ADDR gammaGm, - GM_ADDR betaGm, GM_ADDR outputGm, GM_ADDR 
outputRstdGm, NormalizeTiling tilingData) { + __aicore__ inline void Init(GM_ADDR inputXGm, GM_ADDR inputMean_gm, GM_ADDR inputVar_gm, GM_ADDR gammaGm, + GM_ADDR betaGm, GM_ADDR output_gm, GM_ADDR outputRstd_gm, NormalizeTiling tilingData) { aLength = tilingData.aLength; rLength = tilingData.rLength; rLengthWithPadding = tilingData.rLengthWithPadding; tmpLocalBytes = tilingData.tmpLocalSize; uint32_t totalLength = aLength * rLengthWithPadding; inputX_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(inputXGm), totalLength); // [A, R] - inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputMeanGm), aLength); // [A] - inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputVarGm), aLength); // [A] - inputGamma_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(gammaGm), rLengthWithPadding); // [R] - inputBeta_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(betaGm), rLengthWithPadding); // [R] + inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputMean_gm), aLength); // [A] + inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputVar_gm), aLength); // [A] + inputGamma_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(gamma_gm), rLengthWithPadding); // [R] + inputBeta_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(beta_gm), rLengthWithPadding); // [R] - output_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(outputGm), totalLength); - outputRstd_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(outputRstdGm), aLength); + output_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(output_gm), totalLength); + outputRstd_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(outputRstd_gm), aLength); pipe.InitBuffer(inQueueX, 1, sizeof(T) * totalLength); pipe.InitBuffer(inQueueMean, 1, sizeof(float) * aLength); diff --git a/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h b/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h index 889905ea..f0b48af6 100644 --- a/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h +++ b/examples/normalization/welford_finalize/kernel_impl/welford_finalize_custom.h @@ -32,8 +32,8 @@ class KernelWelfordFinalize { public: __aicore__ inline KernelWelfordFinalize() {} - __aicore__ inline void Init(GM_ADDR inputMeanGm, GM_ADDR inputVarianceGm, GM_ADDR countsGm, GM_ADDR outputMeanGm, - GM_ADDR outputVarianceGm, VecTiling tilingData) + __aicore__ inline void Init(GM_ADDR inputMean_gm, GM_ADDR inputVariance_gm, GM_ADDR counts_gm, GM_ADDR outputMean_gm, + GM_ADDR outputVariance_gm, VecTiling tilingData) { this->rnLength = tilingData.rnLength; this->abLength = tilingData.abLength; @@ -51,11 +51,11 @@ public: this->rRec = 1.0f / rLength; this->outLength = OUT_SIZE; - inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputMeanGm), abLength); - inputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputVarianceGm), abLength); - inputcounts_global.SetGlobalBuffer(reinterpret_cast<__gm__ int32_t *>(countsGm), abLength); - outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputMeanGm), outLength); - outputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputVarianceGm), outLength); + inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputMean_gm), abLength); + inputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(inputVariance_gm), abLength); + 
inputcounts_global.SetGlobalBuffer(reinterpret_cast<__gm__ int32_t *>(counts_gm), abLength); + outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputMean_gm), outLength); + outputVariance_global.SetGlobalBuffer(reinterpret_cast<__gm__ dataType *>(outputVariance_gm), outLength); pipe.InitBuffer(inQueueMean, 1, abLength * sizeof(dataType)); pipe.InitBuffer(inQueueVariance, 1, abLength * sizeof(dataType)); diff --git a/examples/normalization/welford_update/kernel_impl/welford_update_custom.h b/examples/normalization/welford_update/kernel_impl/welford_update_custom.h index 154a9b14..e91dee4e 100644 --- a/examples/normalization/welford_update/kernel_impl/welford_update_custom.h +++ b/examples/normalization/welford_update/kernel_impl/welford_update_custom.h @@ -30,8 +30,8 @@ template (inputXGm), bshLength); - inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputMeanGm), bshLength); - inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputVarGm), bshLength); + inputX_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(inputX_gm), bshLength); + inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputMean_gm), bshLength); + inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(inputVar_gm), bshLength); - outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputMeanGm), bshLength); - outputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputVarGm), bshLength); + outputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputMean_gm), bshLength); + outputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ U *>(outputVar_gm), bshLength); pipe.InitBuffer(inQueueX, 1, sizeof(T) * bshLength); pipe.InitBuffer(inQueueMean, 1, sizeof(U) * bshLength); diff --git a/examples/reduce/sum/op_host/sum_custom_tiling.h b/examples/reduce/sum/op_host/sum_custom_tiling.h index 2df3f1ed..e3947535 100644 --- a/examples/reduce/sum/op_host/sum_custom_tiling.h +++ b/examples/reduce/sum/op_host/sum_custom_tiling.h @@ -18,7 +18,7 @@ struct SumCustomTilingData { uint32_t outter; uint32_t n; uint32_t tmpBufSize; - uint32_t outInner; + uint32_t out_inner; }; #endif // EXAMPLES_REDUCE_SUM_COSTOM_TILING_H \ No newline at end of file diff --git a/examples/reduce/sum/op_kernel/sum_custom_impl.h b/examples/reduce/sum/op_kernel/sum_custom_impl.h index 0f3a97c5..71f8bef1 100644 --- a/examples/reduce/sum/op_kernel/sum_custom_impl.h +++ b/examples/reduce/sum/op_kernel/sum_custom_impl.h @@ -25,7 +25,7 @@ public: outter = tilingData.outter; n = tilingData.n; tmpBufSize = tilingData.tmpBufSize; - outInner = tilingData.outInner; + out_inner = tilingData.out_inner; params.inner = inner; params.outter = outter; @@ -36,7 +36,7 @@ public: pipe = pipeIn; pipe->InitBuffer(inQueue, 1, inner * outter * sizeof(T)); - pipe->InitBuffer(outQueue, 1, outInner * sizeof(T)); + pipe->InitBuffer(outQueue, 1, out_inner * sizeof(T)); pipe->InitBuffer(tmpBuf, tmpBufSize * sizeof(uint8_t)); } __aicore__ inline void Process() { @@ -57,7 +57,7 @@ private: AscendC::LocalTensor sharedTmpBuffer = tmpBuf.AllocTensor(); T scalar(0); - AscendC::Duplicate(yLocal, scalar, outInner); + AscendC::Duplicate(yLocal, scalar, out_inner); AscendC::Sum(yLocal, xLocal, sharedTmpBuffer, params); outQueue.EnQue(yLocal); @@ -66,7 +66,7 @@ private: } __aicore__ inline void CopyOut() { AscendC::LocalTensor yLocal = outQueue.DeQue(); - AscendC::DataCopy(yGm, yLocal, outInner); + AscendC::DataCopy(yGm, yLocal, out_inner); outQueue.FreeTensor(yLocal); } @@ -82,7 +82,7 @@ private: uint32_t outter = 
0; uint32_t n = 0; uint32_t tmpBufSize = 0; - uint32_t outInner = 0; + uint32_t out_inner = 0; AscendC::SumParams params; }; } -- Gitee From d5e0ed123721223223048cf63e078d663e41bf58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E6=B5=B7=E5=B9=B3?= Date: Wed, 29 Oct 2025 11:08:17 +0800 Subject: [PATCH 3/7] fix: compile error --- .../normalization/normalize/kernel_impl/normalize_custom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/normalization/normalize/kernel_impl/normalize_custom.h b/examples/normalization/normalize/kernel_impl/normalize_custom.h index 60ab5f52..6fdc4974 100644 --- a/examples/normalization/normalize/kernel_impl/normalize_custom.h +++ b/examples/normalization/normalize/kernel_impl/normalize_custom.h @@ -28,8 +28,8 @@ template class KernelNormalize { public: __aicore__ inline KernelNormalize() {} - __aicore__ inline void Init(GM_ADDR inputXGm, GM_ADDR inputMean_gm, GM_ADDR inputVar_gm, GM_ADDR gammaGm, - GM_ADDR betaGm, GM_ADDR output_gm, GM_ADDR outputRstd_gm, NormalizeTiling tilingData) { + __aicore__ inline void Init(GM_ADDR inputXGm, GM_ADDR inputMean_gm, GM_ADDR inputVar_gm, GM_ADDR gamma_gm, + GM_ADDR beta_gm, GM_ADDR output_gm, GM_ADDR outputRstd_gm, NormalizeTiling tilingData) { aLength = tilingData.aLength; rLength = tilingData.rLength; rLengthWithPadding = tilingData.rLengthWithPadding; -- Gitee From 8daf6f0a20c8fef719ca4a72743bc2871d1c622a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E6=B5=B7=E5=B9=B3?= Date: Wed, 29 Oct 2025 11:10:00 +0800 Subject: [PATCH 4/7] update var naming --- .../normalization/normalize/kernel_impl/normalize_custom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/normalization/normalize/kernel_impl/normalize_custom.h b/examples/normalization/normalize/kernel_impl/normalize_custom.h index 6fdc4974..89b1b960 100644 --- a/examples/normalization/normalize/kernel_impl/normalize_custom.h +++ b/examples/normalization/normalize/kernel_impl/normalize_custom.h @@ -28,14 +28,14 @@ template class KernelNormalize { public: __aicore__ inline KernelNormalize() {} - __aicore__ inline void Init(GM_ADDR inputXGm, GM_ADDR inputMean_gm, GM_ADDR inputVar_gm, GM_ADDR gamma_gm, + __aicore__ inline void Init(GM_ADDR inputX_gm, GM_ADDR inputMean_gm, GM_ADDR inputVar_gm, GM_ADDR gamma_gm, GM_ADDR beta_gm, GM_ADDR output_gm, GM_ADDR outputRstd_gm, NormalizeTiling tilingData) { aLength = tilingData.aLength; rLength = tilingData.rLength; rLengthWithPadding = tilingData.rLengthWithPadding; tmpLocalBytes = tilingData.tmpLocalSize; uint32_t totalLength = aLength * rLengthWithPadding; - inputX_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(inputXGm), totalLength); // [A, R] + inputX_global.SetGlobalBuffer(reinterpret_cast<__gm__ T *>(inputX_gm), totalLength); // [A, R] inputMean_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputMean_gm), aLength); // [A] inputVar_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(inputVar_gm), aLength); // [A] inputGamma_global.SetGlobalBuffer(reinterpret_cast<__gm__ float *>(gamma_gm), rLengthWithPadding); // [R] -- Gitee From c2640022843ca9e43ea550d760749ac0a857efa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E6=B5=B7=E5=B9=B3?= Date: Wed, 29 Oct 2025 11:23:54 +0800 Subject: [PATCH 5/7] fix: compile error --- examples/reduce/sum/op_host/sum_custom_tiling.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/reduce/sum/op_host/sum_custom_tiling.cpp 
b/examples/reduce/sum/op_host/sum_custom_tiling.cpp index 5e3cbb4c..549592ef 100644 --- a/examples/reduce/sum/op_host/sum_custom_tiling.cpp +++ b/examples/reduce/sum/op_host/sum_custom_tiling.cpp @@ -16,7 +16,7 @@ namespace { constexpr uint32_t PADDING_BYTE = 32U; } -void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t n) { +void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t N) { uint32_t minValue = 0; uint32_t maxValue = 0; @@ -24,16 +24,16 @@ void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t n) SumCustomTilingData *tiling = reinterpret_cast(tilingBuf); - auto paddingFunc = [](const uint32_t n1, const uint32_t typeSize) -> uint32_t { + auto paddingFunc = [](const uint32_t N, const uint32_t typeSize) -> uint32_t { if (typeSize == 0) { return 0; } - return (n1 * typeSize + PADDING_BYTE - 1U) / PADDING_BYTE * PADDING_BYTE / typeSize; + return (n * typeSize + PADDING_BYTE - 1U) / PADDING_BYTE * PADDING_BYTE / typeSize; }; tiling->outter = m; - tiling->inner = paddingFunc(n, sizeof(uint32_t)); - tiling->n = n; + tiling->inner = paddingFunc(N, sizeof(uint32_t)); + tiling->n = N; tiling->tmpBufSize = minValue; tiling->out_inner = paddingFunc(m, sizeof(uint32_t)); -- Gitee From 1d67e00c4beecdeb6cb3ffadf78cc7781695e404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E6=B5=B7=E5=B9=B3?= Date: Wed, 29 Oct 2025 11:28:47 +0800 Subject: [PATCH 6/7] update var naming --- examples/reduce/sum/op_host/sum_custom_tiling.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/reduce/sum/op_host/sum_custom_tiling.cpp b/examples/reduce/sum/op_host/sum_custom_tiling.cpp index 549592ef..12c654aa 100644 --- a/examples/reduce/sum/op_host/sum_custom_tiling.cpp +++ b/examples/reduce/sum/op_host/sum_custom_tiling.cpp @@ -20,11 +20,11 @@ void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t N) uint32_t minValue = 0; uint32_t maxValue = 0; - AscendC::GetSumMaxMinTmpSize(n, sizeof(uint32_t), false, maxValue, minValue); + AscendC::GetSumMaxMinTmpSize(N, sizeof(uint32_t), false, maxValue, minValue); SumCustomTilingData *tiling = reinterpret_cast(tilingBuf); - auto paddingFunc = [](const uint32_t N, const uint32_t typeSize) -> uint32_t { + auto paddingFunc = [](const uint32_t n, const uint32_t typeSize) -> uint32_t { if (typeSize == 0) { return 0; } -- Gitee From 15c27ec41fa47c3a37ff35bf216ba94d2cee95b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E6=B5=B7=E5=B9=B3?= Date: Wed, 29 Oct 2025 11:37:17 +0800 Subject: [PATCH 7/7] fix: compile error --- examples/reduce/sum/op_host/sum_custom_tiling.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/reduce/sum/op_host/sum_custom_tiling.cpp b/examples/reduce/sum/op_host/sum_custom_tiling.cpp index 12c654aa..5e3cbb4c 100644 --- a/examples/reduce/sum/op_host/sum_custom_tiling.cpp +++ b/examples/reduce/sum/op_host/sum_custom_tiling.cpp @@ -16,24 +16,24 @@ namespace { constexpr uint32_t PADDING_BYTE = 32U; } -void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t N) { +void GenerateTilingData(uint8_t *tilingBuf, const uint32_t m, const uint32_t n) { uint32_t minValue = 0; uint32_t maxValue = 0; - AscendC::GetSumMaxMinTmpSize(N, sizeof(uint32_t), false, maxValue, minValue); + AscendC::GetSumMaxMinTmpSize(n, sizeof(uint32_t), false, maxValue, minValue); SumCustomTilingData *tiling = reinterpret_cast(tilingBuf); - auto paddingFunc = [](const uint32_t n, const uint32_t typeSize) -> uint32_t { 
+ auto paddingFunc = [](const uint32_t n1, const uint32_t typeSize) -> uint32_t { if (typeSize == 0) { return 0; } - return (n * typeSize + PADDING_BYTE - 1U) / PADDING_BYTE * PADDING_BYTE / typeSize; + return (n1 * typeSize + PADDING_BYTE - 1U) / PADDING_BYTE * PADDING_BYTE / typeSize; }; tiling->outter = m; - tiling->inner = paddingFunc(N, sizeof(uint32_t)); - tiling->n = N; + tiling->inner = paddingFunc(n, sizeof(uint32_t)); + tiling->n = n; tiling->tmpBufSize = minValue; tiling->out_inner = paddingFunc(m, sizeof(uint32_t)); -- Gitee