From de06a442088247fb1038a1e008415824fb6f8d8e Mon Sep 17 00:00:00 2001
From: lhz
Date: Sun, 28 Sep 2025 10:47:22 +0800
Subject: [PATCH] Add loop sve mode optimization

---
 0404-Add-loop-sve-mode-optimization.patch | 376 ++++++++++++++++++++++
 gcc.spec                                  |   8 +-
 2 files changed, 383 insertions(+), 1 deletion(-)
 create mode 100644 0404-Add-loop-sve-mode-optimization.patch

diff --git a/0404-Add-loop-sve-mode-optimization.patch b/0404-Add-loop-sve-mode-optimization.patch
new file mode 100644
index 0000000..bf8fde7
--- /dev/null
+++ b/0404-Add-loop-sve-mode-optimization.patch
@@ -0,0 +1,376 @@
+diff --git a/gcc/common.opt b/gcc/common.opt
+index ed4696b7a..219eb2b69 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1134,6 +1134,10 @@ floop-crc
+ Common Var(flag_loop_crc) Optimization
+ Do the loop crc conversion.
+ 
++floop-sve-mode-opt
++Common Var(flag_loop_sve_mode_opt) Optimization
++Optimization of adding sve mode for some loop
++
+ fauto-inc-dec
+ Common Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 52cac0b82..394c4d1e3 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -19123,6 +19123,7 @@ override_C_optimize_options (struct gcc_options *opts)
+   opts->x_flag_ipa_prefetch = 1;
+   opts->x_flag_ipa_ic = 1;
+   opts->x_flag_cmlt_arith = 1;
++  opts->x_flag_loop_sve_mode_opt = 1;
+ }
+ 
+ /* Check whether in CPP language or LTO with only CPP language.  */
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c
+new file mode 100644
+index 000000000..4beca21df
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include <stdint.h>
++
++void foo(unsigned int* dest, uint8_t* src, unsigned int len, unsigned int* mul)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] * (*mul) + 8;
++}
++
++/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c
+new file mode 100644
+index 000000000..60941ec5c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include <stdint.h>
++
++void foo(uint8_t* dest, uint8_t* src, unsigned int len, unsigned int* mul)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] * (*mul) + 8;
++}
++
++/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c
+new file mode 100644
+index 000000000..5075f6ce8
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include <stdint.h>
++
++void foo(unsigned int* dest, uint8_t* src, unsigned int len)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] + 8;
++}
++
++/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c
+new file mode 100644
+index 000000000..56af86846
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include <stdint.h>
++
++void foo(unsigned int* dest, uint8_t* src, unsigned int len, unsigned int* mul)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] * (*mul);
++}
++
++/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c
+new file mode 100644
+index 000000000..a6c0e23be
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include <stdint.h>
++
++void foo(unsigned int* dest, uint8_t* src, unsigned int len,
++         unsigned int* mul, unsigned int* append)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = ((unsigned int)src[i]) * (*mul) + (*append);
++}
++
++/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c
+new file mode 100644
+index 000000000..545cc69cc
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include <stdint.h>
++
++void foo(unsigned int* dest, uint8_t* src, unsigned int len,
++         unsigned int* mul, unsigned int* append)
++{
++  for(int i = 0; i < len; ++i) {
++    dest[i] = src[i] * (*mul);
++    dest[i] -= 8;
++    dest[i] += *append;
++  }
++}
++
++/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
+index ae5212411..a61c37a53 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect.exp
++++ b/gcc/testsuite/gcc.dg/vect/vect.exp
+@@ -124,6 +124,13 @@ et-dg-runtest dg-runtest [lsort \
+ 	[glob -nocomplain $srcdir/$subdir/transpose-*.\[cS\]]] \
+ 	"" "-ftree-slp-transpose-vectorize -fdump-tree-slp-details -O3"
+ 
++# -floop-sve-mode-opt tests
++set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
++lappend VECT_SLP_CFLAGS "-floop-sve-mode-opt"
++et-dg-runtest dg-runtest [lsort \
++	[glob -nocomplain $srcdir/$subdir/sve-mode-opt*.\[cS\]]] \
++	"" "-floop-sve-mode-opt -fdump-tree-vect-details -O3"
++
+ # -ffast-math tests
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+ lappend DEFAULT_VECTCFLAGS "-ffast-math"
+diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
+index 7f7577951..a01404087 100644
+--- a/gcc/tree-vect-loop.cc
++++ b/gcc/tree-vect-loop.cc
+@@ -2958,6 +2958,177 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
+   return opt_loop_vec_info::success (loop_vinfo);
+ }
+ 
++/* Return true if STMT is an assignment whose right-hand side is a MEM_REF
++   or COMPONENT_REF of a type whose precision is WIDTH bits.  */
++bool load_by_specific_width_p (gimple *stmt, unsigned int width)
++{
++  if (!is_gimple_assign (stmt))
++    return false;
++
++  if (gimple_assign_rhs_code (stmt) != MEM_REF
++      && gimple_assign_rhs_code (stmt) != COMPONENT_REF)
++    return false;
++
++  tree rhs = gimple_assign_rhs1 (stmt);
++  tree type = TREE_TYPE (rhs);
++  return TYPE_PRECISION (type) == width;
++}
++
++/* Return true if STMT is a NOP_EXPR or CONVERT_EXPR assignment from an
++   integral type of precision W1 to an integral type of precision W2.  */
++bool converse_by_specific_width_p (gimple *stmt,
++				   unsigned int w1, unsigned int w2)
++{
++  if (!is_gimple_assign (stmt))
++    return false;
++
++  enum tree_code code = gimple_assign_rhs_code (stmt);
++
++  if (code != NOP_EXPR && code != CONVERT_EXPR)
++    return false;
++
++  tree rhs = gimple_assign_rhs1 (stmt);
++  tree lhs = gimple_assign_lhs (stmt);
++  tree rhs_type = TREE_TYPE (rhs);
++  tree lhs_type = TREE_TYPE (lhs);
++  bool rhs_is_width1 = INTEGRAL_TYPE_P (rhs_type)
++		       && TYPE_PRECISION (rhs_type) == w1;
++  bool lhs_is_width2 = INTEGRAL_TYPE_P (lhs_type)
++		       && TYPE_PRECISION (lhs_type) == w2;
++
++  return rhs_is_width1 && lhs_is_width2;
++}
++
++/* Return true if STMT is a MULT_EXPR assignment with an INTEGER_TYPE result
++   whose two operands are N1 and N2, in either order.  */
++bool multiply_by_specific_nodes_p (gimple *stmt, tree n1, tree n2)
++{
++  if (!is_gimple_assign (stmt))
++    return false;
++
++  enum tree_code code = gimple_assign_rhs_code (stmt);
++
++  if (code != MULT_EXPR)
++    return false;
++
++  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
++
++  if (TREE_CODE (type) != INTEGER_TYPE)
++    return false;
++
++  tree mul1 = gimple_assign_rhs1 (stmt);
++  tree mul2 = gimple_assign_rhs2 (stmt);
++
++  return ((mul1 == n1 && mul2 == n2)
++	  || (mul1 == n2 && mul2 == n1));
++}
++
++/* Return true if STMT is a PLUS_EXPR assignment with an INTEGER_TYPE result
++   that has N1 as one of its operands.  */
++bool plus_by_specific_node_p (gimple *stmt, tree n1)
++{
++  if (!is_gimple_assign (stmt))
++    return false;
++
++  enum tree_code code = gimple_assign_rhs_code (stmt);
++
++  if (code != PLUS_EXPR)
++    return false;
++
++  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
++
++  if (TREE_CODE (type) != INTEGER_TYPE)
++    return false;
++
++  tree addend1 = gimple_assign_rhs1 (stmt);
++  tree addend2 = gimple_assign_rhs2 (stmt);
++
++  return ((addend1 == n1) || (addend2 == n1));
++}
++
++/* Return true if the statements from GSI to the end of its basic block
++   contain, in order: an 8-bit load at GSI itself, a conversion of the
++   loaded value from 8 to 32 bits, the next 32-bit load, a multiplication
++   of the converted value by that second loaded value, and an addition
++   that uses the product.  LOOP is not used.  */
++bool converse_and_multiply_p (loop_p loop, gimple_stmt_iterator gsi)
++{
++  gimple *stmt = gsi_stmt (gsi);
++  if (!load_by_specific_width_p (stmt, 8))
++    return false;
++
++  tree load1_lhs = gimple_assign_lhs (stmt);
++  tree conv_lhs = NULL_TREE, load2_lhs = NULL_TREE, mul_lhs = NULL_TREE;
++
++  while (!gsi_end_p (gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (converse_by_specific_width_p (stmt, 8, 32))
++	{
++	  if (load1_lhs == gimple_assign_rhs1 (stmt))
++	    break;
++	}
++      gsi_next (&gsi);
++    }
++  if (gsi_end_p (gsi))
++    return false;
++
++  conv_lhs = gimple_assign_lhs (stmt);
++
++  while (!gsi_end_p (gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (load_by_specific_width_p (stmt, 32))
++	{
++	  load2_lhs = gimple_assign_lhs (stmt);
++	  break;
++	}
++      gsi_next (&gsi);
++    }
++  if (gsi_end_p (gsi))
++    return false;
++
++  while (!gsi_end_p (gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (multiply_by_specific_nodes_p (stmt, conv_lhs, load2_lhs))
++	{
++	  mul_lhs = gimple_assign_lhs (stmt);
++	  break;
++	}
++      gsi_next (&gsi);
++    }
++  if (gsi_end_p (gsi))
++    return false;
++
++  while (!gsi_end_p (gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (plus_by_specific_node_p (stmt, mul_lhs))
++	break;
++      gsi_next (&gsi);
++    }
++
++  return !gsi_end_p (gsi);
++}
++
++/* Return true if some basic block of LOOP contains the statement sequence
++   recognized by converse_and_multiply_p.  */
++bool sve_mode_opt_analyze_loop (loop_p loop)
++{
++  basic_block *bbs = get_loop_body (loop);
++  bool found = false;
++
++  for (unsigned int i = 0; i < loop->num_nodes && !found; i++)
++    for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
++	 !gsi_end_p (gsi) && !found; gsi_next (&gsi))
++      found = converse_and_multiply_p (loop, gsi);
++
++  /* get_loop_body returns a heap-allocated array owned by the caller.  */
++  free (bbs);
++  return found;
++}
++
+ /* Function vect_analyze_loop.
+ 
+    Apply a set of analyses on LOOP, and create a loop_vec_info struct
+@@ -3007,10 +3178,32 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared,
+   auto_vector_modes vector_modes;
+   /* Autodetect first vector size we try.  */
+   vector_modes.safe_push (VOIDmode);
+-  unsigned int autovec_flags
+-    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
++
++#if !defined (CROSS_DIRECTORY_STRUCTURE) && defined (__aarch64__)
++  /* If LOOP matches sve_mode_opt_analyze_loop, also try VNx4QImode and
++     request cost comparison.  The guard tests the build host, so this code
++     is only present in native aarch64 compilers.  */
++  bool sve_chance = false;
++  if (flag_loop_sve_mode_opt && TARGET_SVE
++      && targetm.vector_mode_supported_p (VNx4QImode)
++      && sve_mode_opt_analyze_loop (loop))
++    {
++      if (dump_enabled_p ())
++	dump_printf (MSG_NOTE, "Loop sve mode optimization success\n");
++      sve_chance = true;
++      vector_modes.safe_push (VNx4QImode);
++    }
++#endif
++
++  unsigned int autovec_flags = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
+ 						    loop->simdlen != 0);
+-  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
++
++#if !defined (CROSS_DIRECTORY_STRUCTURE) && defined (__aarch64__)
++  if (sve_chance)
++    autovec_flags |= VECT_COMPARE_COSTS;
++#endif
++
++  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
+ 			     && !unlimited_cost_model (loop));
+   machine_mode autodetected_vector_mode = VOIDmode;
+   opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
diff --git a/gcc.spec b/gcc.spec
index 50bab7d..f4a46e9 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 100
+%global gcc_release 101
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -513,6 +513,7 @@ Patch400: 0400-c-fix-unsigned-__int128_t-semantics-PR108099.patch
 Patch401: 0401-testsuite-Fix-up-g-.dg-ext-int128-8.C-testcase-PR109.patch
 Patch402: 0402-c-fix-unsigned-typedef-name-extension-PR108099.patch
 Patch403: 0403-FIX-aarch64-align-arch-name-offset-in-aarch64-_core.patch
+Patch404: 0404-Add-loop-sve-mode-optimization.patch
 
 # Part 1001-1999
 %ifarch sw_64
@@ -1702,6 +1703,7 @@ not stable, so plugins must be rebuilt any time GCC is updated.
 %patch -P401 -p1
 %patch -P402 -p1
 %patch -P403 -p1
+%patch -P404 -p1
 
 %ifarch sw_64
 %patch -P1001 -p1
@@ -4329,6 +4331,10 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Sun Sep 28 2025 linhouzhong - 12.3.1-101
+- Type: Sync
+- DESC: Sync patches from openeuler/gcc.
+
 * Mon Sep 22 2025 Cutie Deng - 12.3.1-100
 - Type: Bugfix
 - DESC: Correct CPU architecture info display for aarch64 -march=native
-- 
Gitee