From de06a442088247fb1038a1e008415824fb6f8d8e Mon Sep 17 00:00:00 2001
From: lhz <hz_lin8@163.com>
Date: Sun, 28 Sep 2025 10:47:22 +0800
Subject: [PATCH] Add loop sve mode optimization

---
 0404-Add-loop-sve-mode-optimization.patch | 357 ++++++++++++++++++++++
 gcc.spec                                  |   8 +-
 2 files changed, 364 insertions(+), 1 deletion(-)
 create mode 100644 0404-Add-loop-sve-mode-optimization.patch

diff --git a/0404-Add-loop-sve-mode-optimization.patch b/0404-Add-loop-sve-mode-optimization.patch
new file mode 100644
index 0000000..bf8fde7
--- /dev/null
+++ b/0404-Add-loop-sve-mode-optimization.patch
@@ -0,0 +1,357 @@
+diff --git a/gcc/common.opt b/gcc/common.opt
+index ed4696b7a..219eb2b69 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1134,6 +1134,10 @@ floop-crc
+ Common Var(flag_loop_crc) Optimization
+ Do the loop crc conversion.
+ 
++floop-sve-mode-opt
++Common Var(flag_loop_sve_mode_opt) Optimization
++Try an extra SVE vector mode for loops that widen, multiply and add 8-bit data.
++
+ fauto-inc-dec
+ Common Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 52cac0b82..394c4d1e3 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -19123,6 +19123,7 @@ override_C_optimize_options (struct gcc_options *opts)
+   opts->x_flag_ipa_prefetch = 1;
+   opts->x_flag_ipa_ic = 1;
+   opts->x_flag_cmlt_arith = 1;
++  opts->x_flag_loop_sve_mode_opt = 1; 
+ }
+ 
+ /* Check whether in CPP language or LTO with only CPP language.  */
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c
+new file mode 100644
+index 000000000..4beca21df
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include<stdint.h>
++/* uint8_t load times a loaded unsigned int, plus a constant: one match.  */
++void foo(unsigned int* dest, uint8_t* src, unsigned int len, unsigned int* mul)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] * (*mul) + 8;
++}
++
++/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c
+new file mode 100644
+index 000000000..60941ec5c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include<stdint.h>
++/* Same expression, but DEST is uint8_t: success must not be reported.  */
++void foo(uint8_t* dest, uint8_t* src, unsigned int len, unsigned int* mul)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] * (*mul) + 8;
++}
++
++/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c
+new file mode 100644
+index 000000000..5075f6ce8
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include<stdint.h>
++/* No multiplication in the loop body: success must not be reported.  */
++void foo(unsigned int* dest, uint8_t* src, unsigned int len)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] + 8;
++}
++
++/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c
+new file mode 100644
+index 000000000..56af86846
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include<stdint.h>
++/* No addition after the multiplication: success must not be reported.  */
++void foo(unsigned int* dest, uint8_t* src, unsigned int len, unsigned int* mul)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = src[i] * (*mul);
++}
++
++/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c
+new file mode 100644
+index 000000000..a6c0e23be
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include<stdint.h>
++/* Explicit cast of the uint8_t load and a loaded addend: one match.  */
++void foo(unsigned int* dest, uint8_t* src, unsigned int len,
++    unsigned int* mul, unsigned int* append)
++{
++  for(int i = 0; i < len; ++i)
++    dest[i] = ((unsigned int)src[i]) * (*mul) + (*append);
++}
++
++/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c
+new file mode 100644
+index 000000000..545cc69cc
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target aarch64*-*-* } } */
++/* { dg-additional-options "-march=armv8-a+sve" } */
++#include<stdint.h>
++/* Multiply, then adjust DEST[i] in two further statements: one match.  */
++void foo(unsigned int* dest, uint8_t* src, unsigned int len,
++        unsigned int* mul, unsigned int* append)
++{
++  for(int i = 0; i < len; ++i) {
++    dest[i] = src[i] * (*mul);
++    dest[i] -= 8;
++    dest[i] += *append;
++  }
++}
++
++/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
+index ae5212411..a61c37a53 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect.exp
++++ b/gcc/testsuite/gcc.dg/vect/vect.exp
+@@ -124,6 +124,13 @@ et-dg-runtest dg-runtest [lsort \
+ 	[glob -nocomplain $srcdir/$subdir/transpose-*.\[cS\]]] \
+ 	"" "-ftree-slp-transpose-vectorize -fdump-tree-slp-details -O3"
+ 
++# -floop-sve-mode-opt tests: run sve-mode-opt*.[cS] at -O3 with the vect details dump
++set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
++lappend VECT_SLP_CFLAGS "-floop-sve-mode-opt"
++et-dg-runtest dg-runtest [lsort \
++	[glob -nocomplain $srcdir/$subdir/sve-mode-opt*.\[cS\]]] \
++	"" "-floop-sve-mode-opt -fdump-tree-vect-details -O3"
++
+ # -ffast-math tests
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+ lappend DEFAULT_VECTCFLAGS "-ffast-math"
+diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
+index 7f7577951..a01404087 100644
+--- a/gcc/tree-vect-loop.cc
++++ b/gcc/tree-vect-loop.cc
+@@ -2958,6 +2958,161 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
+   return opt_loop_vec_info::success (loop_vinfo);
+ }
+ 
++/* Return true if STMT is an assignment whose right-hand side is a MEM_REF
++   or COMPONENT_REF of an integral type with precision WIDTH.  */
++bool load_by_specific_width_p (gimple *stmt, unsigned int width)
++{
++  if (!is_gimple_assign (stmt))
++    return false;
++  enum tree_code code = gimple_assign_rhs_code (stmt);
++  if (code != MEM_REF && code != COMPONENT_REF)
++    return false;
++  /* Reject non-integral types first, as the conversion check also does.  */
++  tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
++  return INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) == width;
++}
++
++/* Return true if STMT is a NOP_EXPR or CONVERT_EXPR assignment that
++   converts an integral operand of precision W1 into an integral result
++   of precision W2.  */
++bool converse_by_specific_width_p (gimple *stmt,
++  unsigned int w1, unsigned int w2)
++{
++  if (!is_gimple_assign (stmt))
++    return false;
++
++  switch (gimple_assign_rhs_code (stmt))
++    {
++    case NOP_EXPR:
++    case CONVERT_EXPR:
++      break;
++    default:
++      return false;
++    }
++  tree from = TREE_TYPE (gimple_assign_rhs1 (stmt));
++  tree to = TREE_TYPE (gimple_assign_lhs (stmt));
++  return (INTEGRAL_TYPE_P (from) && TYPE_PRECISION (from) == w1
++	  && INTEGRAL_TYPE_P (to) && TYPE_PRECISION (to) == w2);
++}
++
++/* Return true if STMT is a MULT_EXPR assignment producing an INTEGER_TYPE
++   value whose two operands are N1 and N2, in either order.  */
++bool multiply_by_specific_nodes_p (gimple *stmt, tree n1, tree n2)
++{
++  if (!is_gimple_assign (stmt)
++      || gimple_assign_rhs_code (stmt) != MULT_EXPR)
++    return false;
++
++  /* Only a result of INTEGER_TYPE is accepted.  */
++  tree result_type = TREE_TYPE (gimple_assign_lhs (stmt));
++  if (TREE_CODE (result_type) != INTEGER_TYPE)
++    return false;
++
++  tree op0 = gimple_assign_rhs1 (stmt);
++  tree op1 = gimple_assign_rhs2 (stmt);
++
++  /* Operands are compared by tree identity, in both orders.  */
++  if (op0 == n1 && op1 == n2)
++    return true;
++  return op0 == n2 && op1 == n1;
++}
++
++/* Return true if STMT is a PLUS_EXPR assignment producing an INTEGER_TYPE
++   value that has N1 as one of its two addends.  */
++bool plus_by_specific_node_p (gimple *stmt, tree n1)
++{
++  if (!is_gimple_assign (stmt)
++      || gimple_assign_rhs_code (stmt) != PLUS_EXPR)
++    return false;
++
++  /* Only a result of INTEGER_TYPE is accepted.  */
++  tree result_type = TREE_TYPE (gimple_assign_lhs (stmt));
++  if (TREE_CODE (result_type) != INTEGER_TYPE)
++    return false;
++
++  /* N1 may appear on either side of the addition; operands are compared
++     by tree identity.  */
++  if (gimple_assign_rhs1 (stmt) == n1)
++    return true;
++
++  return gimple_assign_rhs2 (stmt) == n1;
++}
++
++/* Helper for sve_mode_opt_analyze_loop.  Starting at the statement that
++   GSI points to, scan forward through the statements that follow it for
++   this chain:
++
++     1. a load of an 8-bit value (the statement at GSI itself),
++     2. a conversion of that value from 8 to 32 bits,
++     3. a load of a 32-bit value,
++     4. a multiplication of the results of 2 and 3,
++     5. an addition that uses the result of 4.
++
++   Each link is searched for from the position of the previous one, and
++   the first match is taken without backtracking.  Return true if the
++   whole chain is found.  GSI is passed by value, so the caller's
++   iterator is not advanced.  LOOP is currently unused.  */
++bool converse_and_multiply_p (loop_p loop, gimple_stmt_iterator gsi)
++{
++  gimple *stmt = gsi_stmt (gsi);
++  if (!load_by_specific_width_p (stmt, 8))
++    return false;
++
++  tree narrow_val = gimple_assign_lhs (stmt);
++
++  /* Find the widening conversion of the narrow load.  */
++  for (; !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (converse_by_specific_width_p (stmt, 8, 32)
++	  && gimple_assign_rhs1 (stmt) == narrow_val)
++	break;
++    }
++  if (gsi_end_p (gsi))
++    return false;
++
++  tree wide_val = gimple_assign_lhs (stmt);
++
++  /* Find the first 32-bit load at or after the conversion.  */
++  for (; !gsi_end_p (gsi); gsi_next (&gsi))
++    if (load_by_specific_width_p (gsi_stmt (gsi), 32))
++      break;
++  if (gsi_end_p (gsi))
++    return false;
++
++  tree factor = gimple_assign_lhs (gsi_stmt (gsi));
++
++  /* Find the multiplication of the widened value by that load.  */
++  for (; !gsi_end_p (gsi); gsi_next (&gsi))
++    if (multiply_by_specific_nodes_p (gsi_stmt (gsi), wide_val, factor))
++      break;
++  if (gsi_end_p (gsi))
++    return false;
++
++  tree product = gimple_assign_lhs (gsi_stmt (gsi));
++
++  /* Finally look for an addition that consumes the product.  */
++  for (; !gsi_end_p (gsi); gsi_next (&gsi))
++    if (plus_by_specific_node_p (gsi_stmt (gsi), product))
++      return true;
++
++  return false;
++}
++
++/* Return true if some statement in LOOP starts the load/widen/multiply/add
++   chain recognized by converse_and_multiply_p.  */
++bool sve_mode_opt_analyze_loop (loop_p loop)
++{
++  bool found = false;
++  basic_block *bbs = get_loop_body (loop);
++  for (unsigned int i = 0; i < loop->num_nodes && !found; i++)
++    for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
++	 !gsi_end_p (gsi) && !found; gsi_next (&gsi))
++      found = converse_and_multiply_p (loop, gsi);
++  free (bbs);  /* The array from get_loop_body must be freed by its caller.  */
++  return found;
++}
++
+ /* Function vect_analyze_loop.
+ 
+    Apply a set of analyses on LOOP, and create a loop_vec_info struct
+@@ -3007,10 +3162,29 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared,
+   auto_vector_modes vector_modes;
+   /* Autodetect first vector size we try.  */
+   vector_modes.safe_push (VOIDmode);
+-  unsigned int autovec_flags
+-    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
++
++#if !defined (CROSS_DIRECTORY_STRUCTURE) && defined (__aarch64__)
++  bool sve_chance = false;
++  if (flag_loop_sve_mode_opt && TARGET_SVE
++    && targetm.vector_mode_supported_p (VNx4QImode)
++    && sve_mode_opt_analyze_loop (loop))
++    {
++      if (dump_enabled_p ()) 
++	dump_printf (MSG_NOTE, "Loop sve mode optimization success\n");
++      sve_chance = true;
++      vector_modes.safe_push (VNx4QImode);
++    }
++#endif
++
++  unsigned int autovec_flags = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
+ 						    loop->simdlen != 0);
+-  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
++
++#if !defined (CROSS_DIRECTORY_STRUCTURE) && defined (__aarch64__)
++  if (sve_chance)
++    autovec_flags |= VECT_COMPARE_COSTS;
++#endif
++
++  bool  pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
+ 			     && !unlimited_cost_model (loop));
+   machine_mode autodetected_vector_mode = VOIDmode;
+   opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
diff --git a/gcc.spec b/gcc.spec
index 50bab7d..f4a46e9 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 100
+%global gcc_release 101
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -513,6 +513,7 @@ Patch400: 0400-c-fix-unsigned-__int128_t-semantics-PR108099.patch
 Patch401: 0401-testsuite-Fix-up-g-.dg-ext-int128-8.C-testcase-PR109.patch
 Patch402: 0402-c-fix-unsigned-typedef-name-extension-PR108099.patch
 Patch403: 0403-FIX-aarch64-align-arch-name-offset-in-aarch64-_core.patch
+Patch404: 0404-Add-loop-sve-mode-optimization.patch
 
 # Part 1001-1999
 %ifarch sw_64
@@ -1702,6 +1703,7 @@ not stable, so plugins must be rebuilt any time GCC is updated.
 %patch -P401 -p1
 %patch -P402 -p1
 %patch -P403 -p1
+%patch -P404 -p1
 
 %ifarch sw_64
 %patch -P1001 -p1
@@ -4329,6 +4331,10 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Sun Sep 28 2025 linhouzhong <hz_lin8@163.com> - 12.3.1-101
+- Type: Sync
+- DESC: Sync patches from openeuler/gcc.
+
 * Mon Sep 22 2025 Cutie Deng <dengzhiren2@huawei.com> - 12.3.1-100
 - Type: Bugfix
 - DESC: Correct CPU architecture info display for aarch64 -march=native
-- 
Gitee