加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0014-Array-widen-compare-Add-a-new-optimization-for-array.patch 61.27 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981
From 5ef5f6c4ae806f56ff81450c759f36d59b5b23db Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Sat, 29 Jul 2023 17:45:01 +0800
Subject: [PATCH 14/22] [Array-widen-compare] Add a new optimization for array
comparison scenarios
Add option farray-widen-compare.
For an array pointer whose element is a single-byte type,
by changing the pointer type to a long-byte type, the elements
can be combined and compared after loading.
---
gcc/Makefile.in | 1 +
gcc/common.opt | 5 +
gcc/doc/invoke.texi | 13 +-
gcc/passes.def | 1 +
.../gcc.dg/tree-ssa/awiden-compare-1.c | 19 +
.../gcc.dg/tree-ssa/awiden-compare-2.c | 90 +
.../gcc.dg/tree-ssa/awiden-compare-3.c | 22 +
.../gcc.dg/tree-ssa/awiden-compare-4.c | 22 +
.../gcc.dg/tree-ssa/awiden-compare-5.c | 19 +
.../gcc.dg/tree-ssa/awiden-compare-6.c | 19 +
.../gcc.dg/tree-ssa/awiden-compare-7.c | 22 +
.../gcc.dg/tree-ssa/awiden-compare-8.c | 24 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
gcc/tree-ssa-loop-array-widen-compare.cc | 1555 +++++++++++++++++
15 files changed, 1813 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
create mode 100644 gcc/tree-ssa-loop-array-widen-compare.cc
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 31ff95500..0aabc6ea3 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1653,6 +1653,7 @@ OBJS = \
tree-ssa-loop-ivopts.o \
tree-ssa-loop-manip.o \
tree-ssa-loop-niter.o \
+ tree-ssa-loop-array-widen-compare.o \
tree-ssa-loop-prefetch.o \
tree-ssa-loop-split.o \
tree-ssa-loop-unswitch.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index e365a48bc..4d91ce8cf 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1116,6 +1116,11 @@ fasynchronous-unwind-tables
Common Var(flag_asynchronous_unwind_tables) Optimization
Generate unwind tables that are exact at each instruction boundary.
+farray-widen-compare
+Common Var(flag_array_widen_compare) Optimization
+Extends types for pointers to arrays to improve array comparison performance.
+In some extreme situations this may result in unsafe behavior.
+
fauto-inc-dec
Common Var(flag_auto_inc_dec) Init(1) Optimization
Generate auto-inc/dec instructions.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ff8cd032f..a11e2c24b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -507,7 +507,7 @@ Objective-C and Objective-C++ Dialects}.
-falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol
-fno-allocation-dce -fallow-store-data-races @gol
-fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol
--fauto-inc-dec -fbranch-probabilities @gol
+-farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol
-fcaller-saves @gol
-fcombine-stack-adjustments -fconserve-stack @gol
-fcompare-elim -fcprop-registers -fcrossjumping @gol
@@ -11387,6 +11387,17 @@ This pass is always skipped on architectures that do not have
instructions to support this. Enabled by default at @option{-O1} and
higher on architectures that support this.
+@item -farray-widen-compare
+@opindex farray-widen-compare
+In the narrow-byte array comparison scenario, the types of pointers
+pointing to array are extended so that elements of multiple bytes can
+be loaded at a time when a wide type is used to dereference an array,
+thereby improving the performance of this comparison scenario. In some
+extreme situations this may result in unsafe behavior.
+
+This option may generate better or worse code; results are highly dependent
+on the structure of loops within the source code.
+
@item -fdce
@opindex fdce
Perform dead code elimination (DCE) on RTL@.
diff --git a/gcc/passes.def b/gcc/passes.def
index 375d3d62d..8dbb7983e 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_dse);
NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
NEXT_PASS (pass_phiopt, true /* early_p */);
+ NEXT_PASS (pass_array_widen_compare);
NEXT_PASS (pass_tail_recursion);
NEXT_PASS (pass_if_to_switch);
NEXT_PASS (pass_convert_switch);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
new file mode 100644
index 000000000..e18ef5ec1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (++len != len_limit) /* Index is pre-incremented as part of the loop condition. */
+ if (pb[len] != cur[len]) /* Both byte arrays are read at the same index. */
+ break;
+ return len; /* The dg-final scan below expects exactly one success message for this loop. */
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
new file mode 100644
index 000000000..f4b20b43c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
@@ -0,0 +1,90 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define EMPTY_HASH_VALUE 0
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+#define true 1
+
+typedef struct {
+ uint32_t len; /* Written from the matched length in func. */
+ uint32_t dist; /* Written as delta - 1 in func. */
+} lzma_match;
+
+
+lzma_match *
+func (
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t depth,
+ uint32_t *const son,
+ const uint32_t cyclic_pos,
+ const uint32_t cyclic_size,
+ lzma_match *matches,
+ uint32_t len_best)
+{
+ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
+ uint32_t *ptr1 = son + (cyclic_pos << 1);
+
+ uint32_t len0 = 0;
+ uint32_t len1 = 0;
+
+ while (true)
+ {
+ const uint32_t delta = pos - cur_match;
+ if (depth-- == 0 || delta >= cyclic_size) /* Search budget used up or candidate out of the cyclic window. */
+ {
+ *ptr0 = EMPTY_HASH_VALUE;
+ *ptr1 = EMPTY_HASH_VALUE;
+ return matches;
+ }
+
+ uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0)) << 1);
+
+ const uint8_t *const pb = cur -delta;
+ uint32_t len = my_min(len0, len1);
+
+ if (pb[len] == cur[len])
+ {
+ while (++len != len_limit) /* Inner byte-compare loop: same shape as the loop in awiden-compare-1.c. */
+ if (pb[len] != cur[len])
+ break;
+
+ if (len_best < len) /* Record a new, longer match. */
+ {
+ len_best = len;
+ matches->len = len;
+ matches->dist = delta - 1;
+ ++matches;
+
+ if (len == len_limit)
+ {
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return matches;
+ }
+ }
+ }
+
+ if (pb[len] < cur[len]) /* Choose which of the two child slots to follow next. */
+ {
+ *ptr1 = cur_match;
+ ptr1 = pair + 1;
+ cur_match = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = cur_match;
+ ptr0 = pair;
+ cur_match = *ptr0;
+ len0 = len;
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
new file mode 100644
index 000000000..86f5e7a1e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len != len_limit) /* No increment in the condition in this variant. */
+ {
+ if (pb[len] != cur[len])
+ break;
+ len = len + 1; /* Index advances by one after the compare; the dg-final scan below expects one success message. */
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
new file mode 100644
index 000000000..d66558699
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len != len_limit)
+ {
+ if (pb[len] != cur[len])
+ break;
+ len = len + 2; /* Step of 2 instead of 1; the dg-final scan below expects no success message. */
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
new file mode 100644
index 000000000..e3e12bca4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (++len != len_limit)
+ if (pb[len] != cur[len-1]) /* The two arrays are read at different indices; the dg-final scan below expects no success message. */
+ break;
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
new file mode 100644
index 000000000..b8500735e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len++ != len_limit) /* Post-increment: the limit test sees the old value; the dg-final scan below expects no success message. */
+ if (pb[len] != cur[len])
+ break;
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
new file mode 100644
index 000000000..977bf5685
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len != len_limit)
+ {
+ len = len + 1; /* Index advances between the limit test and the array compare; the dg-final scan below expects no success message. */
+ if (pb[len] != cur[len])
+ break;
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
new file mode 100644
index 000000000..386784c92
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (++len != len_limit)
+ {
+ if (pb[len] != cur[len])
+ {
+ len = len - 1; /* Index is modified on the mismatch exit path; the dg-final scan below expects no success message. */
+ break;
+ }
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 2dae5e1c7..794b8017d 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -216,6 +216,7 @@ DEFTIMEVAR (TV_TREE_NRV , "tree NRV optimization")
DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies")
DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier")
DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier")
+DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare")
DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion")
DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering")
DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 606d1d60b..55ee2fe7f 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -453,6 +453,7 @@ extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop-array-widen-compare.cc b/gcc/tree-ssa-loop-array-widen-compare.cc
new file mode 100644
index 000000000..ba6170fa0
--- /dev/null
+++ b/gcc/tree-ssa-loop-array-widen-compare.cc
@@ -0,0 +1,1555 @@
+/* Array widen compare.
+ Copyright (C) 2022-2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "gimple-ssa.h"
+#include "tree-pretty-print.h"
+#include "fold-const.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-into-ssa.h"
+#include "cfganal.h"
+#include "cfgloop.h"
+#include "gimple-pretty-print.h"
+#include "tree-cfg.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "gimple-fold.h"
+
+/* This pass handles scenarios similar to the following:
+
+ uint32_t
+ func (uint32_t len0, uint32_t len1, const uint32_t len_limit,
+ const uint8_t *const pb, const uint8_t *const cur)
+ {
+ uint32_t len = my_min (len0, len1);
+ while (++len != len_limit)
+ if (pb[len] != cur[len])
+ break;
+ return len;
+ }
+
+ Features of this type of loop:
+ 1) the loop has two exits;
+ 2) One of the exits comes from the comparison result of the array;
+
+ From the source code point of view, the pass completes the conversion of the
+ above scenario into:
+
+ uint32_t
+ func (uint32_t len0, uint32_t len1, const uint32_t len_limit,
+ const uint8_t *const pb, const uint8_t *const cur)
+ {
+ uint32_t len = my_min (len0, len1);
+ // align_loop
+ for(++len; len + sizeof(uint64_t) <= len_limit; len += sizeof (uint64_t))
+ {
+ uint64_t a = *((uint64_t*)(cur+len));
+ uint64_t b = *((uint64_t*)(pb+len));
+ if (a != b)
+ {
+ int lz = __builtin_ctzll (a ^ b);
+ len += lz / 8;
+ return len;
+ }
+ }
+ // epilogue_loop
+ for (;len != len_limit; ++len)
+ if (pb[len] != cur[len])
+ break;
+ return len;
+ }
+
+ This pass is to complete the conversion of such scenarios from the internal
+ perspective of the compiler:
+ 1) determine_loop_form: The function completes the screening of such
+ scenarios;
+ 2) convert_to_new_loop: The function completes the conversion of
+ origin_loop to new loops, and removes origin_loop;
+ 3) origin_loop_info: The structure is used to record important information
+ of origin_loop: such as loop exit, growth step size
+ of loop induction variable, initial value
+ of induction variable, etc;
+ 4) create_new_loops: The function is used as the key content of the pass
+ to complete the creation of new loops. */
+
+/* Information collected about the original (candidate) loop while it is
+ being checked against the supported loop form. */
+
+struct origin_loop_info
+{
+ tree base; /* The initial index of the array in the old loop. */
+ tree limit; /* The limit index of the array in the old loop. */
+ tree arr1; /* Array 1 in the old loop. */
+ tree arr2; /* Array 2 in the old loop. */
+ edge entry_edge; /* The edge into the old loop. */
+ basic_block exit_bb1; /* Destination block of exit_e1. */
+ basic_block exit_bb2; /* Destination block of exit_e2. */
+ edge exit_e1; /* Exit edge whose source is the loop header. */
+ edge exit_e2; /* Exit edge whose source is not the loop header. */
+ gimple *cond_stmt1; /* Last stmt of exit_e1's source, if it passes check_cond_stmt. */
+ gimple *cond_stmt2; /* Last stmt of exit_e2's source, if it passes check_cond_stmt. */
+ gimple *update_stmt; /* Stmt defining the IV value that reaches the header PHI on the non-entry edge. */
+ bool exist_prolog_assgin;
+ /* True when update_stmt sits in the loop header and its result is the
+ value tested by the header condition, eg: while (++len != limit). */
+ unsigned HOST_WIDE_INT step;
+ /* The growth step of the loop induction variable. */
+};
+
+typedef struct origin_loop_info origin_loop_info;
+
+static origin_loop_info origin_loop; /* Single shared record; reset by init_origin_loop_structure. */
+hash_map <basic_block, tree> defs_map; /* NOTE(review): basic block -> tree map; its users are not visible in this part of the file. */
+
+/* Dump every basic block whose loop_father is LOOP (blocks owned by nested
+   loops are skipped) to the dump file, visiting them in dominator order.
+   Nothing is done unless a dump file is open and TDF_DETAILS is set.  */
+
+static void
+dump_loop_bb (struct loop *loop)
+{
+  /* The dump condition does not depend on the block, so test it once and
+     avoid collecting the loop body when nothing would be printed.  */
+  if (!dump_file || !(dump_flags & TDF_DETAILS))
+    return;
+
+  basic_block *body = get_loop_body_in_dom_order (loop);
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    {
+      basic_block bb = body[i];
+      if (bb->loop_father != loop)
+        continue;
+
+      /* I is unsigned, so it is printed with %u.  */
+      fprintf (dump_file, "===== the %uth bb of loop ==========:\n", i);
+      gimple_dump_bb (dump_file, bb, 0, dump_flags);
+      fprintf (dump_file, "\n");
+    }
+
+  /* get_loop_body_in_dom_order hands back an array the caller must free.  */
+  free (body);
+}
+
+/* Return true when the latch of LOOP has exactly one successor edge and
+   that edge is the one leading back to the loop header.  */
+
+static bool
+loop_single_backedge_p (class loop *loop)
+{
+  basic_block latch_bb = loop->latch;
+
+  /* The edge lookups only happen once the latch is known to have a single
+     successor.  */
+  return (single_succ_p (latch_bb)
+          && single_succ_edge (latch_bb) == find_edge (latch_bb, loop->header));
+}
+
+/* Return true when the header of LOOP has exactly two predecessors: one
+   coming from the latch and the other from a block outside LOOP.  */
+
+static bool
+loop_single_preheader_bb (class loop *loop)
+{
+  basic_block hdr = loop->header;
+  if (EDGE_COUNT (hdr->preds) != 2)
+    return false;
+
+  basic_block src0 = EDGE_PRED (hdr, 0)->src;
+  basic_block src1 = EDGE_PRED (hdr, 1)->src;
+
+  /* Either predecessor may be the latch; the other one must then lie
+     outside the loop.  */
+  if (src0 == loop->latch && src1->loop_father != loop)
+    return true;
+  if (src1 == loop->latch && src0->loop_father != loop)
+    return true;
+
+  return false;
+}
+
+/* Reset the global origin_loop record to its empty state: every pointer
+   field NULL, exist_prolog_assgin false and step 0.  */
+
+static void
+init_origin_loop_structure ()
+{
+  /* origin_loop_info only holds pointers, a bool and an integer, so a
+     value-initialized temporary clears every field at once.  */
+  origin_loop = origin_loop_info ();
+}
+
+/* Return the first predecessor edge of LOOP's header that does not come
+   from the latch.  If the header has no such edge, LOOP must be the
+   outermost loop and the single successor edge of the function's entry
+   block is returned instead.  */
+
+static edge
+get_loop_preheader_edge (class loop *loop)
+{
+  edge pred;
+  edge_iterator it;
+
+  FOR_EACH_EDGE (pred, it, loop->header->preds)
+    if (pred->src != loop->latch)
+      return pred;
+
+  /* No non-latch predecessor was found.  */
+  gcc_assert (!loop_outer (loop));
+  return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+}
+
+/* Return true when STMT is a GIMPLE_COND that compares two SSA names with
+   == or !=.  A NULL STMT yields false.  */
+
+static bool
+check_cond_stmt (gimple *stmt)
+{
+  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
+    return false;
+
+  const enum tree_code cmp = gimple_cond_code (stmt);
+  if (cmp != NE_EXPR && cmp != EQ_EXPR)
+    return false;
+
+  /* Conditions with a non-SSA_NAME operand are not supported,
+     eg: if (len_1 != 100).  */
+  return (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
+          && TREE_CODE (gimple_cond_rhs (stmt)) == SSA_NAME);
+}
+
+/* Record the exit information of LOOP in origin_loop: exit edge, exit
+   destination block and exit condition stmt (exit_eX, exit_bbX,
+   cond_stmtX).  Slot 1 describes the exit leaving from the loop header,
+   slot 2 the other exit.
+
+   Return true only when LOOP has exactly two exit edges and all six
+   fields end up set.  Return false without touching anything when any of
+   those fields is already set.  */
+
+static bool
+record_origin_loop_exit_info (class loop *loop)
+{
+  if (origin_loop.exit_e1 != NULL || origin_loop.exit_bb1 != NULL
+      || origin_loop.exit_e2 != NULL || origin_loop.exit_bb2 != NULL
+      || origin_loop.cond_stmt1 != NULL || origin_loop.cond_stmt2 != NULL)
+    return false;
+
+  /* Hold the edge list in an owning auto_vec so its storage is released
+     exactly once, on every return path.  */
+  auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
+
+  /* An empty list is rejected here too.  */
+  if (exit_edges.length () != 2)
+    return false;
+
+  unsigned i = 0;
+  edge e = NULL;
+  FOR_EACH_VEC_ELT (exit_edges, i, e)
+    {
+      /* The candidate exit condition is the last stmt of the source block;
+         it is only recorded when it has the supported form.  */
+      gimple *last = gsi_stmt (gsi_last_bb (e->src));
+      bool cond_ok = check_cond_stmt (last);
+
+      if (e->src == loop->header)
+        {
+          origin_loop.exit_e1 = e;
+          origin_loop.exit_bb1 = e->dest;
+          if (cond_ok)
+            origin_loop.cond_stmt1 = last;
+        }
+      else
+        {
+          origin_loop.exit_e2 = e;
+          origin_loop.exit_bb2 = e->dest;
+          if (cond_ok)
+            origin_loop.cond_stmt2 = last;
+        }
+    }
+
+  return (origin_loop.exit_e1 != NULL && origin_loop.exit_bb1 != NULL
+          && origin_loop.exit_e2 != NULL && origin_loop.exit_bb2 != NULL
+          && origin_loop.cond_stmt1 != NULL
+          && origin_loop.cond_stmt2 != NULL);
+}
+
+/* Return true when T is a non-NULL SSA_NAME that has an underlying
+   variable (SSA_NAME_VAR is non-NULL).  */
+
+static bool
+ssa_name_var_p (tree t)
+{
+  return t != NULL && TREE_CODE (t) == SSA_NAME && SSA_NAME_VAR (t) != NULL;
+}
+
+/* Return true when T1 and T2 are both SSA_NAMEs with an underlying
+   variable and that variable is the same for both.  */
+
+static bool
+same_ssa_name_var_p (tree t1, tree t2)
+{
+  return (ssa_name_var_p (t1)
+          && ssa_name_var_p (t2)
+          && SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2));
+}
+
+/* Get the upper bound of the induction variable of the origin loop from
+   the header condition STMT (a GIMPLE_COND) and store it in
+   origin_loop.limit.
+
+   The bound is accepted only when both operands have INTEGER_TYPE, the rhs
+   is an SSA name of a PARM_DECL whose defining stmt is a GIMPLE_NOP, and
+   the lhs is an SSA name of a variable that is not a PARM_DECL.  Return
+   true on success; return false if the limit was already recorded or the
+   condition does not have this form.  */
+
+static bool
+get_iv_upper_bound (gimple *stmt)
+{
+  if (origin_loop.limit != NULL)
+    return false;
+
+  tree lhs = gimple_cond_lhs (stmt);
+  tree rhs = gimple_cond_rhs (stmt);
+
+  if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
+      || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE)
+    return false;
+
+  /* TODO: Currently, the input restrictions on lhs and rhs are implemented
+     through PARM_DECL.  We may consider releasing the restrictions later,
+     and we need to consider the overall adaptation scenario and adding test
+     cases.  */
+  if (!ssa_name_var_p (rhs) || TREE_CODE (SSA_NAME_VAR (rhs)) != PARM_DECL)
+    return false;
+  if (!ssa_name_var_p (lhs) || TREE_CODE (SSA_NAME_VAR (lhs)) == PARM_DECL)
+    return false;
+
+  /* RHS is known to be an SSA_NAME only from this point on, so the
+     defining stmt is looked up here and not earlier.  */
+  gimple *def = SSA_NAME_DEF_STMT (rhs);
+  if (!def || gimple_code (def) != GIMPLE_NOP)
+    return false;
+
+  origin_loop.limit = rhs;
+  return true;
+}
+
+/* Return true only when STMT is an assignment whose rhs code is PLUS_EXPR,
+   whose rhs1 is an SSA_NAME with the same variable as origin_loop.base,
+   and whose rhs2 is the INTEGER_CST 1.
+
+   As a side effect origin_loop.step is set to the constant addend whenever
+   the stmt has that shape and the constant fits an unsigned
+   HOST_WIDE_INT, even if the step is not 1 and false is returned.  */
+
+static bool
+check_update_stmt (gimple *stmt)
+{
+  /* The defining stmt handed in by the caller may be a PHI or a NOP; the
+     gimple_assign_* accessors are only valid on assignments.  */
+  if (!stmt || !is_gimple_assign (stmt))
+    return false;
+
+  if (gimple_assign_rhs_code (stmt) != PLUS_EXPR)
+    return false;
+
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != INTEGER_CST
+      || !same_ssa_name_var_p (rhs1, origin_loop.base))
+    return false;
+
+  /* A negative addend on a signed type does not fit an unsigned
+     HOST_WIDE_INT; reject it instead of converting it.  */
+  if (!tree_fits_uhwi_p (rhs2))
+    return false;
+
+  origin_loop.step = tree_to_uhwi (rhs2);
+  return origin_loop.step == 1;
+}
+
+/* Get the initial value of the induction variable of the origin loop.
+
+   STMT is the header condition; its lhs identifies the induction variable.
+   Each PHI in the block of STMT whose result has the same variable as that
+   lhs is inspected:
+     - its argument on origin_loop.entry_edge becomes origin_loop.base;
+     - its argument on every other incoming edge must be defined by a stmt
+       accepted by check_update_stmt, which becomes origin_loop.update_stmt.
+   origin_loop.exist_prolog_assgin is set when that update stmt lives in
+   the header and defines the lhs of STMT itself.
+
+   Return true when both base and update_stmt were recorded; return false
+   if either was already set on entry or a PHI has an unsupported form.  */
+
+static bool
+get_iv_base (gimple *stmt)
+{
+  tree lhs = gimple_cond_lhs (stmt);
+  if (origin_loop.base != NULL || origin_loop.update_stmt != NULL)
+    return false;
+
+  basic_block header = gimple_bb (stmt);
+
+  for (gphi_iterator gsi = gsi_start_phis (header); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      gphi *phi = gsi.phi ();
+      if (!same_ssa_name_var_p (gimple_phi_result (phi), lhs))
+        continue;
+
+      tree base = PHI_ARG_DEF_FROM_EDGE (phi, origin_loop.entry_edge);
+      if (!same_ssa_name_var_p (base, lhs))
+        return false;
+      origin_loop.base = base;
+
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, header->preds)
+        {
+          if (e == origin_loop.entry_edge)
+            continue;
+
+          tree iv_after = PHI_ARG_DEF_FROM_EDGE (phi, e);
+          /* A PHI argument can be a constant; only an SSA_NAME has a
+             defining stmt to inspect.  */
+          if (TREE_CODE (iv_after) != SSA_NAME)
+            return false;
+
+          gimple *update = SSA_NAME_DEF_STMT (iv_after);
+          if (!check_update_stmt (update))
+            return false;
+          origin_loop.update_stmt = update;
+
+          if (gimple_bb (update) == header && iv_after == lhs)
+            origin_loop.exist_prolog_assgin = true;
+        }
+    }
+
+  return origin_loop.base != NULL && origin_loop.update_stmt != NULL;
+}
+
+/* Walk the header of LOOP from its last statement backwards and record in
+   origin_loop the entry edge, the upper bound (limit), the initial value
+   (base) and the update statement of the induction variable.  Debug
+   statements are ignored.  Besides the exit condition, the only statement
+   tolerated in the header is the prolog_assign, i.e. the induction variable
+   update itself, as produced by
+     while (++len != limit)
+       ......
+   where ++len is evaluated in the header before the comparison.  Return
+   true when all four pieces of information were recorded.  */
+
+static bool
+record_origin_loop_header (class loop *loop)
+{
+  /* Bail out if any of the fields has already been recorded.  */
+  if (origin_loop.entry_edge != NULL || origin_loop.base != NULL
+      || origin_loop.update_stmt != NULL || origin_loop.limit != NULL)
+    return false;
+
+  basic_block header_bb = loop->header;
+  origin_loop.entry_edge = get_loop_preheader_edge (loop);
+
+  for (gimple_stmt_iterator it = gsi_last_bb (header_bb); !gsi_end_p (it);
+       gsi_prev (&it))
+    {
+      gimple *cur = gsi_stmt (it);
+      if (!cur)
+        return false;
+      if (is_gimple_debug (cur))
+        continue;
+
+      switch (gimple_code (cur))
+        {
+        case GIMPLE_COND:
+          /* The exit test yields the limit first, then the base.  */
+          if (!get_iv_upper_bound (cur))
+            return false;
+          if (!get_iv_base (cur))
+            return false;
+          break;
+
+        case GIMPLE_ASSIGN:
+          /* Only the prolog_assign may sit in the header.  */
+          if (cur != origin_loop.update_stmt
+              || !origin_loop.exist_prolog_assgin)
+            return false;
+          break;
+
+        default:
+          return false;
+        }
+    }
+
+  return (origin_loop.entry_edge != NULL && origin_loop.base != NULL
+          && origin_loop.update_stmt != NULL && origin_loop.limit != NULL);
+}
+
+/* Check the latch of LOOP.  When prolog_assign exists the latch must hold
+   no statement; otherwise it must hold exactly the recorded update
+   statement, eg:
+     len_2 = len_1 + 1.
+   Debug statements are skipped so that the decision does not depend on
+   whether debug information is being generated, matching what the header
+   and body checks do.  */
+
+static bool
+record_origin_loop_latch (class loop *loop)
+{
+  gimple_stmt_iterator gsi = gsi_start_nondebug_bb (loop->latch);
+
+  if (origin_loop.exist_prolog_assgin)
+    return gsi_end_p (gsi);
+
+  /* The first non-debug statement must be the update statement ...  */
+  if (gsi_end_p (gsi) || gsi_stmt (gsi) != origin_loop.update_stmt)
+    return false;
+
+  /* ... and nothing but debug statements may follow it.  */
+  gsi_next_nondebug (&gsi);
+  return gsi_end_p (gsi);
+}
+
+/* Return true when MEM_REF dereferences, at offset zero, an SSA_NAME of
+   pointer type whose defining statement is an assignment with rhs code
+   POINTER_PLUS_EXPR.  */
+
+static bool
+check_body_mem_ref (tree mem_ref)
+{
+  tree arg0 = TREE_OPERAND (mem_ref, 0);
+  tree arg1 = TREE_OPERAND (mem_ref, 1);
+
+  /* Operand 0 of a MEM_REF is not necessarily an SSA_NAME, so it has to be
+     checked before SSA_NAME_DEF_STMT is applied to it.  */
+  if (TREE_CODE (arg0) != SSA_NAME
+      || TREE_CODE (TREE_TYPE (arg0)) != POINTER_TYPE
+      || TREE_CODE (arg1) != INTEGER_CST
+      || !integer_zerop (arg1))
+    return false;
+
+  /* The definition may be a PHI or a GIMPLE_NOP; gimple_assign_rhs_code is
+     only valid on assignments.  */
+  gimple *def = SSA_NAME_DEF_STMT (arg0);
+  return (def && is_gimple_assign (def)
+          && gimple_assign_rhs_code (def) == POINTER_PLUS_EXPR);
+}
+
+/* STMT is a POINTER_PLUS_EXPR assignment.  Return true if its rhs2 is
+   defined by a NOP_EXPR conversion of a name sharing its variable with
+   origin_loop.base, i.e. the offset comes from the induction variable of
+   the original loop.  On success the pointer operand rhs1 is recorded as
+   origin_loop.arr1 on the first call and as origin_loop.arr2 on the second
+   call.  TMP_INDEX receives the offset used by the first array; the second
+   array must use the very same offset.  A third match is rejected.  */
+
+static bool
+check_body_pointer_plus (gimple *stmt, tree &tmp_index)
+{
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  if (TREE_CODE (TREE_TYPE (rhs1)) != POINTER_TYPE)
+    return false;
+
+  /* The offset may be a constant; SSA_NAME_DEF_STMT is only meaningful for
+     SSA names.  */
+  if (TREE_CODE (rhs2) != SSA_NAME)
+    return false;
+
+  /* The definition may be a PHI or a GIMPLE_NOP; gimple_assign_rhs_code is
+     only valid on assignments.  */
+  gimple *g = SSA_NAME_DEF_STMT (rhs2);
+  if (!g || !is_gimple_assign (g) || gimple_assign_rhs_code (g) != NOP_EXPR)
+    return false;
+
+  if (!same_ssa_name_var_p (gimple_assign_rhs1 (g), origin_loop.base))
+    return false;
+
+  if (!origin_loop.arr1)
+    {
+      origin_loop.arr1 = rhs1;
+      tmp_index = rhs2;
+      return true;
+    }
+
+  if (!origin_loop.arr2)
+    {
+      origin_loop.arr2 = rhs1;
+      /* Both arrays have to be indexed by the same offset.  */
+      return tmp_index == rhs2;
+    }
+
+  return false;
+}
+
+/* Record the array comparison information of the original loop, while
+   ensuring that the loop body only contains statements feeding cond_stmt2.
+   Starting from both operands of origin_loop.cond_stmt2, the definitions
+   located in the body block are followed through MEM_REF,
+   POINTER_PLUS_EXPR and NOP_EXPR assignments; every statement reached is
+   marked visited.  Return true when every non-debug statement of the body
+   was visited and both origin_loop.arr1 and origin_loop.arr2 were
+   recorded.  LOOP is not used.  */
+
+static bool
+record_origin_loop_body (class loop *loop)
+{
+  basic_block body = gimple_bb (origin_loop.cond_stmt2);
+
+  if (origin_loop.arr1 != NULL || origin_loop.arr2 != NULL)
+    return false;
+
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi))
+    gimple_set_visited (gsi_stmt (gsi), false);
+
+  tree cond_lhs = gimple_cond_lhs (origin_loop.cond_stmt2);
+  tree cond_rhs = gimple_cond_rhs (origin_loop.cond_stmt2);
+  if (TREE_CODE (TREE_TYPE (cond_lhs)) != INTEGER_TYPE
+      || TREE_CODE (TREE_TYPE (cond_rhs)) != INTEGER_TYPE)
+    return false;
+
+  auto_vec<tree> stack;
+  tree tmp_index = NULL;
+  stack.safe_push (cond_lhs);
+  stack.safe_push (cond_rhs);
+  gimple_set_visited (origin_loop.cond_stmt2, true);
+
+  while (!stack.is_empty ())
+    {
+      tree op = stack.pop ();
+      /* An operand of the condition (or of a MEM_REF / POINTER_PLUS_EXPR)
+         may be a constant or an address; such shapes are outside the
+         supported pattern and SSA_NAME_DEF_STMT must not be applied to
+         them.  */
+      if (TREE_CODE (op) != SSA_NAME)
+        return false;
+
+      gimple *g = SSA_NAME_DEF_STMT (op);
+      if (!g || gimple_bb (g) != body || !is_gimple_assign (g))
+        continue;
+      gimple_set_visited (g, true);
+
+      enum tree_code code = gimple_assign_rhs_code (g);
+      if (code == MEM_REF)
+        {
+          tree mem_ref = gimple_assign_rhs1 (g);
+          if (!check_body_mem_ref (mem_ref))
+            return false;
+          stack.safe_push (TREE_OPERAND (mem_ref, 0));
+        }
+      else if (code == POINTER_PLUS_EXPR)
+        {
+          tree rhs2 = gimple_assign_rhs2 (g);
+          if (!check_body_pointer_plus (g, tmp_index))
+            return false;
+          stack.safe_push (rhs2);
+        }
+      else if (code == NOP_EXPR)
+        {
+          tree rhs = gimple_assign_rhs1 (g);
+          if (!same_ssa_name_var_p (rhs, origin_loop.base))
+            return false;
+          stack.safe_push (rhs);
+        }
+      else
+        return false;
+    }
+
+  /* Any non-debug statement that was not reached from the condition means
+     the body does more than the comparison.  */
+  for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      if (!gimple_visited_p (gsi_stmt (gsi))
+          && !is_gimple_debug (gsi_stmt (gsi)))
+        return false;
+    }
+
+  return origin_loop.arr1 != NULL && origin_loop.arr2 != NULL;
+}
+
+/* When detailed dumping is enabled, print every recorded field of
+   origin_loop (limit, base, arr1, arr2, cond_stmt1, cond_stmt2 and
+   update_stmt) to the dump file so that the matched loop form can be
+   inspected.  */
+
+static void
+dump_origin_loop_info ()
+{
+  if (!dump_file || !(dump_flags & TDF_DETAILS))
+    return;
+
+  /* Print TITLE followed by the tree NODE and a newline.  */
+  auto show_tree = [] (const char *title, tree node)
+    {
+      fprintf (dump_file, "%s", title);
+      print_node (dump_file, "", node, 0);
+      fprintf (dump_file, "\n");
+    };
+
+  /* Print TITLE followed by the statement STMT and a newline.  */
+  auto show_stmt = [] (const char *title, gimple *stmt)
+    {
+      fprintf (dump_file, "%s", title);
+      print_gimple_stmt (dump_file, stmt, 0);
+      fprintf (dump_file, "\n");
+    };
+
+  fprintf (dump_file, "\nThe origin loop info:\n");
+  show_tree ("\n the origin_loop.limit is:\n", origin_loop.limit);
+  show_tree ("\n the origin_loop.base is:\n", origin_loop.base);
+  show_tree ("\n the origin_loop.arr1 is:\n", origin_loop.arr1);
+  show_tree ("\n the origin_loop.arr2 is:\n", origin_loop.arr2);
+  show_stmt ("\n the origin_loop.cond_stmt1 is:\n", origin_loop.cond_stmt1);
+  show_stmt ("\n the origin_loop.cond_stmt2 is:\n", origin_loop.cond_stmt2);
+  show_stmt ("\n the origin_loop.update_stmt is:\n",
+             origin_loop.update_stmt);
+}
+
+/* Return true only if both recorded exits of LOOP lead to the same block,
+   that block lies outside LOOP, and it contains a two-argument PHI for the
+   induction variable whose two arguments are the same value.  */
+
+static bool
+check_exit_bb (class loop *loop)
+{
+  if (origin_loop.exit_bb1 != origin_loop.exit_bb2)
+    return false;
+  if (flow_bb_inside_loop_p (loop, origin_loop.exit_bb1))
+    return false;
+
+  for (gphi_iterator pi = gsi_start_phis (origin_loop.exit_bb1);
+       !gsi_end_p (pi); gsi_next (&pi))
+    {
+      gphi *phi = pi.phi ();
+      /* Only PHIs of the induction variable are of interest.  */
+      if (same_ssa_name_var_p (gimple_phi_result (phi), origin_loop.base)
+          && gimple_phi_num_args (phi) == 2
+          && gimple_phi_arg_def (phi, 0) == gimple_phi_arg_def (phi, 1))
+        return true;
+    }
+
+  return false;
+}
+
+/* Dump the recorded origin_loop information, then verify it:
+     - the exit block satisfies check_exit_bb;
+     - base is an SSA_NAME;
+     - the type of limit and the element types of both arrays are
+       read-only qualified;
+     - both arrays are pointers to integer types whose TYPE_SIZE is 8.
+   Return true when every requirement holds.  */
+
+static bool
+check_origin_loop_info (class loop *loop)
+{
+  dump_origin_loop_info ();
+
+  if (!check_exit_bb (loop))
+    return false;
+
+  if (TREE_CODE (origin_loop.base) != SSA_NAME)
+    return false;
+
+  if (!TYPE_READONLY (TREE_TYPE (origin_loop.limit)))
+    return false;
+
+  tree arr1_type = TREE_TYPE (origin_loop.arr1);
+  tree arr2_type = TREE_TYPE (origin_loop.arr2);
+  tree elem1_type = TREE_TYPE (arr1_type);
+  tree elem2_type = TREE_TYPE (arr2_type);
+
+  if (!TYPE_READONLY (elem1_type))
+    return false;
+
+  if (!TYPE_READONLY (elem2_type))
+    return false;
+
+  const bool pointers_to_integers
+    = (TREE_CODE (arr1_type) == POINTER_TYPE
+       && TREE_CODE (arr2_type) == POINTER_TYPE
+       && TREE_CODE (elem1_type) == INTEGER_TYPE
+       && TREE_CODE (elem2_type) == INTEGER_TYPE);
+  if (!pointers_to_integers)
+    return false;
+
+  /* Only arrays whose element TYPE_SIZE equals 8 are handled.  */
+  return (tree_to_uhwi (TYPE_SIZE (elem1_type)) == 8
+          && tree_to_uhwi (TYPE_SIZE (elem2_type)) == 8);
+}
+
+/* Record the exit, header, latch and body information of LOOP, in that
+   order, and finally validate what was recorded.  The first failing step
+   is reported in the detailed dump and makes the function return false;
+   true is returned when every step succeeds.  */
+
+static bool
+check_record_loop_form (class loop *loop)
+{
+  const bool details = dump_file && (dump_flags & TDF_DETAILS);
+
+  if (!record_origin_loop_exit_info (loop))
+    {
+      if (details)
+        fprintf (dump_file, "\nFailed to record loop exit information.\n");
+      return false;
+    }
+
+  if (!record_origin_loop_header (loop))
+    {
+      if (details)
+        fprintf (dump_file, "\nFailed to record loop header information.\n");
+      return false;
+    }
+
+  if (!record_origin_loop_latch (loop))
+    {
+      if (details)
+        fprintf (dump_file, "\nFailed to record loop latch information.\n");
+      return false;
+    }
+
+  if (!record_origin_loop_body (loop))
+    {
+      if (details)
+        fprintf (dump_file, "\nFailed to record loop body information.\n");
+      return false;
+    }
+
+  if (!check_origin_loop_info (loop))
+    {
+      if (details)
+        fprintf (dump_file, "\nFailed to check origin loop information.\n");
+      return false;
+    }
+
+  return true;
+}
+
+/* The main entry for judging whether LOOP has the form handled by this
+   pass.  LOOP must have no inner loop, exactly three blocks (header, body
+   and latch), more than one exit, a latch, a single back edge and a single
+   preheader block.  When these structural checks pass, origin_loop is
+   re-initialized and filled in by check_record_loop_form, whose result is
+   returned.  Each rejection is reported in the detailed dump.  */
+
+static bool
+determine_loop_form (class loop *loop)
+{
+  const bool details = dump_file && (dump_flags & TDF_DETAILS);
+
+  /* Currently only standard loops are processed, that is, only loop_header,
+     loop_latch, loop_body 3 bb blocks are included.  */
+  if (loop->inner || loop->num_nodes != 3)
+    {
+      if (details)
+        fprintf (dump_file, "\nWrong loop form, there is inner loop or"
+                            " redundant bb.\n");
+      return false;
+    }
+
+  if (single_exit (loop) || !loop->latch)
+    {
+      if (details)
+        fprintf (dump_file, "\nWrong loop form, only one exit or loop_latch"
+                            " does not exist.\n");
+      return false;
+    }
+
+  /* Support loop with only one backedge.  */
+  if (!loop_single_backedge_p (loop))
+    {
+      if (details)
+        fprintf (dump_file, "\nWrong loop form, loop back edges are not"
+                            " unique.\n");
+      return false;
+    }
+
+  /* Support loop with only one preheader BB.  */
+  if (!loop_single_preheader_bb (loop))
+    {
+      if (details)
+        fprintf (dump_file, "\nWrong loop form, loop preheader bb are not"
+                            " unique.\n");
+      return false;
+    }
+
+  init_origin_loop_structure ();
+  return check_record_loop_form (loop);
+}
+
+/* Create prolog bb for newly constructed loop. PROLOG_BB receives the new
+ block, which is created after AFTER_BB, added to loop OUTER and given
+ DOMINATOR_BB as immediate dominator; ENTRY_EDGE is redirected to it.
+ When prolog_assign exists in the original loop, the corresponding assign
+ is added to prolog_bb;
+ eg: <bb 7>
+ len_16 = len_10 + 1
+ A simple copy statement is created when prolog_assign does not exist;
+ eg: <bb 7>
+ len_16 = len_10
+
+ The IR of bb is as above. The new name becomes the current definition of
+ origin_loop.base and is recorded in defs_map for prolog_bb. */
+
+static void
+create_prolog_bb (basic_block &prolog_bb, basic_block after_bb,
+ basic_block dominator_bb, class loop *outer, edge entry_edge)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gimple *g;
+ tree lhs1;
+
+ prolog_bb = create_empty_bb (after_bb);
+ add_bb_to_loop (prolog_bb, outer);
+ /* From now on the original loop is entered through prolog_bb. */
+ redirect_edge_and_branch (entry_edge, prolog_bb);
+ set_immediate_dominator (CDI_DOMINATORS, prolog_bb, dominator_bb);
+ gsi = gsi_last_bb (prolog_bb);
+ lhs1 = copy_ssa_name (origin_loop.base);
+
+ /* Either replay the prolog increment (base + step) or plainly copy base. */
+ if (origin_loop.exist_prolog_assgin)
+ g = gimple_build_assign (lhs1, PLUS_EXPR, origin_loop.base,
+ build_int_cst (TREE_TYPE (origin_loop.base), origin_loop.step));
+ else
+ g = gimple_build_assign (lhs1, NOP_EXPR, origin_loop.base);
+ gimple_seq_add_stmt (&stmts, g);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+ /* Blocks created afterwards read the induction variable through
+ get_current_def (origin_loop.base). */
+ set_current_def (origin_loop.base, lhs1);
+ defs_map.put (prolog_bb, lhs1);
+}
+
+/* Create an empty preheader block for a new loop so that the loop keeps
+   its standard form.  The block is created after AFTER_BB, added to loop
+   OUTER, given DOMINATOR_BB as immediate dominator and returned through
+   LOOP_PRED_BB.  The current definition of origin_loop.base is recorded
+   for it in defs_map.  No edge is created here.  */
+
+static void
+create_loop_pred_bb (basic_block &loop_pred_bb, basic_block after_bb,
+                     basic_block dominator_bb, class loop *outer)
+{
+  basic_block pred_bb = create_empty_bb (after_bb);
+
+  add_bb_to_loop (pred_bb, outer);
+  set_immediate_dominator (CDI_DOMINATORS, pred_bb, dominator_bb);
+
+  tree current_iv = get_current_def (origin_loop.base);
+  defs_map.put (pred_bb, current_iv);
+
+  loop_pred_bb = pred_bb;
+}
+
+/* Complete the PHI nodes of BB.  For every PHI and every predecessor edge
+   that has no argument yet, the value recorded in defs_map for the source
+   block of the edge is added as argument, provided it shares its variable
+   with the PHI result.  Every predecessor without an argument is expected
+   to have an entry in defs_map.  */
+
+static void
+rewrite_add_phi_arg (basic_block bb)
+{
+  for (gphi_iterator pi = gsi_start_phis (bb); !gsi_end_p (pi);
+       gsi_next (&pi))
+    {
+      gphi *phi = pi.phi ();
+      tree result = gimple_phi_result (phi);
+
+      edge pred;
+      edge_iterator ei;
+      FOR_EACH_EDGE (pred, ei, bb->preds)
+        {
+          /* Leave arguments that are already present untouched.  */
+          if (PHI_ARG_DEF_FROM_EDGE (phi, pred))
+            continue;
+
+          tree def = *(defs_map.get (pred->src));
+          if (!same_ssa_name_var_p (def, result))
+            continue;
+
+          location_t where = (virtual_operand_p (def)
+                              ? UNKNOWN_LOCATION
+                              : gimple_location (SSA_NAME_DEF_STMT (def)));
+          add_phi_arg (phi, def, pred, where);
+        }
+    }
+}
+
+/* Create loop_header BB for align_loop. ALIGN_LOOP_HEADER receives the new
+ block, which is created after AFTER_BB, added to loop OUTER, linked from
+ AFTER_BB by a fallthru edge and given DOMINATOR_BB as immediate dominator.
+ eg: <bb 9>
+ _18 = (long unsigned int) len_17;
+ _19 = _18 + 8;
+ _20 = (long unsigned int) len_limit_12 (D);
+ if (_19 <= _20)
+
+ The IR of bb is as above. The result of the PHI created here becomes the
+ current definition of origin_loop.base and is recorded in defs_map. */
+
+static void
+create_align_loop_header (basic_block &align_loop_header, basic_block after_bb,
+ basic_block dominator_bb, class loop *outer)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gcond *cond_stmt;
+ gphi *phi;
+ tree res;
+
+ tree entry_node = get_current_def (origin_loop.base);
+ align_loop_header = create_empty_bb (after_bb);
+ add_bb_to_loop (align_loop_header, outer);
+ make_single_succ_edge (after_bb, align_loop_header, EDGE_FALLTHRU);
+ set_immediate_dominator (CDI_DOMINATORS, align_loop_header, dominator_bb);
+ gsi = gsi_last_bb (align_loop_header);
+ /* The PHI arguments are not filled in here; the caller adds them with
+ rewrite_add_phi_arg once the latch edge exists. */
+ phi = create_phi_node (NULL_TREE, align_loop_header);
+ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi));
+ res = gimple_phi_result (phi);
+
+ /* Stay in the wide loop only while index + 8 <= limit, both converted to
+ long unsigned int. */
+ tree lhs1 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, res);
+ tree lhs2 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (lhs1), lhs1,
+ build_int_cst (TREE_TYPE (lhs1), 8));
+ tree lhs3 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node,
+ origin_loop.limit);
+ cond_stmt = gimple_build_cond (LE_EXPR, lhs2, lhs3, NULL_TREE, NULL_TREE);
+ gimple_seq_add_stmt (&stmts, cond_stmt);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+
+ set_current_def (origin_loop.base, res);
+ defs_map.put (align_loop_header, res);
+}
+
+/* Create loop body BB for align_loop. ALIGN_LOOP_BODY_BB receives the new
+ block, which is created after AFTER_BB, added to loop OUTER, reached from
+ AFTER_BB through an EDGE_TRUE_VALUE edge and given DOMINATOR_BB as
+ immediate dominator.
+ eg: <bb 10>
+ _21 = (sizetype) len_17;
+ _22 = cur_15 (D) + _21;
+ _23 = MEM[(long unsigned int *)_22];
+ _24 = pb_13 (D) + _21;
+ _25 = MEM[(long unsigned int *)_24];
+ if (_23 != _25)
+
+ The IR of bb is as above. */
+
+static void
+create_align_loop_body_bb (basic_block &align_loop_body_bb,
+ basic_block after_bb, basic_block dominator_bb,
+ class loop *outer)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gimple *g;
+ gcond *cond_stmt;
+ tree lhs1, lhs2;
+
+ align_loop_body_bb = create_empty_bb (after_bb);
+ add_bb_to_loop (align_loop_body_bb, outer);
+ make_edge (after_bb, align_loop_body_bb, EDGE_TRUE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, align_loop_body_bb, dominator_bb);
+ gsi = gsi_last_bb (align_loop_body_bb);
+
+ /* Offset shared by both loads: the current induction variable as
+ sizetype. */
+ tree var = gimple_build (&stmts, NOP_EXPR, sizetype,
+ get_current_def (origin_loop.base));
+ /* Load a long unsigned int from arr2 + offset.
+ NOTE(review): the MEM_REF uses the plain long unsigned int type although
+ the address is only known to point into an array of 8-bit elements;
+ confirm that the implied alignment is acceptable on every supported
+ target. */
+ lhs1 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2),
+ origin_loop.arr2, var);
+ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node),
+ fold_build2 (MEM_REF, long_unsigned_type_node, lhs1,
+ build_int_cst (build_pointer_type (long_unsigned_type_node), 0)));
+ gimple_seq_add_stmt (&stmts, g);
+ lhs1 = gimple_assign_lhs (g);
+ /* Same wide load from arr1 + offset. */
+ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1),
+ origin_loop.arr1, var);
+ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node),
+ fold_build2 (MEM_REF, long_unsigned_type_node, lhs2,
+ build_int_cst (build_pointer_type (long_unsigned_type_node), 0)));
+ gimple_seq_add_stmt (&stmts, g);
+ lhs2 = gimple_assign_lhs (g);
+ /* Compare the two wide values with the comparison code of the original
+ element comparison. */
+ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2),
+ lhs1, lhs2, NULL_TREE, NULL_TREE);
+ gimple_seq_add_stmt (&stmts, cond_stmt);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+}
+
+/* Create loop_latch BB for align_loop. ALIGN_LOOP_LATCH receives the new
+ block, which is created after AFTER_BB, added to loop OUTER, reached from
+ AFTER_BB through an EDGE_FALSE_VALUE edge and given DOMINATOR_BB as
+ immediate dominator.
+ eg: <bb 11>
+ len_26 = len_17 + 8;
+
+ The IR of bb is as above. The incremented name is recorded in defs_map
+ for the latch; the current definition of origin_loop.base is left
+ unchanged. */
+
+static void
+create_align_loop_latch (basic_block &align_loop_latch, basic_block after_bb,
+ basic_block dominator_bb, class loop *outer)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gimple *g;
+ tree res;
+
+ tree entry_node = get_current_def (origin_loop.base);
+ align_loop_latch = create_empty_bb (after_bb);
+ add_bb_to_loop (align_loop_latch, outer);
+ make_edge (after_bb, align_loop_latch, EDGE_FALSE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, align_loop_latch, dominator_bb);
+ gsi = gsi_last_bb (align_loop_latch);
+ /* Advance the induction variable by 8, matching the "+ 8" bound test of
+ the align loop header. */
+ res = copy_ssa_name (entry_node);
+ g = gimple_build_assign (res, PLUS_EXPR, entry_node,
+ build_int_cst (TREE_TYPE (entry_node), 8));
+ gimple_seq_add_stmt (&stmts, g);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+ defs_map.put (align_loop_latch, res);
+}
+
+/* Allocate a loop whose header is HEADER and whose latch is LATCH,
+   register it as a child of OUTER_LOOP and return it.  */
+
+static class loop *
+init_new_loop (class loop *outer_loop, basic_block header, basic_block latch)
+{
+  class loop *created = alloc_loop ();
+
+  created->header = header;
+  created->latch = latch;
+  add_loop (created, outer_loop);
+
+  return created;
+}
+
+/* Create necessary exit BB for align_loop. ALIGN_LOOP_EXIT_BB receives the
+ new block, which is created after AFTER_BB, added to loop OUTER, reached
+ from AFTER_BB through an EDGE_TRUE_VALUE edge and given DOMINATOR_BB as
+ immediate dominator. AFTER_BB must end with the GIMPLE_COND comparing the
+ two wide values.
+ eg: <bb 12>
+ _27 = _23 ^ _25;
+ _28 = __builtin_ctzll (_27);
+ _29 = _28 >> 3;
+ len_30 = _29 + len_17;
+
+ The IR of bb is as above. The final name is recorded in defs_map for the
+ new block. */
+
+static void
+create_align_loop_exit_bb (basic_block &align_loop_exit_bb,
+ basic_block after_bb, basic_block dominator_bb,
+ class loop *outer)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gimple *g;
+ gimple *cond_stmt;
+ tree lhs1, lhs2;
+ tree cond_lhs, cond_rhs;
+ gcall *build_ctzll;
+
+ tree entry_node = get_current_def (origin_loop.base);
+ align_loop_exit_bb = create_empty_bb (after_bb);
+ add_bb_to_loop (align_loop_exit_bb, outer);
+ make_edge (after_bb, align_loop_exit_bb, EDGE_TRUE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, align_loop_exit_bb, dominator_bb);
+ gsi = gsi_last_bb (align_loop_exit_bb);
+
+ /* Reuse the operands of the wide comparison that ends AFTER_BB. */
+ cond_stmt = gsi_stmt (gsi_last_bb (after_bb));
+ cond_lhs = gimple_cond_lhs (cond_stmt);
+ cond_rhs = gimple_cond_rhs (cond_stmt);
+
+ /* The XOR has a set bit wherever the two wide values differ; counting its
+ trailing zero bits and shifting right by 3 turns the bit position into a
+ byte count.
+ NOTE(review): using the trailing-zero count presumably assumes a
+ little-endian byte order - confirm against the targets this pass is
+ enabled for. */
+ lhs1 = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (cond_lhs), cond_lhs,
+ cond_rhs);
+ build_ctzll = gimple_build_call (builtin_decl_explicit (BUILT_IN_CTZLL), 1,
+ lhs1);
+ lhs1 = make_ssa_name (integer_type_node);
+ gimple_call_set_lhs (build_ctzll, lhs1);
+ gimple_seq_add_stmt (&stmts, build_ctzll);
+ lhs2 = copy_ssa_name (lhs1);
+ g = gimple_build_assign (lhs2, RSHIFT_EXPR, lhs1,
+ build_int_cst (TREE_TYPE (lhs1), 3));
+ gimple_seq_add_stmt (&stmts, g);
+ /* Add the byte count to the induction variable value of this iteration. */
+ lhs1 = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (entry_node), lhs2);
+ lhs2 = copy_ssa_name (entry_node);
+ g = gimple_build_assign (lhs2, PLUS_EXPR, lhs1, entry_node);
+ gimple_seq_add_stmt (&stmts, g);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+ defs_map.put (align_loop_exit_bb, lhs2);
+}
+
+/* Create loop_header BB for epilogue_loop. EPILOGUE_LOOP_HEADER receives
+ the new block, which is created after AFTER_BB, added to loop OUTER,
+ linked from AFTER_BB by a fallthru edge and given DOMINATOR_BB as
+ immediate dominator.
+ eg: <bb 14>
+ # len_31 = PHI <len_17 (13), len_37 (16)>
+ if (len_31 != len_limit_12 (D))
+
+ The IR of bb is as above. The result of the PHI created here becomes the
+ current definition of origin_loop.base and is recorded in defs_map. */
+
+static void
+create_epilogue_loop_header (basic_block &epilogue_loop_header,
+ basic_block after_bb, basic_block dominator_bb,
+ class loop *outer)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gcond *cond_stmt;
+ tree res;
+ gphi *phi;
+
+ tree entry_node = get_current_def (origin_loop.base);
+ epilogue_loop_header = create_empty_bb (after_bb);
+ add_bb_to_loop (epilogue_loop_header, outer);
+ make_single_succ_edge (after_bb, epilogue_loop_header, EDGE_FALLTHRU);
+ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_header, dominator_bb);
+ gsi = gsi_last_bb (epilogue_loop_header);
+ /* The PHI arguments are not filled in here; the caller adds them with
+ rewrite_add_phi_arg once the latch edge exists. */
+ phi = create_phi_node (NULL_TREE, epilogue_loop_header);
+ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi));
+ res = gimple_phi_result (phi);
+ /* Same comparison code and limit as the header test of the original
+ loop. */
+ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt1), res,
+ origin_loop.limit, NULL_TREE, NULL_TREE);
+ gimple_seq_add_stmt (&stmts, cond_stmt);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+
+ set_current_def (origin_loop.base, res);
+ defs_map.put (epilogue_loop_header, res);
+}
+
+/* Create loop body BB for epilogue_loop. EPILOGUE_LOOP_BODY_BB receives
+ the new block, which is created after AFTER_BB, added to loop OUTER,
+ reached from AFTER_BB through an EDGE_TRUE_VALUE edge and given
+ DOMINATOR_BB as immediate dominator.
+ eg: <bb 15>
+ _32 = (sizetype) len_31;
+ _33 = pb_13 (D) + _32;
+ _34 = *_33;
+ _35 = cur_15 (D) + _32;
+ _36 = *_35;
+ if (_34 != _36)
+
+ The IR of bb is as above. The current definition of origin_loop.base is
+ recorded in defs_map for the new block. */
+
+static void
+create_epilogue_loop_body_bb (basic_block &epilogue_loop_body_bb,
+ basic_block after_bb, basic_block dominator_bb,
+ class loop *outer)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gimple *g;
+ gcond *cond_stmt;
+ tree lhs1, lhs2, lhs3;
+
+ tree entry_node = get_current_def (origin_loop.base);
+ epilogue_loop_body_bb = create_empty_bb (after_bb);
+ add_bb_to_loop (epilogue_loop_body_bb, outer);
+ make_edge (after_bb, epilogue_loop_body_bb, EDGE_TRUE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_body_bb, dominator_bb);
+ gsi = gsi_last_bb (epilogue_loop_body_bb);
+ /* Offset shared by both loads: the current induction variable as
+ sizetype. */
+ lhs1 = gimple_build (&stmts, NOP_EXPR, sizetype, entry_node);
+ /* Load one unsigned char from arr1 + offset. */
+ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1),
+ origin_loop.arr1, lhs1);
+ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
+ fold_build2 (MEM_REF, unsigned_char_type_node, lhs2,
+ build_int_cst (TREE_TYPE (lhs2), 0)));
+ gimple_seq_add_stmt (&stmts, g);
+ lhs2 = gimple_assign_lhs (g);
+ /* Load one unsigned char from arr2 + offset. */
+ lhs3 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2),
+ origin_loop.arr2, lhs1);
+ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
+ fold_build2 (MEM_REF, unsigned_char_type_node, lhs3,
+ build_int_cst (TREE_TYPE (lhs3), 0)));
+ gimple_seq_add_stmt (&stmts, g);
+ lhs3 = gimple_assign_lhs (g);
+ /* Compare the two elements with the comparison code of the original
+ element comparison. */
+ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2), lhs2,
+ lhs3, NULL_TREE, NULL_TREE);
+ gimple_seq_add_stmt (&stmts, cond_stmt);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+ defs_map.put (epilogue_loop_body_bb, get_current_def (origin_loop.base));
+}
+
+/* Create loop_latch BB for epilogue_loop. EPILOGUE_LOOP_LATCH receives the
+ new block, which is created after AFTER_BB, added to loop OUTER, reached
+ from AFTER_BB through an EDGE_FALSE_VALUE edge and given DOMINATOR_BB as
+ immediate dominator.
+ eg: <bb 16>
+ len_37 = len_31 + 1;
+
+ The IR of bb is as above. The incremented name is recorded in defs_map
+ for the latch. */
+
+static void
+create_epilogue_loop_latch (basic_block &epilogue_loop_latch,
+ basic_block after_bb, basic_block dominator_bb,
+ class loop *outer)
+{
+ gimple_seq stmts = NULL;
+ gimple_stmt_iterator gsi;
+ gimple *g;
+ tree res;
+
+ tree entry_node = get_current_def (origin_loop.base);
+ epilogue_loop_latch = create_empty_bb (after_bb);
+ add_bb_to_loop (epilogue_loop_latch, outer);
+ make_edge (after_bb, epilogue_loop_latch, EDGE_FALSE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_latch, dominator_bb);
+ gsi = gsi_last_bb (epilogue_loop_latch);
+ /* Advance the induction variable by the step recorded for the original
+ loop. */
+ res = copy_ssa_name (entry_node);
+ g = gimple_build_assign (res, PLUS_EXPR, entry_node,
+ build_int_cst (TREE_TYPE (entry_node), origin_loop.step));
+ gimple_seq_add_stmt (&stmts, g);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+ defs_map.put (epilogue_loop_latch, res);
+}
+
+/* convert_to_new_loop
+ | |
+ | |
+ | | entry_edge
+ | ______ |
+ | / V V
+ | | -----origin_loop_header---
+ | | | |
+ | | -------------------------\
+ | | | \
+ | | V \___ ___ ___ ___ ___ ___ ___
+ | | -----origin_loop_body----- |
+ | | | | |
+ | | -------------------------\ |
+ | | | \___ ___ ___ ___ |
+ | | V V V
+ | | -----origin_loop_latch---- -----exit_bb------
+ | | | | | |
+ | | /-------------------------- ------------------
+ | \ __ /
+ |
+ | |
+ | ====> |entry_edge
+ | V
+ | -------prolog_bb-----
+ | | |
+ | ---------------------
+ | |
+ | V
+ | -----align_loop_header----
+ | /-----------------> | |
+ |/ --------------------------
+ || / \
+ || V V
+ || ---align_loop_body--- ---epilogue_loop_header--
+ || | | -------| |<---|
+ || --------------------\ / ------------------------- |
+ || | \____ | | |
+ || V | | V |
+ || ---align_loop_latch--- | | ---epilogue_loop_body---- |
+ || | | | | ----| | |
+ || ---------------------- | | / ------------------------- |
+ || / __________/ | | | |
+ || / | | | V |
+ | \ __________/ | | | ---epilogue_loop_latch--- |
+ | | | | | | |
+ | | | | ------------------------- /
+ | V | | | /
+ | -align_loop_exit_bb- | | \______________/
+ | | | | |
+ | -------------------- | |
+ | | | |
+ | | V V
+ | | -----exit_bb------
+ | |---->| |
+ | ------------------
+
+ The origin_loop conversion process starts from entry_edge and ends at
+ exit_bb; The execution logic of origin_loop is completely replaced by
+ align_loop + epilogue_loop:
+ 1) align_loop mainly implements the idea of using wide-type dereference
+ and comparison on array elements, so as to achieve the effect of
+ acceleration; For the corresponding source code understanding, please
+ refer to the description of the pass at the beginning;
+ 2) epilogue_loop processes the previous loop remaining array element
+ comparison. */
+
+static void
+create_new_loops (edge entry_edge)
+{
+ /* Build, starting at ENTRY_EDGE, the prolog block, the align loop with its
+ exit block and the epilogue loop, connect them to the recorded exit
+ block(s) of the original loop and finally remove the two original exit
+ edges. The blocks are created in control-flow order because each
+ create_* helper reads the current definition of origin_loop.base left
+ by the previous one. */
+ basic_block prolog_bb;
+ basic_block align_loop_header, align_loop_latch, align_loop_body_bb;
+ basic_block align_pred_bb, align_loop_exit_bb;
+ basic_block epilogue_loop_header, epilogue_loop_latch, epilogue_loop_body_bb;
+ basic_block epilogue_loop_pred_bb;
+ class loop *align_loop;
+ class loop *epilogue_loop;
+
+ /* Every new block is first added to the loop containing the source of
+ ENTRY_EDGE. */
+ class loop *outer = entry_edge->src->loop_father;
+
+ create_prolog_bb (prolog_bb, entry_edge->src, entry_edge->src, outer,
+ entry_edge);
+
+ create_loop_pred_bb (align_pred_bb, prolog_bb, prolog_bb, outer);
+ make_single_succ_edge (prolog_bb, align_pred_bb, EDGE_FALLTHRU);
+
+ create_align_loop_header (align_loop_header, align_pred_bb,
+ align_pred_bb, outer);
+
+ create_align_loop_body_bb (align_loop_body_bb, align_loop_header,
+ align_loop_header, outer);
+
+ create_align_loop_latch (align_loop_latch, align_loop_body_bb,
+ align_loop_body_bb, outer);
+ /* Close the align loop and fill in the header PHI now that both of its
+ predecessor edges exist. */
+ make_edge (align_loop_latch, align_loop_header, EDGE_FALLTHRU);
+ rewrite_add_phi_arg (align_loop_header);
+
+ align_loop = init_new_loop (outer, align_loop_header, align_loop_latch);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nPrint byte align loop %d:\n", align_loop->num);
+ flow_loop_dump (align_loop, dump_file, NULL, 1);
+ fprintf (dump_file, "\n\n");
+ }
+
+ create_align_loop_exit_bb (align_loop_exit_bb, align_loop_body_bb,
+ align_loop_body_bb, outer);
+
+ /* The false edge of the align loop header leads to the epilogue loop. */
+ create_loop_pred_bb (epilogue_loop_pred_bb, align_loop_header,
+ align_loop_header, outer);
+ make_edge (align_loop_header, epilogue_loop_pred_bb, EDGE_FALSE_VALUE);
+
+ create_epilogue_loop_header (epilogue_loop_header, epilogue_loop_pred_bb,
+ epilogue_loop_pred_bb, outer);
+
+ create_epilogue_loop_body_bb (epilogue_loop_body_bb, epilogue_loop_header,
+ epilogue_loop_header, outer);
+
+ create_epilogue_loop_latch (epilogue_loop_latch, epilogue_loop_body_bb,
+ epilogue_loop_body_bb, outer);
+ /* Close the epilogue loop and fill in its header PHI. */
+ make_single_succ_edge (epilogue_loop_latch, epilogue_loop_header,
+ EDGE_FALLTHRU);
+ rewrite_add_phi_arg (epilogue_loop_header);
+
+ epilogue_loop = init_new_loop (outer, epilogue_loop_header,
+ epilogue_loop_latch);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nPrint epilogue loop %d:\n", epilogue_loop->num);
+ flow_loop_dump (epilogue_loop, dump_file, NULL, 1);
+ fprintf (dump_file, "\n\n");
+ }
+ /* Connect the three ways out of the new code (align loop exit block,
+ epilogue body and epilogue header) to the recorded exit block(s). */
+ make_single_succ_edge (align_loop_exit_bb, origin_loop.exit_bb1,
+ EDGE_FALLTHRU);
+ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb1,
+ entry_edge->src);
+ make_edge (epilogue_loop_body_bb, origin_loop.exit_bb1, EDGE_TRUE_VALUE);
+
+ make_edge (epilogue_loop_header, origin_loop.exit_bb2, EDGE_FALSE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb2,
+ entry_edge->src);
+
+ /* Give the exit PHIs an argument for each of the new incoming edges. */
+ rewrite_add_phi_arg (origin_loop.exit_bb1);
+ rewrite_add_phi_arg (origin_loop.exit_bb2);
+
+ /* The original loop no longer reaches the exit block(s). */
+ remove_edge (origin_loop.exit_e1);
+ remove_edge (origin_loop.exit_e2);
+}
+
+/* Make sure that the dominance relationship of the newly inserted cfg is
+   not missing.  For direction DIR, the immediate dominator is recomputed
+   for every block of the current function that has none, and for
+   origin_loop.exit_bb1.  Dominance information for DIR must be
+   available.  */
+
+static void
+update_loop_dominator (cdi_direction dir)
+{
+  gcc_assert (dom_info_available_p (dir));
+
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      /* Query, recompute and store in the same direction instead of
+         mixing DIR with a hard-coded CDI_DOMINATORS.  */
+      if (bb == origin_loop.exit_bb1 || !get_immediate_dominator (dir, bb))
+        set_immediate_dominator (dir, bb, recompute_dominator (dir, bb));
+    }
+}
+
+/* Delete every basic block of the original LOOP and then the loop
+   structure itself.  */
+
+static void
+remove_origin_loop (class loop *loop)
+{
+  basic_block *blocks = get_loop_body_in_dom_order (loop);
+
+  /* Snapshot the block count before any block is deleted.  */
+  const unsigned count = loop->num_nodes;
+  for (unsigned idx = 0; idx != count; ++idx)
+    delete_basic_block (blocks[idx]);
+
+  free (blocks);
+  delete_loop (loop);
+}
+
+/* Perform the conversion of origin_loop to new_loop: build the replacement
+ loops starting at the recorded entry edge, delete the blocks of the
+ original LOOP, repair the dominator information and update the SSA
+ form. */
+
+static void
+convert_to_new_loop (class loop *loop)
+{
+ /* The new code is built first, while the original loop still exists;
+ create_new_loops also removes the original exit edges. */
+ create_new_loops (origin_loop.entry_edge);
+ remove_origin_loop (loop);
+ update_loop_dominator (CDI_DOMINATORS);
+ update_ssa (TODO_update_ssa);
+}
+
+/* The main entry of the array-widen-compare optimization.  Every loop of
+   the current function is visited from the innermost outwards; a loop
+   whose form is accepted by determine_loop_form is replaced by the align
+   loop plus epilogue loop.  Return the TODO flags for the pass manager,
+   which always contain TODO_update_ssa.  */
+
+static unsigned int
+tree_ssa_array_widen_compare ()
+{
+  const bool details = dump_file && (dump_flags & TDF_DETAILS);
+
+  if (details)
+    {
+      flow_loops_dump (dump_file, NULL, 1);
+      fprintf (dump_file, "\nConfirm which loop can be optimized using"
+                          " array-widen-compare\n");
+    }
+
+  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
+    {
+      if (details)
+        {
+          fprintf (dump_file, "======================================\n");
+          fprintf (dump_file, "Processing loop %d:\n", loop->num);
+          fprintf (dump_file, "======================================\n");
+          flow_loop_dump (loop, dump_file, NULL, 1);
+          fprintf (dump_file, "\n\n");
+        }
+
+      if (!determine_loop_form (loop))
+        continue;
+
+      if (details)
+        {
+          fprintf (dump_file, "The %dth loop form is success matched,"
+                              " and the loop can be optimized.\n",
+                   loop->num);
+          dump_loop_bb (loop);
+        }
+
+      convert_to_new_loop (loop);
+    }
+
+  return TODO_update_ssa;
+}
+
+/* Array widen compare: pass descriptor and pass class.  */
+
+namespace {
+
+const pass_data pass_data_tree_array_widen_compare =
+{
+  GIMPLE_PASS,
+  "awiden_compare",
+  OPTGROUP_LOOP,
+  TV_TREE_ARRAY_WIDEN_COMPARE,
+  (PROP_cfg | PROP_ssa),
+  0,
+  0,
+  0,
+  (TODO_update_ssa | TODO_verify_all)
+};
+
+class pass_array_widen_compare : public gimple_opt_pass
+{
+public:
+  pass_array_widen_compare (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_tree_array_widen_compare, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *);
+  virtual unsigned int execute (function *);
+
+}; // class pass_array_widen_compare
+
+/* The pass runs only when flag_array_widen_compare is positive and the
+   optimization level is at least 3.  */
+
+bool
+pass_array_widen_compare::gate (function *)
+{
+  return (flag_array_widen_compare > 0 && optimize >= 3);
+}
+
+/* Run the optimization on FUN.  Nothing is done when number_of_loops
+   reports at most one loop, or when the data model is not LP64 (64-bit
+   long and pointers, 32-bit int).  */
+
+unsigned int
+pass_array_widen_compare::execute (function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  /* Only supports LP64 data mode.  */
+  if (TYPE_PRECISION (long_integer_type_node) != 64
+      || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, "The current data mode is not supported,"
+                            " only the LP64 data mode is supported.\n");
+      return 0;
+    }
+
+  return tree_ssa_array_widen_compare ();
+}
+
+} // anon namespace
+
+/* Allocate a new instance of the array-widen-compare pass for context
+ CTXT and return it. */
+gimple_opt_pass *
+make_pass_array_widen_compare (gcc::context *ctxt)
+{
+ return new pass_array_widen_compare (ctxt);
+}
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化