加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0042-Add-split-complex-instructions-pass.patch 38.90 KB
一键复制 编辑 原始数据 按行查看 历史
郑晨卉 提交于 2024-04-11 10:45 . [Sync] Sync patch from openeuler/gcc
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245
From 9a8e5716543972dec36bae1f9d380d27bfbcdae1 Mon Sep 17 00:00:00 2001
From: Agrachev Andrey WX1228450 <agrachev.andrey@huawei-partners.com>
Date: Mon, 21 Aug 2023 12:35:19 +0300
Subject: [PATCH 09/18] Add split-complex-instructions pass
- Add option -fsplit-ldp-stp
- Add functionality to detect and split LDP instructions that depend on a preceding store.
- Add -param=param-ldp-dependency-search-range= to configure ldp dependency search range
- Add RTL tests
Co-authored-by: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
Co-authored-by: Zinin Ivan WX1305386 <zinin.ivan@huawei-partners.com>
Co-authored-by: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
---
gcc/common.opt | 5 +
gcc/config/aarch64/aarch64.cc | 42 ++
gcc/doc/tm.texi | 8 +
gcc/doc/tm.texi.in | 4 +
gcc/params.opt | 3 +
gcc/passes.def | 1 +
gcc/sched-rgn.cc | 704 +++++++++++++++++-
gcc/target.def | 10 +
.../gcc.dg/rtl/aarch64/test-ldp-dont-split.c | 74 ++
.../rtl/aarch64/test-ldp-split-rearrange.c | 40 +
.../gcc.dg/rtl/aarch64/test-ldp-split.c | 174 +++++
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
13 files changed, 1066 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
diff --git a/gcc/common.opt b/gcc/common.opt
index a42bee250..c0e3f5687 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1797,6 +1797,11 @@ floop-nest-optimize
Common Var(flag_loop_nest_optimize) Optimization
Enable the loop nest optimizer.
+fsplit-ldp-stp
+Common Var(flag_split_ldp_stp) Optimization
+Split load/store pair instructions into separate load/store operations
+for better performance.
+
fstrict-volatile-bitfields
Common Var(flag_strict_volatile_bitfields) Init(-1) Optimization
Force bitfield accesses to match their type width.
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 04072ca25..48e2eded0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27507,6 +27507,48 @@ aarch64_run_selftests (void)
#endif /* #if CHECKING_P */
+/* TODO: stop relying on insn code ranges; use an explicit list of instruction codes instead.  */
+
+/* Return true if ICODE falls inside one of the insn code ranges that
+   this port treats as load-pair patterns.  Every code between the two
+   bounds of a range (inclusive) is accepted.  */
+bool
+is_aarch64_ldp_insn (int icode)
+{
+  /* Inclusive { first, last } bounds of each load-pair family.  */
+  static const int ldp_code_ranges[][2] = {
+    { CODE_FOR_load_pair_sw_sisi, CODE_FOR_load_pair_dw_tftf },
+    { CODE_FOR_loadwb_pairsi_si, CODE_FOR_loadwb_pairtf_di },
+    { CODE_FOR_load_pairv8qiv8qi, CODE_FOR_load_pairdfdf },
+    { CODE_FOR_load_pairv16qiv16qi, CODE_FOR_load_pairv8bfv2df },
+    { CODE_FOR_load_pair_lanesv8qi, CODE_FOR_load_pair_lanesdf },
+  };
+
+  for (const auto &range : ldp_code_ranges)
+    if (icode >= range[0] && icode <= range[1])
+      return true;
+
+  return false;
+}
+
+/* Return true if ICODE falls inside one of the insn code ranges that
+   this port treats as store-pair patterns.  Every code between the two
+   bounds of a range (inclusive) is accepted.  */
+bool
+is_aarch64_stp_insn (int icode)
+{
+  /* Inclusive { first, last } bounds of each store-pair family.  */
+  static const int stp_code_ranges[][2] = {
+    { CODE_FOR_store_pair_sw_sisi, CODE_FOR_store_pair_dw_tftf },
+    { CODE_FOR_storewb_pairsi_si, CODE_FOR_storewb_pairtf_di },
+    { CODE_FOR_vec_store_pairv8qiv8qi, CODE_FOR_vec_store_pairdfdf },
+    { CODE_FOR_vec_store_pairv16qiv16qi, CODE_FOR_vec_store_pairv8bfv2df },
+    { CODE_FOR_store_pair_lanesv8qi, CODE_FOR_store_pair_lanesdf },
+  };
+
+  for (const auto &range : stp_code_ranges)
+    if (icode >= range[0] && icode <= range[1])
+      return true;
+
+  return false;
+}
+
+#undef TARGET_IS_LDP_INSN
+#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
+
+#undef TARGET_IS_STP_INSN
+#define TARGET_IS_STP_INSN is_aarch64_stp_insn
+
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c5006afc0..0c6415a9c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12113,6 +12113,14 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
+Return true if icode is corresponding to any of the LDP instruction types.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
+Return true if icode is corresponding to any of the STP instruction types.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
This target hook returns @code{true} past the point in which new jump
instructions could be created. On machines that require a register for
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f869ddd5e..6ff60e562 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7977,6 +7977,10 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@hook TARGET_IS_LDP_INSN
+
+@hook TARGET_IS_STP_INSN
+
@hook TARGET_CANNOT_MODIFY_JUMPS_P
@hook TARGET_HAVE_CONDITIONAL_EXECUTION
diff --git a/gcc/params.opt b/gcc/params.opt
index 7fcc2398d..6176d4790 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1217,4 +1217,7 @@ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER)
Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization
Target size of compressed pointer, which should be 8, 16 or 32.
+-param=param-ldp-dependency-search-range=
+Common Joined UInteger Var(param_ldp_dependency_search_range) Init(16) IntegerRange(1, 32) Param Optimization
+Range for dependent ldp search in split-ldp-stp pass.
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/passes.def b/gcc/passes.def
index 941bbadf0..a30e05688 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -514,6 +514,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_reorder_blocks);
NEXT_PASS (pass_leaf_regs);
NEXT_PASS (pass_split_before_sched2);
+ NEXT_PASS (pass_split_complex_instructions);
NEXT_PASS (pass_sched2);
NEXT_PASS (pass_stack_regs);
PUSH_INSERT_PASSES_WITHIN (pass_stack_regs)
diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
index a0dfdb788..b4df8bdc5 100644
--- a/gcc/sched-rgn.cc
+++ b/gcc/sched-rgn.cc
@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3. If not see
are actually scheduled. */
#include "config.h"
+#define INCLUDE_SET
+#define INCLUDE_VECTOR
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -65,6 +67,7 @@ along with GCC; see the file COPYING3. If not see
#include "dbgcnt.h"
#include "pretty-print.h"
#include "print-rtl.h"
+#include "cfgrtl.h"
/* Disable warnings about quoting issues in the pp_xxx calls below
that (intentionally) don't follow GCC diagnostic conventions. */
@@ -3951,6 +3954,705 @@ make_pass_sched_fusion (gcc::context *ctxt)
return new pass_sched_fusion (ctxt);
}
+namespace {
+
+/* Helper functions for def-use analysis.  */
+
+/* Return the chain of definitions reaching the use of REG in INSN.
+   Return NULL if a SUBREG use is met before the use of REG is found, if
+   any link of the chain lacks a reference or insn info, or if REG is a
+   global register and a linked insn does not explicitly set it.  */
+static struct df_link *
+get_defs (rtx_insn *insn, rtx reg)
+{
+  df_ref reg_use;
+
+  FOR_EACH_INSN_USE (reg_use, insn)
+    {
+      rtx used = DF_REF_REG (reg_use);
+      if (GET_CODE (used) == SUBREG)
+        return NULL;
+      if (REGNO (used) == REGNO (reg))
+        break;
+    }
+
+  /* INSN is expected to use REG.  */
+  gcc_assert (reg_use != NULL);
+
+  struct df_link *const chain = DF_REF_CHAIN (reg_use);
+
+  for (struct df_link *link = chain; link != NULL; link = link->next)
+    {
+      /* Give up when a definition is missing or is not tied to an
+         instruction.  */
+      if (link->ref == NULL || DF_REF_INSN_INFO (link->ref) == NULL)
+        return NULL;
+
+      /* Global registers are assumed to be defined at every call, so
+         dataflow may report a call_insn as a definition of REG.  Such a
+         definition is of no use here: only accept insns whose RTL
+         visibly sets REG.  */
+      if (global_regs[REGNO (reg)]
+          && !set_of (reg, DF_REF_INSN (link->ref)))
+        return NULL;
+    }
+
+  return chain;
+}
+
+/* Return the chain of uses reached by the definition of REG in INSN, or
+   NULL if any link of that chain has no reference attached.  INSN must
+   define REG.  */
+static struct df_link *
+get_uses (rtx_insn *insn, rtx reg)
+{
+  df_ref reg_def;
+
+  FOR_EACH_INSN_DEF (reg_def, insn)
+    {
+      if (REGNO (DF_REF_REG (reg_def)) == REGNO (reg))
+        break;
+    }
+
+  gcc_assert (reg_def != NULL && "Broken def-use analisys chain.");
+
+  struct df_link *const chain = DF_REF_CHAIN (reg_def);
+  struct df_link *link = chain;
+
+  while (link != NULL)
+    {
+      /* A link without a reference makes the whole chain unusable.  */
+      if (link->ref == NULL)
+        return NULL;
+      link = link->next;
+    }
+
+  return chain;
+}
+
+/* Static description of the split_complex_instructions RTL pass.  */
+const pass_data pass_data_split_complex_instructions = {
+  RTL_PASS,                           /* type  */
+  "split_complex_instructions",       /* name  */
+  OPTGROUP_NONE,                      /* optinfo_flags  */
+  TV_SPLIT_CMP_INS,                   /* tv_id  */
+  0,                                  /* properties_required  */
+  0,                                  /* properties_provided  */
+  0,                                  /* properties_destroyed  */
+  0,                                  /* todo_flags_start  */
+  (TODO_df_verify | TODO_df_finish),  /* todo_flags_finish  */
+};
+
+/* RTL pass that looks for load-pair instructions which appear to depend
+   on a store found a short distance before them and replaces each such
+   instruction with separate loads.  */
+class pass_split_complex_instructions : public rtl_opt_pass
+{
+private:
+  /* Kinds of instruction the pass distinguishes.  */
+  enum complex_instructions_t
+  {
+    UNDEFINED,
+    LDP,
+    LDP_TI,
+    STP,
+    STR
+  };
+
+  void split_complex_insn (rtx_insn *insn);
+  void split_ldp_ti (rtx_insn *insn);
+  void split_ldp_with_offset (rtx_insn *ldp_insn);
+  void split_simple_ldp (rtx_insn *ldp_insn);
+  void split_ldp_stp (rtx_insn *insn);
+  complex_instructions_t get_insn_type (rtx_insn *insn);
+
+  /* Cursors of the walk performed by execute ().  */
+  basic_block bb;
+  rtx_insn *insn;
+  /* Stores that share a register definition with the load pair
+     currently being examined.  */
+  std::set<rtx_insn *> dependent_stores_candidates;
+  /* Load pairs selected for splitting, in the order they were met in
+     the insn stream.  A vector is used instead of a pointer-keyed set
+     so that the order in which insns are split (and therefore the UIDs
+     given to the new insns) does not depend on host addresses.  */
+  std::vector<rtx_insn *> ldp_to_split_list;
+
+  complex_instructions_t complex_insn_type = UNDEFINED;
+  bool is_store_insn (rtx_insn *insn);
+  bool is_ldp_dependent_on_store (rtx_insn *ldp_insn, basic_block bb);
+  bool bfs_for_reg_dependent_store (rtx_insn *ldp_insn, basic_block search_bb,
+                                    rtx_insn *search_insn,
+                                    int search_range
+                                    = param_ldp_dependency_search_range);
+  bool is_store_reg_dependent (rtx_insn *ldp_insn, rtx_insn *str_insn);
+  void init_df ();
+  void find_dependent_stores_candidates (rtx_insn *ldp_insn);
+  int get_insn_offset (rtx_insn *insn, complex_instructions_t insn_type,
+                       int *arith_operation_ptr = NULL);
+
+public:
+  pass_split_complex_instructions (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_split_complex_instructions, ctxt)
+  {
+  }
+  /* opt_pass methods: */
+  virtual bool gate (function *);
+
+  /* Collect every load pair of the current function that depends on a
+     preceding store, then split the collected insns.  Collection and
+     splitting are separate phases so that the insn stream is not
+     modified while it is being walked.  */
+  virtual unsigned int
+  execute (function *)
+  {
+    init_df ();
+    ldp_to_split_list.clear ();
+    FOR_EACH_BB_FN (bb, cfun)
+      {
+        FOR_BB_INSNS (bb, insn)
+          {
+            complex_instructions_t insn_type = get_insn_type (insn);
+            /* TODO: Add splitting of STP instructions.  */
+            if (insn_type != LDP && insn_type != LDP_TI)
+              continue;
+            /* TODO: Currently support only ldp_ti and ldp with REG or
+               PLUS/MINUS offset expression.  */
+            if (insn_type == LDP_TI)
+              {
+                enum rtx_code ldp_memref_code
+                  = GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0));
+                if (ldp_memref_code != REG && ldp_memref_code != PLUS
+                    && ldp_memref_code != MINUS)
+                  continue;
+              }
+            /* Each insn is visited once, so no duplicate can be
+               recorded.  */
+            if (is_ldp_dependent_on_store (insn, bb))
+              ldp_to_split_list.push_back (insn);
+          }
+      }
+
+    for (rtx_insn *ldp_insn : ldp_to_split_list)
+      split_complex_insn (ldp_insn);
+
+    return 0;
+  }
+}; // class pass_split_complex_instructions
+
+/* Return true if a store that LDP_INSN depends on is found by searching
+   backwards from LDP_INSN, which belongs to BB.  */
+bool
+pass_split_complex_instructions::is_ldp_dependent_on_store (rtx_insn *ldp_insn,
+                                                            basic_block bb)
+{
+  /* Must run first: it fills the candidate set that the backward
+     search consults.  */
+  find_dependent_stores_candidates (ldp_insn);
+
+  const bool store_found = bfs_for_reg_dependent_store (ldp_insn, bb, ldp_insn);
+  return store_found;
+}
+
+/* Walk backwards from SEARCH_INSN inside SEARCH_BB looking for a store
+   that LDP_INSN depends on, and return true as soon as one is found.
+   At most SEARCH_RANGE insns are charged against the search; insns for
+   which active_insn_p is false are examined but not charged.  When the
+   head of SEARCH_BB is reached the search recurses into every
+   predecessor block with what is left of the range.  */
+bool
+pass_split_complex_instructions::bfs_for_reg_dependent_store (
+  rtx_insn *ldp_insn, basic_block search_bb, rtx_insn *search_insn,
+  int search_range)
+{
+  int budget = search_range;
+  rtx_insn *cursor = search_insn;
+
+  while (budget > 0)
+    {
+      if (cursor == NULL)
+        return false;
+
+      if (is_store_reg_dependent (ldp_insn, cursor))
+        {
+          if (dump_file)
+            {
+              fprintf (dump_file, "LDP to split:\n");
+              print_rtl_single (dump_file, ldp_insn);
+              fprintf (dump_file, "Found STR:\n");
+              print_rtl_single (dump_file, cursor);
+            }
+          return true;
+        }
+
+      if (cursor == BB_HEAD (search_bb))
+        {
+          /* Continue in every predecessor, starting from its last insn.
+             Blocks with index 0 are skipped.  */
+          edge_iterator ei;
+          edge pred_edge;
+
+          FOR_EACH_EDGE (pred_edge, ei, search_bb->preds)
+            {
+              basic_block pred_bb = pred_edge->src;
+              if (pred_bb->index != 0
+                  && bfs_for_reg_dependent_store (ldp_insn, pred_bb,
+                                                  BB_END (pred_bb),
+                                                  budget - 1))
+                return true;
+            }
+          return false;
+        }
+
+      /* Only active insns consume the search budget.  */
+      if (active_insn_p (cursor))
+        --budget;
+      cursor = PREV_INSN (cursor);
+    }
+
+  return false;
+}
+
+/* Set up the dataflow problems used by the pass and run the analysis.  */
+void
+pass_split_complex_instructions::init_df ()
+{
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+
+  /* Both use-def and def-use chains are requested.  */
+  df_chain_add_problem (DF_UD_CHAIN | DF_DU_CHAIN);
+  df_mir_add_problem ();
+  df_live_add_problem ();
+  df_live_set_all_dirty ();
+
+  df_analyze ();
+
+  /* Set only once the analysis above has run.  */
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+}
+
+/* Rebuild dependent_stores_candidates for LDP_INSN.  For every register
+   LDP_INSN uses, each insn defining that register is looked up, and
+   every store insn that uses such a definition through a regular
+   reference becomes a candidate.  Collection stops at the first used
+   register whose definitions cannot be obtained.  */
+void
+pass_split_complex_instructions::find_dependent_stores_candidates (
+  rtx_insn *ldp_insn)
+{
+  df_ref ldp_use;
+
+  dependent_stores_candidates.clear ();
+
+  FOR_EACH_INSN_USE (ldp_use, ldp_insn)
+    {
+      df_link *def_link = get_defs (ldp_insn, DF_REF_REG (ldp_use));
+      if (def_link == NULL)
+        return;
+
+      for (; def_link != NULL; def_link = def_link->next)
+        {
+          df_ref def_ref = def_link->ref;
+          df_link *use_link
+            = get_uses (DF_REF_INSN (def_ref), DF_REF_REG (def_ref));
+
+          /* An unusable use chain is NULL, so the loop is simply
+             skipped for it.  */
+          for (; use_link != NULL; use_link = use_link->next)
+            {
+              df_ref use_ref = use_link->ref;
+              if (DF_REF_CLASS (use_ref) != DF_REF_REGULAR)
+                continue;
+
+              rtx_insn *user = DF_REF_INSN (use_ref);
+              if (is_store_insn (user))
+                dependent_stores_candidates.insert (user);
+            }
+        }
+    }
+}
+
+/* Return true if STR_INSN is a recorded candidate store whose signed
+   address offset equals the signed offset of LDP_INSN or that offset
+   plus 8.  */
+bool
+pass_split_complex_instructions::is_store_reg_dependent (rtx_insn *ldp_insn,
+                                                         rtx_insn *str_insn)
+{
+  if (!is_store_insn (str_insn)
+      || dependent_stores_candidates.count (str_insn) == 0)
+    return false;
+
+  /* get_insn_offset returns a magnitude and reports the arithmetic
+     code separately; apply the sign here.  */
+  int ldp_sign = UNDEFINED;
+  int ldp_off
+    = get_insn_offset (ldp_insn, get_insn_type (ldp_insn), &ldp_sign);
+  if (ldp_sign == MINUS)
+    ldp_off = -ldp_off;
+
+  int str_sign = UNDEFINED;
+  int str_off = get_insn_offset (str_insn, STR, &str_sign);
+  if (str_sign == MINUS)
+    str_off = -str_off;
+
+  return str_off == ldp_off || str_off == ldp_off + 8;
+}
+
+/* Return true if INSN is a single-set store to a non-BLKmode memory
+   location whose address is either a plain register or a PLUS/MINUS
+   expression with a constant integer as second operand.  */
+bool
+pass_split_complex_instructions::is_store_insn (rtx_insn *insn)
+{
+  if (!insn)
+    return false;
+
+  rtx store_set = single_set (insn);
+  if (!store_set || !MEM_P (SET_DEST (store_set)))
+    return false;
+
+  rtx dest_mem = XEXP (store_set, 0);
+  if (GET_MODE (dest_mem) == BLKmode)
+    return false;
+
+  /* TODO: only base and base + #imm addresses are accepted.  It might
+     make sense to add support for other address forms (e.g. multiply
+     and shift).  */
+  rtx address = XEXP (dest_mem, 0);
+  if (GET_CODE (address) == REG)
+    return true;
+  if (GET_CODE (address) != PLUS && GET_CODE (address) != MINUS)
+    return false;
+
+  return GET_CODE (XEXP (address, 1)) == CONST_INT;
+}
+
+/* Return the magnitude of the constant address offset of INSN, which is
+   of kind INSN_TYPE (LDP, LDP_TI or STR), or 0 when there is no
+   constant offset.
+
+   If ARITH_OPERATION_PTR is non-NULL and the offset comes from an
+   arithmetic expression, the rtx code of that expression (PLUS or
+   MINUS) is stored through it.  Nothing is stored for a two-element
+   load pair, for a register-only STR address, or for an LDP_TI address
+   that is neither PLUS nor MINUS.  */
+int
+pass_split_complex_instructions::get_insn_offset (
+  rtx_insn *insn, complex_instructions_t insn_type, int *arith_operation_ptr)
+{
+  rtx insn_pat = PATTERN (insn);
+  int returned_offset = 0;
+
+  rtx offset_expr = NULL;
+  rtx offset_value_expr = NULL;
+
+  switch (insn_type)
+    {
+    case LDP:
+      {
+        int number_of_sub_insns = XVECLEN (insn_pat, 0);
+
+        /* The sub-patterns are only inspected, so they are read in
+           place; no temporary insn is created for them.  */
+        if (number_of_sub_insns == 2)
+          {
+            /* The offset, if any, is that of the first load.  The
+               arithmetic code is not reported for this form.  */
+            arith_operation_ptr = NULL;
+
+            offset_expr = XEXP (XEXP (XVECEXP (insn_pat, 0, 0), 1), 0);
+            if (GET_CODE (offset_expr) == PLUS
+                || GET_CODE (offset_expr) == MINUS)
+              offset_value_expr = XEXP (offset_expr, 1);
+            else
+              offset_expr = NULL;
+          }
+        else if (number_of_sub_insns == 3)
+          {
+            /* The first element is the offset-related sub-insn; its
+               source is the arithmetic expression.  */
+            offset_expr = XEXP (XVECEXP (insn_pat, 0, 0), 1);
+            offset_value_expr = XEXP (offset_expr, 1);
+          }
+        else
+          {
+            gcc_assert (false
+                        && "Wrong number of elements in the ldp_insn vector");
+          }
+        break;
+      }
+    case LDP_TI:
+      {
+        offset_expr = XEXP (XEXP (insn_pat, 1), 0);
+        if (GET_CODE (offset_expr) != PLUS && GET_CODE (offset_expr) != MINUS)
+          return 0;
+        offset_value_expr = XEXP (offset_expr, 1);
+        break;
+      }
+    case STR:
+      {
+        offset_expr = XEXP (XEXP (insn_pat, 0), 0);
+        /* If memory location is specified by single base register then the
+           offset is zero.  */
+        if (GET_CODE (offset_expr) == REG)
+          return 0;
+        offset_value_expr = XEXP (offset_expr, 1);
+        break;
+      }
+    default:
+      {
+        if (dumps_are_enabled && dump_file)
+          {
+            fprintf (dump_file, "Instruction that was tried to split:\n");
+            print_rtl_single (dump_file, insn);
+          }
+        gcc_assert (false && "Unsupported instruction type");
+        break;
+      }
+    }
+
+  /* A CONST_INT keeps its value in a HOST_WIDE_INT operand, which must
+     be read with INTVAL rather than XINT.  */
+  if (offset_expr != NULL && offset_value_expr
+      && CONST_INT_P (offset_value_expr))
+    returned_offset = INTVAL (offset_value_expr);
+
+  if (arith_operation_ptr != NULL)
+    {
+      *arith_operation_ptr = GET_CODE (offset_expr);
+      gcc_assert ((*arith_operation_ptr == MINUS
+                   || *arith_operation_ptr == PLUS)
+                  && "Unexpected arithmetic operation in the offset expr");
+    }
+
+  return returned_offset;
+}
+
+/* Emit, into the sequence currently open, the two loads that make up
+   the two-element load pair LDP_INSN.  */
+void
+pass_split_complex_instructions::split_simple_ldp (rtx_insn *ldp_insn)
+{
+  rtx ldp_pat = PATTERN (ldp_insn);
+
+  rtx_insn *first_load = make_insn_raw (copy_rtx (XVECEXP (ldp_pat, 0, 0)));
+  rtx_insn *second_load = make_insn_raw (copy_rtx (XVECEXP (ldp_pat, 0, 1)));
+
+  const int loaded_regno = REGNO (SET_DEST (PATTERN (first_load)));
+
+  /* The base register is either the whole address or the first operand
+     of the address expression.  */
+  rtx address = XEXP (SET_SRC (PATTERN (first_load)), 0);
+  const int base_regno
+    = REG_P (address) ? REGNO (address) : REGNO (XEXP (address, 0));
+
+  /* In cases like ldp r1,r2,[r1] the load of r2 is emitted first, so
+     that the base is still intact when it is used.  */
+  if (base_regno == loaded_regno)
+    std::swap (first_load, second_load);
+
+  rtx_insn *emitted_first = emit_insn (PATTERN (first_load));
+  rtx_insn *emitted_second = emit_insn (PATTERN (second_load));
+
+  const int first_code = recog (PATTERN (first_load), first_load, 0);
+  const int second_code = recog (PATTERN (second_load), second_load, 0);
+
+  INSN_CODE (emitted_first) = first_code;
+  INSN_CODE (emitted_second) = second_code;
+}
+
+/* Emit, into the sequence currently open, the replacement for the
+   three-element load pair LDP_INSN: the two memory sub-insns plus the
+   offset-related sub-insn, which is placed before them when a memory
+   address was rewritten and after them otherwise.  Nothing is emitted
+   when the first memory sub-insn loads into its own base register.  */
+void
+pass_split_complex_instructions::split_ldp_with_offset (rtx_insn *ldp_insn)
+{
+  rtx pat = PATTERN (ldp_insn);
+  bool post_index = true;
+
+  /* The copies made below are referred to through pointers.  Copying an
+     rtx_insn by value would only transfer sizeof (rtx_insn) bytes,
+     which does not have to cover all operands of an insn.  */
+  rtx_insn *offset_insn = NULL;
+  rtx_insn *mem_insn_1 = NULL;
+  rtx_insn *mem_insn_2 = NULL;
+
+  int offset_insn_code = -1;
+  int mem_insn_1_code = -1;
+  int mem_insn_2_code = -1;
+
+  /* Signed offset applied by the offset-related sub-insn.  */
+  int offset = 0;
+  int arith_operation = UNDEFINED;
+
+  for (int i = 0; i < 3; i++)
+    {
+      rtx sub_insn = XVECEXP (pat, 0, i);
+      rtx_insn *copy_of_sub_insn = make_insn_raw (copy_rtx (sub_insn));
+      int sub_insn_code
+        = recog (PATTERN (copy_of_sub_insn), copy_of_sub_insn, 0);
+
+      /* The offset-related sub-insn is the SET whose source is a
+         PLUS/MINUS expression.  The sub-pattern itself is tested:
+         SUB_INSN_CODE is an insn code, not an rtx code, so it cannot be
+         classified with GET_RTX_CLASS.  */
+      bool is_offset_sub_insn
+        = (GET_CODE (sub_insn) == SET
+           && (GET_CODE (SET_SRC (sub_insn)) == PLUS
+               || GET_CODE (SET_SRC (sub_insn)) == MINUS));
+
+      if (is_offset_sub_insn)
+        {
+          offset_insn = copy_of_sub_insn;
+          offset_insn_code = sub_insn_code;
+          gcc_assert (i == 0
+                      && "Offset related insn must be the first "
+                         "element of a parallel insn vector");
+
+          offset = get_insn_offset (ldp_insn, LDP, &arith_operation);
+          /* Apply the sign once here, so that every memory sub-insn
+             below sees the same adjustment.  */
+          if (arith_operation == MINUS)
+            offset = -offset;
+        }
+      else
+        {
+          if (GET_CODE (XEXP (PATTERN (copy_of_sub_insn), 0)) != REG)
+            {
+              rtx &offset_expr
+                = XEXP (XEXP (XEXP (PATTERN (copy_of_sub_insn), 0), 0), 1);
+              if (GET_CODE (offset_expr) == CONST_INT)
+                {
+                  /* CONST_INT values are read with INTVAL.  */
+                  int local_offset = INTVAL (offset_expr);
+
+                  offset_expr = GEN_INT (local_offset + offset);
+
+                  gcc_assert (
+                    (arith_operation == MINUS || arith_operation == PLUS)
+                    && "Unexpected arithmetic operation in offset related "
+                       "sub_insn");
+
+                  if (i == 1)
+                    post_index = false;
+                }
+              else
+                {
+                  post_index = true;
+                }
+            }
+        }
+      if (i == 1)
+        {
+          mem_insn_1 = copy_of_sub_insn;
+          mem_insn_1_code = sub_insn_code;
+        }
+      if (i == 2)
+        {
+          mem_insn_2 = copy_of_sub_insn;
+          mem_insn_2_code = sub_insn_code;
+        }
+    }
+  gcc_assert (mem_insn_1_code != -1 && mem_insn_2_code != -1
+              && "Uninitialized memory insns");
+  gcc_assert (offset_insn != NULL && "Missing offset related sub_insn");
+
+  int dest_regno = REGNO (SET_DEST (PATTERN (mem_insn_1)));
+  int src_regno;
+
+  rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (mem_insn_1)), 0);
+
+  if (GET_CODE (srs_reg_insn) == REG)
+    src_regno = REGNO (srs_reg_insn);
+  else
+    src_regno = REGNO (XEXP (srs_reg_insn, 0));
+
+  /* Don't split such weird LDP.
+     NOTE(review): nothing has been emitted at this point; confirm that
+     the caller copes with an empty sequence.  */
+  if (src_regno == dest_regno)
+    return;
+
+  rtx_insn *emited_offset_insn;
+  if (!post_index)
+    {
+      emited_offset_insn = emit_insn (PATTERN (offset_insn));
+      INSN_CODE (emited_offset_insn) = offset_insn_code;
+    }
+
+  rtx_insn *emited_insn_1 = emit_insn (PATTERN (mem_insn_1));
+  rtx_insn *emited_insn_2 = emit_insn (PATTERN (mem_insn_2));
+
+  INSN_CODE (emited_insn_1) = mem_insn_1_code;
+  INSN_CODE (emited_insn_2) = mem_insn_2_code;
+
+  if (post_index)
+    {
+      emited_offset_insn = emit_insn (PATTERN (offset_insn));
+      INSN_CODE (emited_offset_insn) = offset_insn_code;
+    }
+}
+
+/* Replace the pair instruction INSN with the sequence produced by the
+   splitter matching the number of elements of its pattern.  INSN is
+   left untouched when the splitter emits nothing.  */
+void
+pass_split_complex_instructions::split_ldp_stp (rtx_insn *insn)
+{
+  rtx_insn *prev_insn = PREV_INSN (insn);
+  int number_of_sub_insns = XVECLEN (PATTERN (insn), 0);
+
+  start_sequence ();
+
+  if (number_of_sub_insns == 2)
+    split_simple_ldp (insn);
+  else if (number_of_sub_insns == 3)
+    split_ldp_with_offset (insn);
+  else
+    gcc_assert (false && "Broken complex insn vector");
+
+  rtx_insn *seq = get_insns ();
+  unshare_all_rtl_in_chain (seq);
+  end_sequence ();
+
+  /* A splitter may decline to split by returning without emitting
+     anything.  Deleting INSN in that case would remove the load pair
+     with no replacement, so keep the original instruction.  */
+  if (seq == NULL)
+    return;
+
+  emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
+  delete_insn_and_edges (insn);
+}
+
+/* Replace INSN, a single-set load from a TImode memory location, with
+   two DImode loads: one into the destination register from the original
+   address, and one into the next register number from the address
+   GET_MODE_SIZE (DImode) bytes further on.  INSN is left untouched when
+   its address is neither a register nor a PLUS/MINUS expression with a
+   constant second operand.  */
+void
+pass_split_complex_instructions::split_ldp_ti (rtx_insn *insn)
+{
+  /* Check the address before creating any RTL.  Without a constant
+     offset the address of the second half cannot be formed, and both
+     loads would read the same location.  */
+  rtx orig_addr = XEXP (XEXP (PATTERN (insn), 1), 0);
+  const enum rtx_code orig_addr_code = GET_CODE (orig_addr);
+  if (orig_addr_code != REG
+      && !((orig_addr_code == PLUS || orig_addr_code == MINUS)
+           && CONST_INT_P (XEXP (orig_addr, 1))))
+    return;
+
+  rtx_insn *prev_insn = PREV_INSN (insn);
+  rtx_insn *load_insn_1 = make_insn_raw (copy_rtx (PATTERN (insn)));
+  rtx_insn *load_insn_2 = make_insn_raw (copy_rtx (PATTERN (insn)));
+
+  rtx reg_insn_1 = XEXP (PATTERN (load_insn_1), 0);
+  rtx mem_insn_1 = XEXP (PATTERN (load_insn_1), 1);
+  rtx mem_insn_2 = XEXP (PATTERN (load_insn_2), 1);
+
+  PUT_MODE (mem_insn_1, DImode);
+  PUT_MODE (mem_insn_2, DImode);
+
+  int reg_no_1 = REGNO (reg_insn_1);
+
+  XEXP (PATTERN (load_insn_1), 0) = gen_rtx_REG (DImode, reg_no_1);
+  XEXP (PATTERN (load_insn_2), 0) = gen_rtx_REG (DImode, reg_no_1 + 1);
+
+  /* Make the second load address the second half of the location.  */
+  rtx load_insn_2_addr = XEXP (XEXP (PATTERN (load_insn_2), 1), 0);
+  if (GET_CODE (load_insn_2_addr) == REG)
+    {
+      XEXP (XEXP (PATTERN (load_insn_2), 1), 0)
+        = gen_rtx_PLUS (DImode,
+                        gen_rtx_REG (DImode, REGNO (load_insn_2_addr)),
+                        GEN_INT (GET_MODE_SIZE (DImode)));
+    }
+  else
+    {
+      /* CONST_INT values are read with INTVAL.  For base - C the second
+         half is at base - (C - size); for base + C it is at
+         base + (C + size).  */
+      HOST_WIDE_INT low_offset = INTVAL (XEXP (load_insn_2_addr, 1));
+      HOST_WIDE_INT high_offset
+        = (GET_CODE (load_insn_2_addr) == MINUS
+           ? low_offset - GET_MODE_SIZE (DImode)
+           : low_offset + GET_MODE_SIZE (DImode));
+      XEXP (load_insn_2_addr, 1) = GEN_INT (high_offset);
+    }
+
+  start_sequence ();
+
+  int src_regno;
+  rtx srs_reg_insn = XEXP (XEXP (PATTERN (load_insn_1), 1), 0);
+
+  if (GET_CODE (srs_reg_insn) == REG)
+    src_regno = REGNO (srs_reg_insn);
+  else
+    src_regno = REGNO (XEXP (srs_reg_insn, 0));
+
+  /* In cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first.  */
+  if (src_regno == reg_no_1)
+    std::swap (load_insn_1, load_insn_2);
+
+  rtx_insn *emited_load_insn_1 = emit_insn (PATTERN (load_insn_1));
+  rtx_insn *emited_load_insn_2 = emit_insn (PATTERN (load_insn_2));
+
+  INSN_CODE (emited_load_insn_1)
+    = recog (PATTERN (emited_load_insn_1), emited_load_insn_1, 0);
+  INSN_CODE (emited_load_insn_2)
+    = recog (PATTERN (emited_load_insn_2), emited_load_insn_2, 0);
+
+  rtx_insn *seq = get_insns ();
+  unshare_all_rtl_in_chain (seq);
+  end_sequence ();
+
+  emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
+  delete_insn_and_edges (insn);
+}
+
+/* Split INSN with the routine matching its kind.  Kinds other than LDP,
+   STP and LDP_TI are rejected with an assertion.  */
+void
+pass_split_complex_instructions::split_complex_insn (rtx_insn *insn)
+{
+  /* TODO: Add splitting of STP instructions.  */
+  switch (get_insn_type (insn))
+    {
+    case LDP:
+    case STP:
+      split_ldp_stp (insn);
+      break;
+    case LDP_TI:
+      split_ldp_ti (insn);
+      break;
+    default:
+      gcc_assert (false && "Unsupported type of insn to split");
+      break;
+    }
+}
+
+/* Classify INSN.  A PARALLEL pattern is LDP or STP when the target hooks
+   accept its insn code.  Any other pattern is LDP_TI when INSN is a
+   single set whose source is a TImode MEM.  Everything else, including
+   anything that is not a real insn, is UNDEFINED.  */
+pass_split_complex_instructions::complex_instructions_t
+pass_split_complex_instructions::get_insn_type (rtx_insn *insn)
+{
+  if (!INSN_P (insn))
+    return UNDEFINED;
+
+  rtx body = PATTERN (insn);
+  const int icode = recog (PATTERN (insn), insn, NULL);
+
+  if (GET_CODE (body) != PARALLEL)
+    {
+      rtx set_insn = single_set (insn);
+      const bool loads_ti_mem
+        = (set_insn && GET_CODE (XEXP (set_insn, 1)) == MEM
+           && GET_MODE (XEXP (set_insn, 1)) == E_TImode);
+      return loads_ti_mem ? LDP_TI : UNDEFINED;
+    }
+
+  /* Pair instructions are recognised through the target hooks.  */
+  if (targetm.is_ldp_insn (icode))
+    return LDP;
+
+  return targetm.is_stp_insn (icode) ? STP : UNDEFINED;
+}
+
+/* Run the pass only when the target provides both pair-recognition
+   hooks, optimization is enabled and -fsplit-ldp-stp was given.  */
+bool
+pass_split_complex_instructions::gate (function *)
+{
+  if (!targetm.is_ldp_insn || !targetm.is_stp_insn)
+    return false;
+
+  return optimize > 0 && flag_split_ldp_stp > 0;
+}
+
+} // anon namespace
+
+/* Create a new instance of the split_complex_instructions pass for
+   CTXT.  */
+rtl_opt_pass *
+make_pass_split_complex_instructions (gcc::context *ctxt)
+{
+  rtl_opt_pass *new_pass = new pass_split_complex_instructions (ctxt);
+  return new_pass;
+}
+
#if __GNUC__ >= 10
# pragma GCC diagnostic pop
-#endif
+#endif
\ No newline at end of file
diff --git a/gcc/target.def b/gcc/target.def
index d85adf36a..a3a50b474 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2677,6 +2677,16 @@ modes and they have different conditional execution capability, such as ARM.",
bool, (void),
default_have_conditional_execution)
+DEFHOOK
+(is_ldp_insn,
+ "Return true if icode is corresponding to any of the LDP instruction types.",
+ bool, (int icode), NULL)
+
+DEFHOOK
+(is_stp_insn,
+ "Return true if icode is corresponding to any of the STP instruction types.",
+ bool, (int icode), NULL)
+
DEFHOOK
(gen_ccmp_first,
"This function prepares to emit a comparison insn for the first compare in a\n\
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
new file mode 100644
index 000000000..3918d43f6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
@@ -0,0 +1,74 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-additional-options "-fsplit-ldp-stp" } */
+/*
+ * Tests are:
+ * Patterns where LDP insns should NOT be split
+ * */
+
+int __RTL (startwith ("split_complex_instructions"))
+simple_ldp_after_store ()
+{
+(function "simple_ldp_after_store"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0)))
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 32))[1 S4 A32])(reg:DI x0)))
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))]))
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x29)))
+ (cinsn 14 (use (reg/i:DI x30)))
+ (cinsn 15 (use (reg/i:DI x0)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "simple_ldp_after_store"
+}
+
+int __RTL (startwith ("split_complex_instructions"))
+ldp_after_store_in_different_bb ()
+{
+(function "ldp_after_store_in_different_bb"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0)))
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 32))[1 S4 A32])(reg:DI x0)))
+ (edge-to 3 (flags "FALLTHRU"))
+ ) ;; block 2
+ (block 3
+ (edge-from 2 (flags "FALLTHRU"))
+ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))]))
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x29)))
+ (cinsn 14 (use (reg/i:DI x30)))
+ (cinsn 15 (use (reg/i:DI x0)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 3
+ ) ;; insn-chain
+) ;; function "ldp_after_store_in_different_bb"
+}
+
+/* Verify that the output code contains exactly 2 ldp. */
+/* { dg-final { scan-assembler-times {ldp\t} 2 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
new file mode 100644
index 000000000..653c30f83
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
@@ -0,0 +1,40 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-additional-options "-fsplit-ldp-stp" } */
+/*
+ * Test is:
+ * Pattern where LDP insns should be split with rearrangement in order
+ * to deal with data dependency between subinstructions.
+ * */
+
+int __RTL (startwith ("split_complex_instructions")) /* RTL-dump test function; the startwith argument names the split_complex_instructions pass.  */
+simple_ldp_after_store () /* NOTE(review): the C name differs from the RTL function name "ldp_equal_registers" used below -- confirm which is intended.  */
+{
+(function "ldp_equal_registers"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI x1)
+ (reg/i:DI x0))) /* x1 := x0; x1 is the base register for everything below.  */
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI x1)
+ (const_int 8))[1 S4 A32])(reg:DI x0))) /* Store x0 to x1+8, the first address the load pair reads.  */
+ (cinsn 10 (parallel [
+ (set (reg:DI x1)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int 8)) [1 S4 A32])) /* Destination x1 is also the base register of both loads.  */
+ (set (reg:DI x2)
+ (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 16)) [1 S4 A32]))])) /* The dg-final pattern in this file expects the x2 load (offset 16) to be emitted before the x1 load (offset 8).  */
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x0)))
+ (cinsn 14 (use (reg/i:DI x1)))
+ (cinsn 15 (use (reg/i:DI x2)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "ldp_equal_registers"
+}
+
+/* Verify that the output code rearranges the ldrs. */
+/* { dg-final { scan-assembler-times ".*ldr.*x2.*x1,.*16.*ldr.*x1.*x1.*8" 1 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
new file mode 100644
index 000000000..dc9f26efb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
@@ -0,0 +1,174 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */
+/*
+ * Tests are:
+ * Patterns where LDP insns should be split
+ * */
+
+int __RTL (startwith ("split_complex_instructions")) /* RTL-dump test function; the startwith argument names the split_complex_instructions pass.  */
+simple_ldp_after_store () /* Three store / load-pair sequences in one basic block; the file's dg-final requires that no ldp remains.  */
+{
+(function "simple_ldp_after_store"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0))) /* sp := x0.  */
+ (cinsn 238 (set (reg/i:DI x1)
+ (reg/i:DI x0))) /* x1 := x0.  */
+
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 8))[1 S4 A32])(reg:DI x0))) /* Case 1: store to sp+8 ...  */
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))])) /* ... then a load pair from sp+8 and sp+16.  */
+
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
+ (const_int -16)) [1 S4 A32])
+ (reg:DI x0))) /* Case 2: store to x1-16 (negative offset) ...  */
+ (cinsn 11 (parallel [
+ (set (reg:DI x3)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
+ (set (reg:DI x4)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32])) /* ... then a load pair from x1-16 and x1-8.  */
+ ]))
+
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
+ (reg:DI x0))) /* Case 3: store to [x1] with no offset ...  */
+ (cinsn 12 (parallel [
+ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 8)) [1 S4 A32])) /* ... then a load pair from [x1] and x1+8.  */
+ ]))
+
+ (cinsn 13 (use (reg/i:DI sp)))
+ (cinsn 14 (use (reg/i:DI cc)))
+ (cinsn 15 (use (reg/i:DI x29)))
+ (cinsn 16 (use (reg/i:DI x30)))
+ (cinsn 17 (use (reg/i:DI x0)))
+ (cinsn 18 (use (reg/i:DI x3)))
+ (cinsn 19 (use (reg/i:DI x4)))
+ (cinsn 20 (use (reg/i:DI x5)))
+ (cinsn 21 (use (reg/i:DI x6)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "simple_ldp_after_store"
+}
+
+int __RTL (startwith ("split_complex_instructions")) /* RTL-dump test function; the startwith argument names the split_complex_instructions pass.  */
+ldp_ti_after_store () /* Same three store-then-load cases, but each load is a single TImode set instead of a parallel; the file's dg-final requires that no ldp remains.  */
+{
+ (function "ldp_ti_after_store"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0))) /* sp := x0.  */
+ (cinsn 238 (set (reg/i:DI x2)
+ (reg/i:DI x0))) /* x2 := x0.  */
+
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 136))[1 S4 A32])(reg:DI x0))) /* Case 1: DImode store to sp+136 ...  */
+ (insn 81 (set (reg:TI x0 [1 S4 A32])
+ (mem/c:TI (plus:DI (reg/f:DI sp)
+ (const_int 136 )) [1 S4 A32]))
+ (expr_list:REG_EQUIV (mem/c:TI (plus:DI (reg/f:DI sfp)
+ (const_int -24 )) [1 S4 A32])
+ (nil))) /* ... then a TImode load into x0 from sp+136; this insn also carries a REG_EQUIV note.  */
+
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x2)
+ (const_int -16)) [1 S4 A32])
+ (reg:DI x0))) /* Case 2: DImode store to x2-16 ...  */
+ (insn 82 (set (reg:TI x3 [1 S4 A32])
+ (mem/c:TI (plus:DI (reg/f:DI x2)
+ (const_int -16)) [1 S4 A32]))) /* ... then a TImode load into x3 from x2-16.  */
+
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x2) [1 S4 A32])
+ (reg:DI x0))) /* Case 3: DImode store to [x2] ...  */
+ (insn 83 (set (reg:TI x5 [1 S4 A32])
+ (mem/c:TI (reg/f:DI x2) [1 S4 A32]))) /* ... then a TImode load into x5 from [x2].  */
+
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x29)))
+ (cinsn 14 (use (reg/i:DI x30)))
+ (cinsn 15 (use (reg/i:DI x0)))
+ (cinsn 16 (use (reg/i:DI x3)))
+ (cinsn 17 (use (reg/i:DI x5)))
+ (cinsn 18 (use (reg/i:DI x1)))
+ (cinsn 19 (use (reg/i:DI x4)))
+ (cinsn 20 (use (reg/i:DI x6)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "ldp_ti_after_store"
+}
+
+int __RTL (startwith ("split_complex_instructions")) /* RTL-dump test function; the startwith argument names the split_complex_instructions pass.  */
+ldp_after_store_in_different_bb () /* All three stores sit in bb 2 and the matching load pairs in bb 3; the file's dg-final requires that no ldp remains.  */
+{
+(function "ldp_after_store_in_different_bb"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0))) /* sp := x0.  */
+ (cinsn 238 (set (reg/i:DI x1)
+ (reg/i:DI x0))) /* x1 := x0.  */
+
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 8))[1 S4 A32])(reg:DI x0))) /* Store to sp+8; read back by cinsn 10 in bb 3.  */
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
+ (const_int -16)) [1 S4 A32])
+ (reg:DI x0))) /* Store to x1-16; read back by cinsn 11 in bb 3.  */
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
+ (reg:DI x0))) /* Store to [x1]; read back by cinsn 12 in bb 3.  */
+ (edge-to 3 (flags "FALLTHRU"))
+ ) ;; block 2
+ (block 3
+ (edge-from 2 (flags "FALLTHRU"))
+ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))])) /* Load pair from sp+8 and sp+16.  */
+ (cinsn 11 (parallel [
+ (set (reg:DI x3)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
+ (set (reg:DI x4)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32])) /* Load pair from x1-16 and x1-8.  */
+ ]))
+ (cinsn 12 (parallel [
+ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 8)) [1 S4 A32])) /* Load pair from [x1] and x1+8.  */
+ ]))
+ (cinsn 13 (use (reg/i:DI sp)))
+ (cinsn 14 (use (reg/i:DI cc)))
+ (cinsn 15 (use (reg/i:DI x29)))
+ (cinsn 16 (use (reg/i:DI x30)))
+ (cinsn 17 (use (reg/i:DI x0)))
+ (cinsn 18 (use (reg/i:DI x3)))
+ (cinsn 19 (use (reg/i:DI x4)))
+ (cinsn 20 (use (reg/i:DI x5)))
+ (cinsn 21 (use (reg/i:DI x6)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 3
+ ) ;; insn-chain
+) ;; function "ldp_after_store_in_different_bb"
+}
+
+/* Verify that the output code doesn't contain ldp. */
+/* { dg-final { scan-assembler-not {ldp\t} } } */
\ No newline at end of file
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 1e7d4e74b..2ccecffb5 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -280,6 +280,7 @@ DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs")
DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload")
DEFTIMEVAR (TV_REE , "ree")
DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
+DEFTIMEVAR (TV_SPLIT_CMP_INS , "split complex instructions")
DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
DEFTIMEVAR (TV_SPLIT_PATHS , "split paths")
DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 86f38e2f2..6daac7fc1 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -612,6 +612,7 @@ extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
*ctxt);
extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化