加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0043-Extending-and-refactoring-of-pass_split_complex_inst.patch 47.46 KB
一键复制 编辑 原始数据 按行查看 历史
郑晨卉 提交于 2024-04-11 10:45 . [Sync] Sync patch from openeuler/gcc
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426
From a49db831320ac70ca8f46b94ee60d7c6951f65c3 Mon Sep 17 00:00:00 2001
From: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
Date: Wed, 20 Dec 2023 21:36:07 +0300
Subject: [PATCH 10/18] Extending and refactoring of
pass_split_complex_instructions
- Add a flag parameter to is_ldp_insn and is_stp_insn that reports
whether the instruction has a writeback operation
- Add support of PRE_*, POST_* operands as a memory address
expression
- Split only LDPs that intersect with a dependent store
instruction
- Make the selection of dependent store instructions stricter, so
that a BFS check for a dependent store instruction within the
search range is sufficient
- Add helper methods to retrieve fields of rtx
- Remove redundant iterations in find_dependent_stores_candidates
- Refactor generation of instructions
- Add more test cases
---
gcc/config/aarch64/aarch64.cc | 62 +-
gcc/doc/tm.texi | 12 +-
gcc/sched-rgn.cc | 771 +++++++++---------
gcc/target.def | 14 +-
.../gcc.dg/rtl/aarch64/test-ldp-dont-split.c | 35 +-
.../rtl/aarch64/test-ldp-split-rearrange.c | 2 +-
.../gcc.dg/rtl/aarch64/test-ldp-split.c | 181 +++-
7 files changed, 603 insertions(+), 474 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 48e2eded0..fa566dd80 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27507,39 +27507,59 @@ aarch64_run_selftests (void)
#endif /* #if CHECKING_P */
-/* TODO: refuse to use ranges intead of full list of an instruction codes. */
+/* TODO: use a full list of instruction codes instead of ranges. */
bool
-is_aarch64_ldp_insn (int icode)
+is_aarch64_ldp_insn (int icode, bool *has_wb)
{
if ((icode >= CODE_FOR_load_pair_sw_sisi
- && icode <= CODE_FOR_load_pair_dw_tftf)
+ && icode <= CODE_FOR_load_pair_sw_sfsf)
+ || (icode >= CODE_FOR_load_pair_dw_didi
+ && icode <= CODE_FOR_load_pair_dw_dfdf)
+ || (icode == CODE_FOR_load_pair_dw_tftf)
|| (icode >= CODE_FOR_loadwb_pairsi_si
- && icode <= CODE_FOR_loadwb_pairtf_di)
- || (icode >= CODE_FOR_load_pairv8qiv8qi
- && icode <= CODE_FOR_load_pairdfdf)
- || (icode >= CODE_FOR_load_pairv16qiv16qi
- && icode <= CODE_FOR_load_pairv8bfv2df)
- || (icode >= CODE_FOR_load_pair_lanesv8qi
- && icode <= CODE_FOR_load_pair_lanesdf))
- return true;
+ && icode <= CODE_FOR_loadwb_pairdi_di)
+ || (icode >= CODE_FOR_loadwb_pairsf_si
+ && icode <= CODE_FOR_loadwb_pairdf_di)
+ || (icode >= CODE_FOR_loadwb_pairti_si
+ && icode <= CODE_FOR_loadwb_pairtf_di))
+ {
+ if (has_wb)
+ *has_wb = ((icode >= CODE_FOR_loadwb_pairsi_si
+ && icode <= CODE_FOR_loadwb_pairdi_di)
+ || (icode >= CODE_FOR_loadwb_pairsf_si
+ && icode <= CODE_FOR_loadwb_pairdf_di)
+ || (icode >= CODE_FOR_loadwb_pairti_si
+ && icode <= CODE_FOR_loadwb_pairtf_di));
+ return true;
+ }
return false;
}
bool
-is_aarch64_stp_insn (int icode)
+is_aarch64_stp_insn (int icode, bool *has_wb)
{
if ((icode >= CODE_FOR_store_pair_sw_sisi
- && icode <= CODE_FOR_store_pair_dw_tftf)
+ && icode <= CODE_FOR_store_pair_sw_sfsf)
+ || (icode >= CODE_FOR_store_pair_dw_didi
+ && icode <= CODE_FOR_store_pair_dw_dfdf)
+ || (icode == CODE_FOR_store_pair_dw_tftf)
|| (icode >= CODE_FOR_storewb_pairsi_si
- && icode <= CODE_FOR_storewb_pairtf_di)
- || (icode >= CODE_FOR_vec_store_pairv8qiv8qi
- && icode <= CODE_FOR_vec_store_pairdfdf)
- || (icode >= CODE_FOR_vec_store_pairv16qiv16qi
- && icode <= CODE_FOR_vec_store_pairv8bfv2df)
- || (icode >= CODE_FOR_store_pair_lanesv8qi
- && icode <= CODE_FOR_store_pair_lanesdf))
- return true;
+ && icode <= CODE_FOR_storewb_pairdi_di)
+ || (icode >= CODE_FOR_storewb_pairsf_si
+ && icode <= CODE_FOR_storewb_pairdf_di)
+ || (icode >= CODE_FOR_storewb_pairti_si
+ && icode <= CODE_FOR_storewb_pairtf_di))
+ {
+ if (has_wb)
+ *has_wb = ((icode >= CODE_FOR_storewb_pairsi_si
+ && icode <= CODE_FOR_storewb_pairdi_di)
+ || (icode >= CODE_FOR_storewb_pairsf_si
+ && icode <= CODE_FOR_storewb_pairdf_di)
+ || (icode >= CODE_FOR_storewb_pairti_si
+ && icode <= CODE_FOR_storewb_pairtf_di));
+ return true;
+ }
return false;
}
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 0c6415a9c..3b6e90bf2 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12113,12 +12113,16 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
-@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
-Return true if icode is corresponding to any of the LDP instruction types.
+@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode}, bool *@var{has_wb})
+Return true if @var{icode} is corresponding to any of the LDP instruction
+types. If @var{has_wb} is not NULL then its value is set to true if LDP
+contains post-index or pre-index operation.
@end deftypefn
-@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
-Return true if icode is corresponding to any of the STP instruction types.
+@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode}, bool *@var{has_wb})
+Return true if @var{icode} is corresponding to any of the STP instruction
+types. If @var{has_wb} is not NULL then its value is set to true if STP
+contains post-index or pre-index operation.
@end deftypefn
@deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
index b4df8bdc5..5f61de1c8 100644
--- a/gcc/sched-rgn.cc
+++ b/gcc/sched-rgn.cc
@@ -3956,7 +3956,7 @@ make_pass_sched_fusion (gcc::context *ctxt)
namespace {
-/* Def-use analisys special functions implementation. */
+/* Def-use analysis special functions implementation. */
static struct df_link *
get_defs (rtx_insn *insn, rtx reg)
@@ -4032,42 +4032,66 @@ const pass_data pass_data_split_complex_instructions = {
(TODO_df_verify | TODO_df_finish), /* Todo_flags_finish. */
};
+/* Pass split_complex_instructions finds LOAD PAIR instructions (LDP) that can
+ be split into two LDR instructions. It splits only those LDP for which one
+ half of the requested memory is contained in the preceding STORE (STR/STP)
+ instruction whose base register has the same definition. This lets the
+ hardware store-to-load forwarding mechanism supply one half of the
+ requested memory from the CPU's store queue.
+
+ TODO: Add split of STP.
+ TODO: Add split of vector STP and LDP. */
class pass_split_complex_instructions : public rtl_opt_pass
{
private:
- enum complex_instructions_t
+ enum mem_access_insn_t
{
UNDEFINED,
LDP,
+ /* LDP with post-index (see loadwb_pair in config/aarch64.md). */
+ LDP_WB,
+ /* LDP that contains one destination register in RTL IR
+ (see movti_aarch64 in config/aarch64.md). */
LDP_TI,
STP,
+ /* STP with pre-index (see storewb_pair in config/aarch64.md). */
+ STP_WB,
+ /* STP that contains one source register in RTL IR
+ (see movti_aarch64 in config/aarch64.md). */
+ STP_TI,
STR
};
- void split_complex_insn (rtx_insn *insn);
- void split_ldp_ti (rtx_insn *insn);
- void split_ldp_with_offset (rtx_insn *ldp_insn);
- void split_simple_ldp (rtx_insn *ldp_insn);
- void split_ldp_stp (rtx_insn *insn);
- complex_instructions_t get_insn_type (rtx_insn *insn);
-
- basic_block bb;
- rtx_insn *insn;
std::set<rtx_insn *> dependent_stores_candidates;
std::set<rtx_insn *> ldp_to_split_list;
- complex_instructions_t complex_insn_type = UNDEFINED;
- bool is_store_insn (rtx_insn *insn);
- bool is_ldp_dependent_on_store (rtx_insn *ldp_insn, basic_block bb);
+ void split_ldp_ti (rtx_insn *insn);
+ void split_ldp (rtx_insn *ldp_insn);
+ /* Emit a NEW_INSNS chain, recognize instruction code of each new instruction
+ and replace OLD_INSN with the emitted sequence. */
+ void replace_insn (rtx_insn *old_insn, rtx_insn *new_insns);
+
+ mem_access_insn_t get_insn_type (rtx_insn *insn);
+ bool is_typeof_ldp (mem_access_insn_t insn_type);
+ bool is_typeof_stp (mem_access_insn_t insn_type);
+
bool bfs_for_reg_dependent_store (rtx_insn *ldp_insn, basic_block search_bb,
rtx_insn *search_insn,
int search_range
= param_ldp_dependency_search_range);
bool is_store_reg_dependent (rtx_insn *ldp_insn, rtx_insn *str_insn);
void init_df ();
- void find_dependent_stores_candidates (rtx_insn *ldp_insn);
- int get_insn_offset (rtx_insn *insn, complex_instructions_t insn_type,
- int *arith_operation_ptr = NULL);
+ void find_dependent_stores_candidates (rtx_insn *ldp_insn,
+ mem_access_insn_t insn_type);
+
+ rtx get_memref (rtx_insn *insn, mem_access_insn_t insn_type);
+ rtx get_base_reg (rtx memref);
+ /* Set OFFSET to the offset value. Returns TRUE if MEMREF's address
+ expression is supported, FALSE otherwise. */
+ bool get_offset (rtx memref, int &offset);
+ /* Return size of memory referenced by MEMREF. Returns -1 if INSN_TYPE
+ wasn't recognized. */
+ int get_unit_size (rtx memref, mem_access_insn_t insn_type);
public:
pass_split_complex_instructions (gcc::context *ctxt)
@@ -4080,28 +4104,22 @@ public:
virtual unsigned int
execute (function *)
{
- enum rtx_code ldp_memref_code;
+ basic_block bb;
+ rtx_insn *insn;
+
init_df ();
ldp_to_split_list.clear ();
FOR_EACH_BB_FN (bb, cfun)
{
FOR_BB_INSNS (bb, insn)
{
- complex_instructions_t insn_type = get_insn_type (insn);
- /* TODO: Add splitting of STP instructions. */
- if (insn_type != LDP && insn_type != LDP_TI)
+ mem_access_insn_t insn_type = get_insn_type (insn);
+ if (!is_typeof_ldp (insn_type))
continue;
- /* TODO: Currently support only ldp_ti and ldp with REG or
- PLUS/MINUS offset expression. */
- if (insn_type == LDP_TI)
- {
- ldp_memref_code = GET_CODE (XEXP (XEXP (PATTERN (insn), 1),
- 0));
- if (ldp_memref_code != REG && ldp_memref_code != PLUS
- && ldp_memref_code != MINUS)
- continue;
- }
- if (is_ldp_dependent_on_store (insn, bb))
+
+ find_dependent_stores_candidates (insn, insn_type);
+ if (!dependent_stores_candidates.empty ()
+ && bfs_for_reg_dependent_store (insn, bb, insn))
{
ldp_to_split_list.insert (insn);
}
@@ -4110,18 +4128,107 @@ public:
for (std::set<rtx_insn *>::iterator i = ldp_to_split_list.begin ();
i != ldp_to_split_list.end (); ++i)
- split_complex_insn (*i);
+ split_ldp (*i);
return 0;
}
}; // class pass_split_complex_instructions
bool
-pass_split_complex_instructions::is_ldp_dependent_on_store (rtx_insn *ldp_insn,
- basic_block bb)
+pass_split_complex_instructions::is_typeof_ldp (
+ mem_access_insn_t insn_type)
{
- find_dependent_stores_candidates (ldp_insn);
- return bfs_for_reg_dependent_store (ldp_insn, bb, ldp_insn);
+ return (insn_type == LDP || insn_type == LDP_WB || insn_type == LDP_TI);
+}
+
+bool
+pass_split_complex_instructions::is_typeof_stp (
+ mem_access_insn_t insn_type)
+{
+ return (insn_type == STP || insn_type == STP_WB || insn_type == STP_TI);
+}
+
+rtx
+pass_split_complex_instructions::get_memref (
+ rtx_insn *insn, mem_access_insn_t insn_type)
+{
+ rtx insn_pat = PATTERN (insn);
+ rtx memref = NULL;
+
+ switch (insn_type)
+ {
+ case LDP:
+ memref = SET_SRC (XVECEXP (insn_pat, 0, 0));
+ break;
+ case LDP_WB:
+ memref = SET_SRC (XVECEXP (insn_pat, 0, 1));
+ break;
+ case LDP_TI:
+ memref = SET_SRC (insn_pat);
+ break;
+ case STP:
+ memref = SET_DEST (XVECEXP (insn_pat, 0, 0));
+ break;
+ case STP_WB:
+ memref = SET_DEST (XVECEXP (insn_pat, 0, 1));
+ break;
+ case STP_TI:
+ case STR:
+ memref = SET_DEST (insn_pat);
+ break;
+ default:
+ break;
+ }
+
+ if (memref && !MEM_P (memref))
+ return NULL;
+ return memref;
+}
+
+rtx
+pass_split_complex_instructions::get_base_reg (rtx memref)
+{
+ if (!memref || !MEM_P (memref))
+ return NULL;
+ rtx addr_exp = XEXP (memref, 0);
+
+ switch (GET_CODE (addr_exp))
+ {
+ case REG:
+ return addr_exp;
+ case PLUS:
+ case PRE_DEC:
+ case PRE_INC:
+ case POST_DEC:
+ case POST_INC:
+ if (REG_P (XEXP (addr_exp, 0)))
+ return XEXP (addr_exp, 0);
+ default:
+ return NULL;
+ }
+}
+
+int
+pass_split_complex_instructions::get_unit_size (
+ rtx memref, mem_access_insn_t insn_type)
+{
+ if (!memref)
+ return -1;
+
+ switch (insn_type)
+ {
+ case LDP:
+ case STP:
+ case LDP_WB:
+ case STP_WB:
+ case STR:
+ return GET_MODE_SIZE (GET_MODE (memref)).to_constant ();
+ case LDP_TI:
+ case STP_TI:
+ return GET_MODE_SIZE (E_DImode).to_constant ();
+ default:
+ return -1;
+ }
}
bool
@@ -4135,9 +4242,9 @@ pass_split_complex_instructions::bfs_for_reg_dependent_store (
{
if (!current_search_insn)
return false;
- bool checking_result
- = is_store_reg_dependent (ldp_insn, current_search_insn);
- if (checking_result)
+
+ if (dependent_stores_candidates.find (current_search_insn)
+ != dependent_stores_candidates.end ())
{
if (dump_file)
{
@@ -4185,30 +4292,29 @@ pass_split_complex_instructions::init_df ()
void
pass_split_complex_instructions::find_dependent_stores_candidates (
- rtx_insn *ldp_insn)
+ rtx_insn *ldp_insn, mem_access_insn_t insn_type)
{
dependent_stores_candidates.clear ();
- df_ref use;
- FOR_EACH_INSN_USE (use, ldp_insn)
- {
- df_link *defs = get_defs (ldp_insn, DF_REF_REG (use));
- if (!defs)
- return;
+ rtx base_reg = get_base_reg (get_memref (ldp_insn, insn_type));
+ if (!base_reg)
+ return;
- for (df_link *def = defs; def; def = def->next)
- {
- df_link *uses
- = get_uses (DF_REF_INSN (def->ref), DF_REF_REG (def->ref));
- if (!uses)
- continue;
+ df_link *defs = get_defs (ldp_insn, base_reg);
+ if (!defs)
+ return;
- for (df_link *use = uses; use; use = use->next)
- {
- if (DF_REF_CLASS (use->ref) == DF_REF_REGULAR
- && is_store_insn (DF_REF_INSN (use->ref)))
- dependent_stores_candidates.insert (DF_REF_INSN (use->ref));
- }
+ for (df_link *def = defs; def; def = def->next)
+ {
+ df_link *uses = get_uses (DF_REF_INSN (def->ref), DF_REF_REG (def->ref));
+ if (!uses)
+ continue;
+ for (df_link *use = uses; use; use = use->next)
+ {
+ if (DF_REF_CLASS (use->ref) == DF_REF_REGULAR
+ && DF_REF_INSN (use->ref) != ldp_insn
+ && is_store_reg_dependent (ldp_insn, DF_REF_INSN (use->ref)))
+ dependent_stores_candidates.insert (DF_REF_INSN (use->ref));
}
}
}
@@ -4217,423 +4323,274 @@ bool
pass_split_complex_instructions::is_store_reg_dependent (rtx_insn *ldp_insn,
rtx_insn *str_insn)
{
- if (!is_store_insn (str_insn)
- || dependent_stores_candidates.find (str_insn)
- == dependent_stores_candidates.end ())
+ if (!str_insn)
return false;
- int ldp_offset_sign = UNDEFINED;
- int ldp_offset
- = get_insn_offset (ldp_insn, get_insn_type (ldp_insn), &ldp_offset_sign);
- if (ldp_offset_sign == MINUS)
- ldp_offset = -ldp_offset;
+ mem_access_insn_t st_type = get_insn_type (str_insn);
+ if (!is_typeof_stp (st_type) && st_type != STR)
+ return false;
- int str_offset_sign = UNDEFINED;
- int str_offset = get_insn_offset (str_insn, STR, &str_offset_sign);
- if (str_offset_sign == MINUS)
- str_offset = -str_offset;
+ mem_access_insn_t ld_type = get_insn_type (ldp_insn);
+ rtx ld_memref = get_memref (ldp_insn, ld_type);
+ rtx st_memref = get_memref (str_insn, st_type);
+ rtx ld_base_reg = get_base_reg (ld_memref);
+ rtx st_base_reg = get_base_reg (st_memref);
- if (str_offset == ldp_offset || str_offset == ldp_offset + 8)
- return true;
+ if (!ld_base_reg || !st_base_reg
+ || REGNO (ld_base_reg) != REGNO (st_base_reg))
+ return false;
- return false;
-}
+ int ld_offset = 0;
+ int st_offset = 0;
+ if (get_offset (ld_memref, ld_offset)
+ && get_offset (st_memref, st_offset))
+ {
+ int ld_unit_size = get_unit_size (ld_memref, ld_type);
+ int st_size = get_unit_size (st_memref, st_type);
+ if (st_type != STR)
+ st_size *= 2;
-bool
-pass_split_complex_instructions::is_store_insn (rtx_insn *insn)
-{
- if (!insn)
- return false;
- rtx sset_b = single_set (insn);
- /* TODO: The condition below allow to take only store instructions in which
- the memory location's operand is either a register (base) or an plus/minus
- operation (base + #imm). So it might make sense to add support for other
- cases (e.g. multiply and shift). */
- if (sset_b && MEM_P (SET_DEST (sset_b))
- && GET_MODE (XEXP (sset_b, 0)) != BLKmode
- && (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == REG
- || (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == PLUS
- || GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == MINUS)
- && (GET_CODE (XEXP (XEXP (XEXP (sset_b, 0), 0), 1)) == CONST_INT)))
- return true;
+ if (ld_unit_size < 0 || st_size < 0)
+ return false;
+
+ bool st_has_low_ld_part = (ld_offset >= st_offset
+ && (ld_offset + ld_unit_size <= st_offset + st_size));
+ bool st_has_high_ld_part = ((ld_offset + ld_unit_size >= st_offset)
+ && (ld_offset + 2 * ld_unit_size <= st_offset + st_size));
+ bool st_has_not_full_ld = (ld_offset < st_offset
+ || (ld_offset + 2 * ld_unit_size > st_offset + st_size));
+
+ if ((st_has_low_ld_part || st_has_high_ld_part) && st_has_not_full_ld)
+ return true;
+ }
return false;
}
-int
-pass_split_complex_instructions::get_insn_offset (
- rtx_insn *insn, complex_instructions_t insn_type, int *arith_operation_ptr)
+bool
+pass_split_complex_instructions::get_offset (rtx memref, int &offset)
{
- rtx insn_pat = PATTERN (insn);
- int returned_offset = 0;
+ rtx addr_exp = XEXP (memref, 0);
- rtx offset_expr = NULL;
- rtx offset_value_expr = NULL;
-
- switch (insn_type)
+ switch (GET_CODE (addr_exp))
{
- case LDP:
- {
- int number_of_sub_insns = XVECLEN (insn_pat, 0);
-
- /* Calculate it's own ofsset of first load insn. */
- rtx_insn *first_load_insn = NULL;
- if (number_of_sub_insns == 2)
+ case REG:
+ case POST_DEC:
+ case POST_INC:
+ offset = 0;
+ return true;
+ case PRE_DEC:
+ offset = -(GET_MODE_SIZE (GET_MODE (memref)).to_constant ());
+ return true;
+ case PRE_INC:
+ offset = GET_MODE_SIZE (GET_MODE (memref)).to_constant ();
+ return true;
+ case PLUS:
+ if (CONST_INT_P (XEXP (addr_exp, 1)))
{
- first_load_insn
- = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0)));
- arith_operation_ptr = NULL;
-
- offset_expr = XEXP (XEXP (PATTERN (first_load_insn), 1), 0);
- if (GET_CODE (offset_expr) == PLUS
- || GET_CODE (offset_expr) == MINUS)
- offset_value_expr
- = XEXP (XEXP (XEXP (PATTERN (first_load_insn), 1), 0), 1);
- else
- offset_expr = NULL;
+ offset = INTVAL (XEXP (addr_exp, 1));
+ return true;
}
- else if (number_of_sub_insns == 3)
- {
- rtx_insn *offset_sub_insn
- = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0)));
-
- offset_expr = XEXP (PATTERN (offset_sub_insn), 1);
- offset_value_expr = XEXP (XEXP (PATTERN (offset_sub_insn), 1), 1);
- }
- else
- {
- gcc_assert (false
- && "Wrong number of elements in the ldp_insn vector");
- }
- break;
- }
- case LDP_TI:
- {
- offset_expr = XEXP (XEXP (insn_pat, 1), 0);
- if (GET_CODE (offset_expr) != PLUS && GET_CODE (offset_expr) != MINUS)
- return 0;
- offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 1), 0), 1);
- break;
- }
- case STR:
- {
- offset_expr = XEXP (XEXP (insn_pat, 0), 0);
- /* If memory location is specified by single base register then the
- offset is zero. */
- if (GET_CODE (offset_expr) == REG)
- return 0;
- offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 0), 0), 1);
- break;
- }
- default:
- {
- if (dumps_are_enabled && dump_file)
- {
- fprintf (dump_file, "Instruction that was tried to split:\n");
- print_rtl_single (dump_file, insn);
- }
- gcc_assert (false && "Unsupported instruction type");
- break;
- }
- }
-
- if (offset_expr != NULL && offset_value_expr
- && GET_CODE (offset_value_expr) == CONST_INT)
- returned_offset = XINT (offset_value_expr, 0);
-
- if (arith_operation_ptr != NULL)
- {
- *arith_operation_ptr = GET_CODE (offset_expr);
- gcc_assert ((*arith_operation_ptr == MINUS
- || *arith_operation_ptr == PLUS)
- && "Unexpected arithmetic operation in the offset expr");
+ default:
+ return false;
}
-
- return returned_offset;
}
void
-pass_split_complex_instructions::split_simple_ldp (rtx_insn *ldp_insn)
+pass_split_complex_instructions::replace_insn (rtx_insn *old_insn,
+ rtx_insn *new_insns)
{
- rtx pat = PATTERN (ldp_insn);
-
- rtx_insn *mem_insn_1 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 0)));
- rtx_insn *mem_insn_2 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 1)));
-
- int dest_regno = REGNO (SET_DEST (PATTERN (mem_insn_1)));
- int src_regno;
-
- rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (mem_insn_1)), 0);
-
- if (GET_CODE (srs_reg_insn) == REG)
- src_regno = REGNO (srs_reg_insn);
- else
- src_regno = REGNO (XEXP (srs_reg_insn, 0));
-
- rtx_insn *emited_insn_1, *emited_insn_2;
+ rtx_insn *prev_insn = PREV_INSN (old_insn);
+ start_sequence ();
- /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */
- if (src_regno == dest_regno)
- std::swap (mem_insn_1, mem_insn_2);
+ emit_insn (new_insns);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Split LDP:\n");
+ print_rtl_single (dump_file, old_insn);
+ fprintf (dump_file, "Split into:\n");
+ }
- emited_insn_1 = emit_insn (PATTERN (mem_insn_1));
- emited_insn_2 = emit_insn (PATTERN (mem_insn_2));
+ for (rtx_insn *insn = new_insns; insn; insn = NEXT_INSN (insn))
+ {
+ INSN_CODE (insn) = recog (PATTERN (insn), insn, NULL);
+ if (dump_file)
+ {
+ print_rtl_single (dump_file, insn);
+ }
+ }
- int sub_insn_1_code = recog (PATTERN (mem_insn_1), mem_insn_1, 0);
- int sub_insn_2_code = recog (PATTERN (mem_insn_2), mem_insn_2, 0);
+ rtx_insn *seq = get_insns ();
+ unshare_all_rtl_in_chain (seq);
+ end_sequence ();
- INSN_CODE (emited_insn_1) = sub_insn_1_code;
- INSN_CODE (emited_insn_2) = sub_insn_2_code;
+ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (old_insn));
+ delete_insn_and_edges (old_insn);
}
void
-pass_split_complex_instructions::split_ldp_with_offset (rtx_insn *ldp_insn)
+pass_split_complex_instructions::split_ldp (rtx_insn *ldp_insn)
{
rtx pat = PATTERN (ldp_insn);
- bool post_index = true;
-
- rtx_insn offset_insn;
- rtx_insn mem_insn_1;
- rtx_insn mem_insn_2;
+ mem_access_insn_t insn_type = get_insn_type (ldp_insn);
+ gcc_assert (is_typeof_ldp (insn_type));
- int offset_insn_code;
- int mem_insn_1_code = -1;
- int mem_insn_2_code = -1;
+ rtx load_rtx_1 = NULL;
+ rtx load_rtx_2 = NULL;
+ rtx post_index_rtx = NULL;
- int offset = 0;
- int arith_operation = UNDEFINED;
-
- for (int i = 0; i < 3; i++)
+ switch (insn_type)
{
- rtx sub_insn = XVECEXP (pat, 0, i);
- rtx_insn *copy_of_sub_insn = make_insn_raw (copy_rtx (sub_insn));
- int sub_insn_code
- = recog (PATTERN (copy_of_sub_insn), copy_of_sub_insn, 0);
-
- /* If sub_insn is offset related. */
- if (GET_RTX_CLASS (sub_insn_code) == RTX_UNARY)
- {
- offset_insn = *copy_of_sub_insn;
- offset_insn_code = sub_insn_code;
- gcc_assert (i == 0
- && "Offset related insn must be the first "
- "element of a parallel insn vector");
-
- offset = get_insn_offset (ldp_insn, LDP, &arith_operation);
- }
- else
- {
- if (GET_CODE (XEXP (PATTERN (copy_of_sub_insn), 0)) != REG)
- {
- rtx &offset_expr
- = XEXP (XEXP (XEXP (PATTERN (copy_of_sub_insn), 0), 0), 1);
- if (GET_CODE (offset_expr) == CONST_INT)
- {
- int local_offset = XINT (offset_expr, 0);
- offset = (arith_operation == PLUS ? offset : -offset);
-
- offset_expr = GEN_INT (local_offset + offset);
-
- gcc_assert (
- (arith_operation == MINUS || arith_operation == PLUS)
- && "Unexpected arithmetic operation in offset related "
- "sub_insn");
-
- if (i == 1)
- post_index = false;
- }
- else
- {
- post_index = true;
- }
- }
- }
- if (i == 1)
- {
- mem_insn_1 = *copy_of_sub_insn;
- mem_insn_1_code = sub_insn_code;
- }
- if (i == 2)
- {
- mem_insn_2 = *copy_of_sub_insn;
- mem_insn_2_code = sub_insn_code;
- }
+ case LDP:
+ load_rtx_1 = copy_rtx (XVECEXP (pat, 0, 0));
+ load_rtx_2 = copy_rtx (XVECEXP (pat, 0, 1));
+ break;
+ case LDP_WB:
+ post_index_rtx = copy_rtx (XVECEXP (pat, 0, 0));
+ load_rtx_1 = copy_rtx (XVECEXP (pat, 0, 1));
+ load_rtx_2 = copy_rtx (XVECEXP (pat, 0, 2));
+ break;
+ case LDP_TI:
+ split_ldp_ti (ldp_insn);
+ return;
+ default:
+ return;
}
- gcc_assert (mem_insn_1_code != -1 && mem_insn_2_code != -1
- && "Uninitialized memory insns");
- int dest_regno = REGNO (SET_DEST (PATTERN (&mem_insn_1)));
- int src_regno;
-
- rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (&mem_insn_1)), 0);
-
- if (GET_CODE (srs_reg_insn) == REG)
- src_regno = REGNO (srs_reg_insn);
- else
- src_regno = REGNO (XEXP (srs_reg_insn, 0));
+ int dest_regno = REGNO (SET_DEST (load_rtx_1));
+ int base_regno = REGNO (get_base_reg (get_memref (ldp_insn, insn_type)));
- /* Don't split such weird LDP. */
- if (src_regno == dest_regno)
- return;
-
- rtx_insn *emited_offset_insn;
- if (!post_index)
+ /* In cases like ldp r1,r2,[r1[, #imm]] emit ldr r2,[r1[, #imm]] first.
+ For LDP with post-index don't split such instruction. */
+ if (base_regno == dest_regno)
{
- emited_offset_insn = emit_insn (PATTERN (&offset_insn));
- INSN_CODE (emited_offset_insn) = offset_insn_code;
+ if (insn_type == LDP)
+ std::swap (load_rtx_1, load_rtx_2);
+ else
+ return;
}
- rtx_insn *emited_insn_1 = emit_insn (PATTERN (&mem_insn_1));
- rtx_insn *emited_insn_2 = emit_insn (PATTERN (&mem_insn_2));
-
-
- INSN_CODE (emited_insn_1) = mem_insn_1_code;
- INSN_CODE (emited_insn_2) = mem_insn_2_code;
-
- if (post_index)
+ /* Construct the instruction chain for subsequent emitting. */
+ rtx_insn *insn_seq = make_insn_raw (load_rtx_1);
+ rtx_insn *load_insn_2 = make_insn_raw (load_rtx_2);
+ SET_NEXT_INSN (insn_seq) = load_insn_2;
+ SET_NEXT_INSN (load_insn_2) = NULL;
+ if (post_index_rtx)
{
- emited_offset_insn = emit_insn (PATTERN (&offset_insn));
- INSN_CODE (emited_offset_insn) = offset_insn_code;
+ rtx_insn *post_index_insn = make_insn_raw (post_index_rtx);
+ SET_NEXT_INSN (load_insn_2) = post_index_insn;
+ SET_NEXT_INSN (post_index_insn) = NULL;
}
-}
-
-void
-pass_split_complex_instructions::split_ldp_stp (rtx_insn *insn)
-{
- rtx_insn *prev_insn = PREV_INSN (insn);
- int number_of_sub_insns = XVECLEN (PATTERN (insn), 0);
-
- start_sequence ();
- if (number_of_sub_insns == 2)
- split_simple_ldp (insn);
- else if (number_of_sub_insns == 3)
- split_ldp_with_offset (insn);
- else
- gcc_assert (false && "Broken complex insn vector");
-
- rtx_insn *seq = get_insns ();
- unshare_all_rtl_in_chain (seq);
- end_sequence ();
-
- emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
- delete_insn_and_edges (insn);
+ replace_insn (ldp_insn, insn_seq);
}
void
pass_split_complex_instructions::split_ldp_ti (rtx_insn *insn)
{
- rtx_insn *prev_insn = PREV_INSN (insn);
- rtx_insn *load_insn_1 = make_insn_raw (copy_rtx (PATTERN (insn)));
- rtx_insn *load_insn_2 = make_insn_raw (copy_rtx (PATTERN (insn)));
-
- rtx reg_insn_1 = XEXP (PATTERN (load_insn_1), 0);
- rtx mem_insn_1 = XEXP (PATTERN (load_insn_1), 1);
- rtx mem_insn_2 = XEXP (PATTERN (load_insn_2), 1);
-
- PUT_MODE (mem_insn_1, DImode);
- PUT_MODE (mem_insn_2, DImode);
-
- int reg_no_1 = REGNO (reg_insn_1);
+ rtx pat = PATTERN (insn);
+ rtx memref = get_memref (insn, LDP_TI);
+ int unit_size = get_unit_size (memref, LDP_TI);
+ rtx base_reg = get_base_reg (memref);
+ rtx dest_reg = SET_DEST (pat);
+
+ rtx reg_index_rtx = NULL;
+ rtx load_rtx_1 = NULL;
+ rtx load_rtx_2 = NULL;
+ bool post_index = false;
+ int offset = 0;
- XEXP (PATTERN (load_insn_1), 0) = gen_rtx_REG (DImode, reg_no_1);
- XEXP (PATTERN (load_insn_2), 0) = gen_rtx_REG (DImode, reg_no_1 + 1);
+ rtx load_1_memref = gen_rtx_MEM (DImode, base_reg);
- rtx load_insn_2_plus_expr = XEXP (XEXP (PATTERN (load_insn_2), 1), 0);
- if (GET_CODE (load_insn_2_plus_expr) == REG)
+ rtx addr_expr = XEXP (memref, 0);
+ if (GET_CODE (addr_expr) == PLUS)
{
- XEXP (XEXP (PATTERN (load_insn_2), 1), 0)
- = gen_rtx_PLUS (DImode,
- gen_rtx_REG (DImode, REGNO (load_insn_2_plus_expr)),
- GEN_INT (GET_MODE_SIZE (DImode)));
+ offset = INTVAL (XEXP (addr_expr, 1));
+ XEXP (load_1_memref, 0) = gen_rtx_PLUS (DImode, base_reg,
+ GEN_INT (offset));
}
- else
- {
- rtx load_insn_2_offset_expr
- = XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1);
- if (load_insn_2_offset_expr == NULL)
- return;
-
- if (GET_CODE (load_insn_2_offset_expr) == CONST_INT)
- {
- int load_insn_2_offset = XINT (load_insn_2_offset_expr, 0);
- XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1)
- = GEN_INT (load_insn_2_offset + GET_MODE_SIZE (DImode));
- }
- }
-
- start_sequence ();
+ rtx load_2_memref = gen_rtx_MEM (DImode,
+ gen_rtx_PLUS (DImode, base_reg, GEN_INT (offset + unit_size)));
- int src_regno;
- rtx srs_reg_insn = XEXP (XEXP (PATTERN (load_insn_1), 1), 0);
+ load_rtx_1 = gen_rtx_SET (gen_rtx_REG (DImode, REGNO (dest_reg)),
+ load_1_memref);
+ load_rtx_2 = gen_rtx_SET (gen_rtx_REG (DImode, REGNO (dest_reg) + 1),
+ load_2_memref);
- if (GET_CODE (srs_reg_insn) == REG)
- src_regno = REGNO (srs_reg_insn);
- else
- src_regno = REGNO (XEXP (srs_reg_insn, 0));
+ if (GET_CODE (addr_expr) == PRE_INC || GET_CODE (addr_expr) == PRE_DEC
+ || GET_CODE (addr_expr) == POST_INC || GET_CODE (addr_expr) == POST_DEC)
+ {
+ /* The amount of increment or decrement is equal to the size of the
+ machine mode of the containing MEMREF (see rtl.def). */
+ int index_offset = GET_MODE_SIZE (GET_MODE (memref)).to_constant ();
- /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */
- if (src_regno == reg_no_1)
- std::swap (load_insn_1, load_insn_2);
+ if (GET_CODE (addr_expr) == PRE_DEC || GET_CODE (addr_expr) == POST_DEC)
+ index_offset = -index_offset;
- rtx_insn *emited_load_insn_1 = emit_insn (PATTERN (load_insn_1));
- rtx_insn *emited_load_insn_2 = emit_insn (PATTERN (load_insn_2));
+ if (GET_CODE (addr_expr) == POST_INC || GET_CODE (addr_expr) == POST_DEC)
+ post_index = true;
- INSN_CODE (emited_load_insn_1)
- = recog (PATTERN (emited_load_insn_1), emited_load_insn_1, 0);
- INSN_CODE (emited_load_insn_2)
- = recog (PATTERN (emited_load_insn_2), emited_load_insn_2, 0);
+ reg_index_rtx = gen_rtx_SET (base_reg,
+ gen_rtx_PLUS (DImode, base_reg,
+ GEN_INT (index_offset)));
+ }
- rtx_insn *seq = get_insns ();
- unshare_all_rtl_in_chain (seq);
- end_sequence ();
+ /* In cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */
+ if (REGNO (base_reg) == REGNO (dest_reg))
+ std::swap (load_rtx_1, load_rtx_2);
- emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
- delete_insn_and_edges (insn);
-}
+ /* Construct the instruction chain for subsequent emitting. */
+ rtx_insn *insn_seq = make_insn_raw (load_rtx_1);
+ rtx_insn *load_insn_2 = make_insn_raw (load_rtx_2);
+ SET_NEXT_INSN (insn_seq) = load_insn_2;
+ SET_NEXT_INSN (load_insn_2) = NULL;
+ if (post_index && reg_index_rtx)
+ {
+ rtx_insn *post_index_insn = make_insn_raw (reg_index_rtx);
+ SET_NEXT_INSN (load_insn_2) = post_index_insn;
+ SET_NEXT_INSN (post_index_insn) = NULL;
+ }
+ else if (!post_index && reg_index_rtx)
+ {
+ rtx_insn *pre_index = make_insn_raw (reg_index_rtx);
+ SET_NEXT_INSN (pre_index) = insn_seq;
+ insn_seq = pre_index;
+ }
-void
-pass_split_complex_instructions::split_complex_insn (rtx_insn *insn)
-{
- complex_instructions_t insn_type = get_insn_type (insn);
- /* TODO: Add splitting of STP instructions. */
- if (insn_type == LDP || insn_type == STP)
- split_ldp_stp (insn);
- else if (insn_type == LDP_TI)
- split_ldp_ti (insn);
- else
- gcc_assert (false && "Unsupported type of insn to split");
+ replace_insn (insn, insn_seq);
}
-pass_split_complex_instructions::complex_instructions_t
+pass_split_complex_instructions::mem_access_insn_t
pass_split_complex_instructions::get_insn_type (rtx_insn *insn)
{
if (!INSN_P (insn))
return UNDEFINED;
- rtx pat = PATTERN (insn);
- int icode = recog (PATTERN (insn), insn, NULL);
+ int icode = INSN_CODE (insn);
+ if (icode == -1)
+ icode = recog (PATTERN (insn), insn, 0);
+ bool has_wb = false;
+
+ if (targetm.is_ldp_insn (icode, &has_wb))
+ return (has_wb ? LDP_WB : LDP);
- if (GET_CODE (pat) == PARALLEL)
+ if (targetm.is_stp_insn (icode, &has_wb))
+ return (has_wb ? STP_WB : STP);
+
+ rtx set_insn = single_set (insn);
+ if (set_insn && (GET_MODE (SET_SRC (set_insn)) == E_TImode
+ || GET_MODE (SET_DEST (set_insn)) == E_TImode))
{
- if (targetm.is_ldp_insn (icode))
- {
- return LDP;
- }
- if (targetm.is_stp_insn (icode))
- {
- return STP;
- }
- else
- {
- return UNDEFINED;
- }
+ if (MEM_P (SET_SRC (set_insn)) && REG_P (SET_DEST (set_insn)))
+ return LDP_TI;
+ if (MEM_P (SET_DEST (set_insn)) && REG_P (SET_SRC (set_insn)))
+ return STP_TI;
}
- rtx set_insn = single_set (insn);
- if (set_insn && GET_CODE (XEXP (set_insn, 1)) == MEM
- && GET_MODE (XEXP (set_insn, 1)) == E_TImode)
- return LDP_TI;
+
+ if (set_insn && MEM_P (SET_DEST (set_insn)) && REG_P (SET_SRC (set_insn))
+ && GET_MODE (SET_DEST (set_insn)) != BLKmode)
+ return STR;
return UNDEFINED;
}
diff --git a/gcc/target.def b/gcc/target.def
index a3a50b474..8797a21d5 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2679,13 +2679,19 @@ modes and they have different conditional execution capability, such as ARM.",
DEFHOOK
(is_ldp_insn,
- "Return true if icode is corresponding to any of the LDP instruction types.",
- bool, (int icode), NULL)
+ "Return true if @var{icode} is corresponding to any of the LDP instruction\n\
+types. If @var{has_wb} is not NULL then its value is set to true if LDP\n\
+contains post-index or pre-index operation.",
+ bool, (int icode, bool *has_wb),
+ NULL)
DEFHOOK
(is_stp_insn,
- "Return true if icode is corresponding to any of the STP instruction types.",
- bool, (int icode), NULL)
+ "Return true if @var{icode} is corresponding to any of the STP instruction\n\
+types. If @var{has_wb} is not NULL then its value is set to true if STP\n\
+contains post-index or pre-index operation.",
+ bool, (int icode, bool *has_wb),
+ NULL)
DEFHOOK
(gen_ccmp_first,
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
index 3918d43f6..2d42231dc 100644
--- a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target aarch64-*-* } } */
-/* { dg-additional-options "-fsplit-ldp-stp" } */
+/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */
/*
* Tests are:
* Patterns where LDP insns should NOT be split
@@ -15,6 +15,9 @@ simple_ldp_after_store ()
(cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
(cinsn 228 (set (reg/i:DI sp)
(reg/i:DI x0)))
+ (cinsn 238 (set (reg/i:DI x1)
+ (reg/i:DI x0)))
+
(cinsn 101 (set (mem/c:DI
(plus:DI (reg/f:DI sp)
(const_int 32))[1 S4 A32])(reg:DI x0)))
@@ -24,11 +27,27 @@ simple_ldp_after_store ()
(set (reg:DI x30)
(mem:DI (plus:DI (reg/f:DI sp)
(const_int 16)) [1 S4 A32]))]))
- (cinsn 11 (use (reg/i:DI sp)))
- (cinsn 12 (use (reg/i:DI cc)))
- (cinsn 13 (use (reg/i:DI x29)))
- (cinsn 14 (use (reg/i:DI x30)))
- (cinsn 15 (use (reg/i:DI x0)))
+ (cinsn 11 (use (reg/i:DI x29)))
+ (cinsn 12 (use (reg/i:DI x30)))
+
+ /* stp x0, x2, [x1]. */
+ (cinsn 102 (parallel [
+ (set (mem:DI (reg/f:DI x1) [1 S4 A32])
+ (reg:DI x0))
+ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 8)) [1 S4 A32])
+ (reg:DI x2))]))
+ /* ldp x5, x6, [x1]. */
+ (cinsn 13 (parallel [
+ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 8)) [1 S4 A32]))
+ ]))
+ (cinsn 14 (use (reg/i:DI x5)))
+ (cinsn 15 (use (reg/i:DI x6)))
+
+ (cinsn 100 (use (reg/i:DI sp)))
+ (cinsn 200 (use (reg/i:DI cc)))
+ (cinsn 300 (use (reg/i:DI x0)))
(edge-to exit (flags "FALLTHRU"))
) ;; block 2
) ;; insn-chain
@@ -70,5 +89,5 @@ ldp_after_store_in_different_bb ()
) ;; function "ldp_after_store_in_different_bb"
}
-/* Verify that the output code contains exactly 2 ldp. */
-/* { dg-final { scan-assembler-times {ldp\t} 2 } } */
\ No newline at end of file
+/* Verify that the output code contains exactly 3 ldp. */
+/* { dg-final { scan-assembler-times {ldp\t} 3 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
index 653c30f83..59ff82df9 100644
--- a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target aarch64-*-* } } */
-/* { dg-additional-options "-fsplit-ldp-stp" } */
+/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */
/*
* Test is:
* Pattern where LDP insns should be split with rearrangement in order
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
index dc9f26efb..e25762160 100644
--- a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
@@ -13,48 +13,131 @@ simple_ldp_after_store ()
(block 2
(edge-from entry (flags "FALLTHRU"))
(cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ /* mov sp, x0. */
(cinsn 228 (set (reg/i:DI sp)
- (reg/i:DI x0)))
+ (reg/i:DI x0)))
+ /* mov x1, x0. */
(cinsn 238 (set (reg/i:DI x1)
- (reg/i:DI x0)))
+ (reg/i:DI x0)))
+ /* str x0, [sp, 8]. */
(cinsn 101 (set (mem/c:DI
(plus:DI (reg/f:DI sp)
(const_int 8))[1 S4 A32])(reg:DI x0)))
+ /* ldp x29, x30, [sp, 8]. */
(cinsn 10 (parallel [
(set (reg:DI x29)
(mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
(set (reg:DI x30)
(mem:DI (plus:DI (reg/f:DI sp)
(const_int 16)) [1 S4 A32]))]))
+ (cinsn 11 (use (reg/i:DI x29)))
+ (cinsn 12 (use (reg/i:DI x30)))
+ /* str x0, [x1, -16]. */
(cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
(const_int -16)) [1 S4 A32])
(reg:DI x0)))
- (cinsn 11 (parallel [
+ /* ldp x3, x4, [x1, -16]. */
+ (cinsn 13 (parallel [
(set (reg:DI x3)
(mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
(set (reg:DI x4)
(mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32]))
]))
+ (cinsn 14 (use (reg/i:DI x3)))
+ (cinsn 15 (use (reg/i:DI x4)))
+ /* str x0, [x1]. */
(cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
(reg:DI x0)))
- (cinsn 12 (parallel [
+ /* ldp x5, x6, [x1]. */
+ (cinsn 16 (parallel [
(set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
(set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
(const_int 8)) [1 S4 A32]))
]))
+ (cinsn 17 (use (reg/i:DI x5)))
+ (cinsn 18 (use (reg/i:DI x6)))
- (cinsn 13 (use (reg/i:DI sp)))
- (cinsn 14 (use (reg/i:DI cc)))
- (cinsn 15 (use (reg/i:DI x29)))
- (cinsn 16 (use (reg/i:DI x30)))
- (cinsn 17 (use (reg/i:DI x0)))
- (cinsn 18 (use (reg/i:DI x3)))
- (cinsn 19 (use (reg/i:DI x4)))
- (cinsn 20 (use (reg/i:DI x5)))
- (cinsn 21 (use (reg/i:DI x6)))
+ /* ldp x29, x30, [sp], 96. */
+ (cinsn 19 (parallel [
+ (set (reg/f:DI sp)
+ (plus:DI (reg/f:DI sp) (const_int 96)))
+ (set (reg:DI x29)
+ (mem:DI (reg/f:DI sp) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 8)) [1 S4 A32]))]))
+ (cinsn 20 (use (reg/i:DI x29)))
+ (cinsn 21 (use (reg/i:DI x30)))
+
+ /* stp x0, x2, [x1, 128]. */
+ (cinsn 104 (parallel [
+ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 128)) [1 S4 A32])
+ (reg:DI x0))
+ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 136)) [1 S4 A32])
+ (reg:DI x2))]))
+ /* ldp x29, x30, [x1, 120]. */
+ (cinsn 22 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int 120)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int 128)) [1 S4 A32]))]))
+ (cinsn 23 (use (reg/i:DI x29)))
+ (cinsn 24 (use (reg/i:DI x30)))
+
+ /* stp x0, x2, [x1, 128]. */
+ (cinsn 105 (parallel [
+ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 128)) [1 S4 A32])
+ (reg:DI x0))
+ (set (mem:DI (plus:DI (reg/f:DI x1) (const_int 136)) [1 S4 A32])
+ (reg:DI x2))]))
+ /* ldp x3, x4, [x1, 136]. */
+ (cinsn 25 (parallel [
+ (set (reg:DI x3)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int 136)) [1 S4 A32]))
+ (set (reg:DI x4)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int 144)) [1 S4 A32]))
+ ]))
+ (cinsn 26 (use (reg/i:DI x3)))
+ (cinsn 27 (use (reg/i:DI x4)))
+
+ /* stp w0, w2, [x1, 32]. */
+ (cinsn 106 (parallel [
+ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 32)) [1 S4 A32])
+ (reg:SI x0))
+ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 36)) [1 S4 A32])
+ (reg:SI x2))]))
+ /* ldp x5, x6, [x1, 32]. */
+ (cinsn 28 (parallel [
+ (set (reg:DI x5) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 32)) [1 S4 A32]))
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 40)) [1 S4 A32]))
+ ]))
+ (cinsn 29 (use (reg/i:DI x5)))
+ (cinsn 30 (use (reg/i:DI x6)))
+
+ /* stp w0, w2, [x1, 40]. */
+ (cinsn 107 (parallel [
+ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 40)) [1 S4 A32])
+ (reg:SI x0))
+ (set (mem:SI (plus:DI (reg/f:DI x1) (const_int 44)) [1 S4 A32])
+ (reg:SI x2))]))
+ /* ldp x5, x6, [x1, 32]. */
+ (cinsn 31 (parallel [
+ (set (reg:DI x5) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 32)) [1 S4 A32]))
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 40)) [1 S4 A32]))
+ ]))
+ (cinsn 32 (use (reg/i:DI x5)))
+ (cinsn 33 (use (reg/i:DI x6)))
+
+ (cinsn 100 (use (reg/i:DI sp)))
+ (cinsn 200 (use (reg/i:DI cc)))
+ (cinsn 400 (use (reg/i:DI x0)))
(edge-to exit (flags "FALLTHRU"))
) ;; block 2
) ;; insn-chain
@@ -69,43 +152,83 @@ ldp_ti_after_store ()
(block 2
(edge-from entry (flags "FALLTHRU"))
(cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ /* mov sp, x0. */
(cinsn 228 (set (reg/i:DI sp)
- (reg/i:DI x0)))
+ (reg/i:DI x0)))
+ /* mov x2, x0. */
(cinsn 238 (set (reg/i:DI x2)
- (reg/i:DI x0)))
-
+ (reg/i:DI x0)))
+ /* str x0, [sp, 136]. */
(cinsn 101 (set (mem/c:DI
(plus:DI (reg/f:DI sp)
(const_int 136))[1 S4 A32])(reg:DI x0)))
- (insn 81 (set (reg:TI x0 [1 S4 A32])
+ /* ldp x0, x1, [sp, 136]. */
+ (cinsn 81 (set (reg:TI x0 [1 S4 A32])
(mem/c:TI (plus:DI (reg/f:DI sp)
- (const_int 136 )) [1 S4 A32]))
- (expr_list:REG_EQUIV (mem/c:TI (plus:DI (reg/f:DI sfp)
- (const_int -24 )) [1 S4 A32])
- (nil)))
-
+ (const_int 136)) [1 S4 A32])))
+ /* str x0, [x2, -16]. */
(cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x2)
- (const_int -16)) [1 S4 A32])
+ (const_int -16)) [1 S4 A32])
(reg:DI x0)))
- (insn 82 (set (reg:TI x3 [1 S4 A32])
+ /* ldp x3, x4, [x2, -16]. */
+ (cinsn 82 (set (reg:TI x3 [1 S4 A32])
(mem/c:TI (plus:DI (reg/f:DI x2)
- (const_int -16)) [1 S4 A32])))
-
+ (const_int -16)) [1 S4 A32])))
+ /* str x0, [x2]. */
(cinsn 103 (set (mem/c:DI (reg/f:DI x2) [1 S4 A32])
(reg:DI x0)))
- (insn 83 (set (reg:TI x5 [1 S4 A32])
+ /* ldp x5, x6, [x2]. */
+ (cinsn 83 (set (reg:TI x5 [1 S4 A32])
(mem/c:TI (reg/f:DI x2) [1 S4 A32])))
+ /* stp x0, x1, [sp, -8]. */
+ (cinsn 104 (set (mem:TI (plus:DI (reg/v/f:DI sp)
+ (const_int -8)) [1 S4 A32])
+ (reg:TI x0)))
+ /* ldp x5, x6, [sp], -16. */
+ (cinsn 84 (set (reg/v:TI x5 [1 S4 A32])
+ (mem:TI (post_dec:DI (reg/v/f:DI sp)) [1 S4 A32])))
+ (cinsn 85 (use (reg/i:DI x5)))
+ (cinsn 86 (use (reg/i:DI x6)))
+
+ /* stp x0, x1, [sp, 8]. */
+ (cinsn 105 (set (mem:TI (plus:DI (reg/v/f:DI sp)
+ (const_int 8)) [1 S4 A32])
+ (reg:TI x0)))
+ /* ldp x5, x6, [sp], -16. */
+ (cinsn 87 (set (reg/v:TI x5 [1 S4 A32])
+ (mem:TI (post_dec:DI (reg/v/f:DI sp)) [1 S4 A32])))
+ (cinsn 88 (use (reg/i:DI x5)))
+ (cinsn 89 (use (reg/i:DI x6)))
+
+ /* Intersects with insn 102. */
+ /* ldp x2, x3, [x2, -16]!. */
+ (cinsn 90 (set (reg/v:TI x2 [1 S4 A32])
+ (mem:TI (pre_dec:DI (reg/v/f:DI x2)) [1 S4 A32])))
+ (cinsn 91 (use (reg/i:DI x2)))
+ (cinsn 92 (use (reg/i:DI x3)))
+
+ /* mov x2, x0. */
+ (cinsn 248 (set (reg/i:DI x2)
+ (reg/i:DI x0)))
+ /* str x0, [x2, 16]. */
+ (cinsn 106 (set (mem:DI (plus:DI (reg/v/f:DI x2)
+ (const_int 16)) [1 S4 A32])
+ (reg:DI x0)))
+ /* ldp x3, x4, [x2, 16]!. */
+ (cinsn 93 (set (reg/v:TI x3 [1 S4 A32])
+ (mem:TI (pre_inc:DI (reg/v/f:DI x2)) [1 S4 A32])))
+ (cinsn 94 (use (reg/i:DI x3)))
+ (cinsn 95 (use (reg/i:DI x4)))
+
(cinsn 11 (use (reg/i:DI sp)))
(cinsn 12 (use (reg/i:DI cc)))
(cinsn 13 (use (reg/i:DI x29)))
(cinsn 14 (use (reg/i:DI x30)))
(cinsn 15 (use (reg/i:DI x0)))
(cinsn 16 (use (reg/i:DI x3)))
- (cinsn 17 (use (reg/i:DI x5)))
(cinsn 18 (use (reg/i:DI x1)))
(cinsn 19 (use (reg/i:DI x4)))
- (cinsn 20 (use (reg/i:DI x6)))
(edge-to exit (flags "FALLTHRU"))
) ;; block 2
) ;; insn-chain
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化