加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0050-Port-IPA-prefetch-to-GCC-12.patch 61.68 KB
一键复制 编辑 原始数据 按行查看 历史
郑晨卉 提交于 2024-04-11 10:45 . [Sync] Sync patch from openeuler/gcc
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071
From 7ee50ce44c652e21ca8ad33dc4e175f02b51b072 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Fri, 8 Mar 2024 06:50:39 +0800
Subject: [PATCH 18/18] Port IPA prefetch to GCC 12
---
gcc/Makefile.in | 1 +
gcc/cgraph.cc | 1 +
gcc/cgraph.h | 2 +
gcc/common.opt | 8 +
gcc/ipa-devirt.cc | 54 +-
gcc/ipa-prefetch.cc | 1819 +++++++++++++++++++++++++++++++++++++++++++
gcc/ipa-sra.cc | 8 +
gcc/params.opt | 8 +
gcc/passes.def | 1 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
11 files changed, 1902 insertions(+), 2 deletions(-)
create mode 100644 gcc/ipa-prefetch.cc
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 876000bda..10544e4a9 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1468,6 +1468,7 @@ OBJS = \
ipa-modref.o \
ipa-modref-tree.o \
ipa-predicate.o \
+ ipa-prefetch.o \
ipa-profile.o \
ipa-prop.o \
ipa-param-manipulation.o \
diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 3734c85db..7d738b891 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -998,6 +998,7 @@ cgraph_node::create_indirect_edge (gcall *call_stmt, int ecf_flags,
edge->indirect_info = cgraph_allocate_init_indirect_info ();
edge->indirect_info->ecf_flags = ecf_flags;
edge->indirect_info->vptr_changed = true;
+ edge->indirect_info->targets = NULL;
/* Record polymorphic call info. */
if (!cloning_p
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index d96690326..b84ff2f98 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1659,6 +1659,8 @@ public:
int param_index;
/* ECF flags determined from the caller. */
int ecf_flags;
+ /* Vector of potential call targets determined by analysis. */
+ vec<cgraph_node *, va_gc_atomic> *targets;
/* Number of speculative call targets, it's less than GCOV_TOPN_VALUES. */
unsigned num_speculative_call_targets : 16;
diff --git a/gcc/common.opt b/gcc/common.opt
index 1eb62ada5..e65a06af9 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1328,6 +1328,10 @@ fdevirtualize
Common Var(flag_devirtualize) Optimization
Try to convert virtual calls to direct ones.
+fipa-ic
+Common Var(flag_ipa_ic) Optimization Init(0)
+Perform interprocedural analysis of indirect calls.
+
ficp
Common Var(flag_icp) Optimization Init(0)
Try to promote indirect calls to direct ones.
@@ -2367,6 +2371,10 @@ fprefetch-loop-arrays
Common Var(flag_prefetch_loop_arrays) Init(-1) Optimization
Generate prefetch instructions, if available, for arrays in loops.
+fipa-prefetch
+Common Var(flag_ipa_prefetch) Init(0) Optimization
+Generate prefetch instructions, if available, using IPA info.
+
fprofile
Common Var(profile_flag)
Enable basic program profiling code.
diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
index 318535d06..dd3562d56 100644
--- a/gcc/ipa-devirt.cc
+++ b/gcc/ipa-devirt.cc
@@ -5758,6 +5758,54 @@ merge_fs_map_for_ftype_aliases ()
}
}
+/* Save results of indirect call analysis for the next passes.
+
+   For every non-polymorphic indirect call edge whose called pointer has a
+   canonical pointee type that is present in FS_MAP and absent from
+   UNSAFE_TYPES, allocate the edge's indirect_info->targets vector and fill
+   it with the cgraph nodes of the declarations recorded for that type.
+   Declarations that have no cgraph node are skipped, so the vector never
+   holds a NULL target.  */
+
+static void
+save_analysis_results ()
+{
+  if (dump_file)
+    fprintf (dump_file, "\n\nSave results of indirect call analysis.\n");
+
+  struct cgraph_node *n;
+  FOR_EACH_FUNCTION (n)
+    {
+      cgraph_edge *e, *next;
+      for (e = n->indirect_calls; e; e = next)
+        {
+          next = e->next_callee;
+          if (e->indirect_info->polymorphic)
+            continue;
+          gcall *stmt = e->call_stmt;
+          gcc_assert (stmt != NULL);
+          tree call_fn = gimple_call_fn (stmt);
+          tree call_fn_ty = TREE_TYPE (call_fn);
+          if (!POINTER_TYPE_P (call_fn_ty))
+            continue;
+
+          /* A zero UID stands for "no canonical type" here.  */
+          tree ctype = TYPE_CANONICAL (TREE_TYPE (call_fn_ty));
+          unsigned ctype_uid = ctype ? TYPE_UID (ctype) : 0;
+          if (!ctype_uid || unsafe_types->count (ctype_uid)
+              || !fs_map->count (ctype_uid))
+            continue;
+          /* TODO: cleanup noninterposable aliases.  */
+          decl_set *decls = (*fs_map)[ctype_uid];
+          if (dump_file)
+            {
+              fprintf (dump_file, "For call ");
+              print_gimple_stmt (dump_file, stmt, 0);
+            }
+          /* Reserve room for every declaration; fewer may be pushed.  */
+          vec_alloc (e->indirect_info->targets, decls->size ());
+          for (decl_set::const_iterator it = decls->begin ();
+               it != decls->end (); it++)
+            {
+              struct cgraph_node *target = cgraph_node::get (*it);
+              /* Later users dereference every target, so do not record
+                 a declaration that has no cgraph node.  */
+              if (!target)
+                continue;
+              /* TODO: maybe discard some targets.  */
+              e->indirect_info->targets->quick_push (target);
+            }
+        }
+    }
+}
+
/* Dump function types with set of functions corresponding to it. */
static void
@@ -5822,6 +5870,8 @@ collect_function_signatures ()
}
}
merge_fs_map_for_ftype_aliases ();
+ if (flag_ipa_ic)
+ save_analysis_results ();
if (dump_file)
dump_function_signature_sets ();
}
@@ -6217,7 +6267,7 @@ ipa_icp (void)
optimize indirect calls. */
collect_function_type_aliases ();
collect_function_signatures ();
- bool optimized = optimize_indirect_calls ();
+ bool optimized = flag_icp ? optimize_indirect_calls () : false;
remove_type_alias_map (ta_map);
remove_type_alias_map (fta_map);
@@ -6264,7 +6314,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- return (optimize && flag_icp && !seen_error ()
+ return (optimize && (flag_icp || flag_ipa_ic) && !seen_error ()
&& (in_lto_p || flag_whole_program));
}
diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
new file mode 100644
index 000000000..aeea51105
--- /dev/null
+++ b/gcc/ipa-prefetch.cc
@@ -0,0 +1,1819 @@
+/* IPA prefetch optimizations.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ Contributed by Ilia Diachkov.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* IPA prefetch is an interprocedural pass that detects cases of indirect
+ memory access potentially in loops and inserts prefetch instructions
+ to optimize cache usage during these indirect memory accesses. */
+
+#include "config.h"
+#define INCLUDE_SET
+#define INCLUDE_MAP
+#include "system.h"
+#include "coretypes.h"
+#include "target.h"
+#include "tm.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "diagnostic-core.h"
+#include "function.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "vec.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "cfg.h"
+#include "cfghooks.h"
+#include "ssa.h"
+#include "tree-dfa.h"
+#include "fold-const.h"
+#include "tree-inline.h"
+#include "stor-layout.h"
+#include "tree-into-ssa.h"
+#include "tree-cfg.h"
+#include "alloc-pool.h"
+#include "symbol-summary.h"
+#include "ipa-prop.h"
+#include "tree-eh.h"
+#include "bitmap.h"
+#include "cfgloop.h"
+#include "langhooks.h"
+#include "ipa-param-manipulation.h"
+#include "ipa-fnsummary.h"
+#include "tree-ssa-loop.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "gimple-fold.h"
+#include "gimplify.h"
+
+namespace {
+
+/* Call graph analysis. */
+
+/* Sets of call graph edges and nodes.  */
+typedef std::set<cgraph_edge *> edge_set;
+typedef std::set<cgraph_node *> node_set;
+/* Map from a node to a heap-allocated set of edges.  */
+typedef std::map<cgraph_node *, edge_set *> node_to_iedge_map;
+/* Map from a node to a heap-allocated set of nodes.  */
+typedef std::map<cgraph_node *, node_set *> node_to_node_map;
+/* Maps from an edge or a node to a floating point rate.  */
+typedef std::map<cgraph_edge *, double> edge_in_loop;
+typedef std::map<cgraph_node *, double> node_in_loop;
+
+/* Rate recorded for call edges that are considered to be in a loop.  */
+static edge_in_loop *el_map = NULL;
+/* Rate recorded for nodes whose callers are all in loops.  */
+static node_in_loop *nl_map = NULL;
+/* For a node, the indirect call edges that list it as a potential target.  */
+static node_to_iedge_map *icn_map = NULL;
+/* Contains the nodes which are reachable from a given node.  */
+static node_to_node_map *nn_map = NULL;
+
+/* Return true if node N may be processed by the IPA prefetch pass: the
+   flag_ipa_prefetch option must be set for N's declaration and N must have
+   a gimple body.  */
+
+static bool
+can_be_optimized (cgraph_node *n)
+{
+  /* TODO: maybe check also inlined_to.  */
+  if (!opt_for_fn (n->decl, flag_ipa_prefetch))
+    return false;
+  return n->has_gimple_body_p ();
+}
+
+/* Dump information about call graph edge E and, when the call statement of
+   E is located in a basic block with a non-zero loop depth, record the
+   frequency of E in EL_MAP.  Direct edges whose callee cannot be optimized
+   are ignored.  */
+
+static void
+analyze_cgraph_edge (cgraph_edge *e)
+{
+  /* Check E before it is dereferenced to fetch the call statement.  */
+  gcc_checking_assert (e);
+  gcall *stmt = e->call_stmt;
+  gcc_checking_assert (stmt);
+  basic_block bb = gimple_bb (stmt);
+  gcc_checking_assert (bb);
+  /* TODO: add the same check for indirect calls.  */
+  if (e->callee && !can_be_optimized (e->callee))
+    return;
+
+  if (dump_file)
+    {
+      if (e->callee)
+        fprintf (dump_file, "\t%*s%s %s%*s ", 1, "",
+                 e->callee->dump_name (), !e->inline_failed ? "inlined" :
+                 cgraph_inline_failed_string (e->inline_failed), 1, "");
+      else
+        fprintf (dump_file, "\t%*s%s %s%*s ", 1, "", "(indirect)",
+                 "n/a", 1, "");
+      fprintf (dump_file, "freq:%4.2f", e->sreal_frequency ().to_double ());
+
+      if (e->callee && cross_module_call_p (e))
+        fprintf (dump_file, " cross module");
+
+      class ipa_call_summary *es = ipa_call_summaries->get (e);
+      if (es)
+        fprintf (dump_file, " loop depth:%2i size:%2i time: %2i",
+                 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
+
+      fprintf (dump_file, "\n");
+    }
+  /* List the potential targets saved for an indirect call, if any.  */
+  if (e->indirect_info && dump_file)
+    {
+      fprintf (dump_file, "II: %p\n", (void *) e->indirect_info->targets);
+      unsigned i = 0;
+      cgraph_node *n;
+      if (e->indirect_info->targets)
+        for (i = 0; e->indirect_info->targets->iterate (i, &n); ++i)
+          fprintf (dump_file, "\t%s\n", n->dump_name ());
+    }
+
+  /* Only calls located inside a loop are recorded.  */
+  if (bb_loop_depth (bb) == 0)
+    return;
+
+  if (dump_file)
+    {
+      if (e->callee)
+        fprintf (dump_file, "\tCall in loop (%d): ", bb_loop_depth (bb));
+      else
+        fprintf (dump_file, "\tICall in loop (%d): ", bb_loop_depth (bb));
+      print_gimple_stmt (dump_file, stmt, 0);
+    }
+  (*el_map)[e] = e->sreal_frequency ().to_double ();
+}
+
+/* Walk optimizable cgraph nodes and collect info for edges.  For each such
+   node, dominance and loop information is computed, every direct and then
+   every indirect call edge is analyzed, and the information is released.  */
+
+static void
+analyse_cgraph ()
+{
+  cgraph_node *node;
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      if (dump_file)
+        {
+          fprintf (dump_file, "\n\nProcesing function %s\n",
+                   node->dump_name ());
+          print_generic_expr (dump_file, node->decl);
+          fprintf (dump_file, "\n");
+        }
+
+      if (can_be_optimized (node))
+        {
+          /* TODO: maybe remove loop info here.  */
+          push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+          calculate_dominance_info (CDI_DOMINATORS);
+          loop_optimizer_init (LOOPS_NORMAL);
+
+          for (cgraph_edge *direct = node->callees; direct;
+               direct = direct->next_callee)
+            analyze_cgraph_edge (direct);
+          for (cgraph_edge *indirect = node->indirect_calls; indirect;
+               indirect = indirect->next_callee)
+            analyze_cgraph_edge (indirect);
+
+          free_dominance_info (CDI_DOMINATORS);
+          loop_optimizer_finalize ();
+
+          pop_cfun ();
+        }
+      else if (dump_file)
+        fprintf (dump_file, "Skip the function\n");
+    }
+}
+
+/* Fill ICN_MAP: for every potential target saved on an indirect call edge
+   of a defined function, add that edge to the set of indirect edges which
+   may call the target.  */
+
+static void
+prepare_indirect_call_info ()
+{
+  cgraph_node *caller;
+  FOR_EACH_DEFINED_FUNCTION (caller)
+    for (cgraph_edge *icall = caller->indirect_calls; icall;
+         icall = icall->next_callee)
+      {
+        if (icall->indirect_info->targets == NULL)
+          continue;
+        cgraph_node *target;
+        for (unsigned idx = 0;
+             icall->indirect_info->targets->iterate (idx, &target); ++idx)
+          {
+            /* Create the edge set of TARGET on first use.  */
+            node_to_iedge_map::iterator pos = icn_map->find (target);
+            if (pos == icn_map->end ())
+              pos = icn_map->insert (std::make_pair (target,
+                                                     new edge_set)).first;
+            pos->second->insert (icall);
+          }
+      }
+}
+
+/* Record in NN_MAP that N is reachable from the caller of edge E, together
+   with every node already recorded as reachable from N.  */
+
+static void
+collect_nn_info (struct cgraph_edge *e, struct cgraph_node *n)
+{
+  struct cgraph_node *caller = e->caller;
+
+  /* Create the set of the caller on first use.  */
+  node_to_node_map::iterator caller_pos = nn_map->find (caller);
+  if (caller_pos == nn_map->end ())
+    caller_pos = nn_map->insert (std::make_pair (caller, new node_set)).first;
+  node_set *reachable = caller_pos->second;
+  reachable->insert (n);
+
+  /* Look N up only after the insertion above: N may be the caller itself.  */
+  node_to_node_map::iterator callee_pos = nn_map->find (n);
+  if (callee_pos == nn_map->end ())
+    return;
+
+  node_set *from_callee = callee_pos->second;
+  for (node_set::const_iterator it = from_callee->begin ();
+       it != from_callee->end (); ++it)
+    reachable->insert (*it);
+}
+
+/* Update reachability info for edge E calling node N, then check whether E
+   is recorded in EL_MAP.  If it is, add its rate to RATE and return true.
+   Otherwise clear ALL_IN_LOOP and return false.  */
+
+static bool
+check_loop_info_for_cgraph_edge (struct cgraph_edge *e, struct cgraph_node *n,
+                                 bool &all_in_loop, double &rate)
+{
+  collect_nn_info (e, n);
+
+  edge_in_loop::const_iterator pos = el_map->find (e);
+  if (pos != el_map->end ())
+    {
+      rate += pos->second;
+      return true;
+    }
+
+  if (dump_file)
+    fprintf (dump_file, "not all: %s->%s\n",
+             e->caller->dump_name (), n->dump_name ());
+  all_in_loop = false;
+  return false;
+}
+
+/* Recompute loop information of node N from its callers.
+
+   The rates of the direct caller edges and of the indirect call edges that
+   may target N are summed; each scan stops at the first edge that is not
+   recorded in EL_MAP.  If all scanned edges are in loops, the sum is added
+   to the rate of N in NL_MAP and to the rate of every outgoing direct and
+   indirect edge of N in EL_MAP.
+
+   Return true if N had no entry in NL_MAP and got a positive rate.  */
+
+static bool
+update_loop_info_for_cgraph_node (struct cgraph_node *n)
+{
+  bool changed = false, all_in_loop = true;
+  double rate = 0.0;
+  struct cgraph_edge *e;
+
+  /* Iterate all direct callers.  */
+  for (e = n->callers; e; e = e->next_caller)
+    if (!check_loop_info_for_cgraph_edge (e, n, all_in_loop, rate))
+      break;
+
+  /* Iterate all possible indirect callers.  */
+  edge_set *set = (*icn_map)[n];
+  if (set)
+    for (edge_set::const_iterator it = set->begin (); it != set->end (); it++)
+      if (!check_loop_info_for_cgraph_edge (*it, n, all_in_loop, rate))
+        break;
+
+  /* count () returns size_t; cast it to match the %lu conversion.  */
+  if (dump_file)
+    fprintf (dump_file, "%s: all=%d, nl->c=%lu, r=%4.2f\n", n->dump_name (),
+             all_in_loop, (unsigned long) nl_map->count (n), rate);
+
+  /* The node had 0 loop count but the rate is > 0,
+     so something is changed.  */
+  if (all_in_loop && nl_map->count (n) == 0 && rate > 0.0)
+    {
+      if (dump_file)
+        fprintf (dump_file, "%s: new rate %4.2f\n", n->dump_name (), rate);
+      changed = true;
+    }
+  if (all_in_loop)
+    {
+      /* operator[] creates a missing entry as 0.0, so += covers both the
+         new and the already recorded case.  */
+      (*nl_map)[n] += rate;
+      for (e = n->callees; e; e = e->next_callee)
+        (*el_map)[e] += rate;
+      for (e = n->indirect_calls; e; e = e->next_callee)
+        {
+          (*el_map)[e] += rate;
+          if (dump_file)
+            fprintf (dump_file, "%s: reset indirect e=%p to %4.2f\n",
+                     n->dump_name (), (void *) e, (*el_map)[e]);
+        }
+    }
+  return changed;
+}
+
+/* Propagate in_loop info over the call graph.  The defined functions that
+   have direct or potential indirect callers are updated repeatedly until
+   no update reports a change; the result is then dumped.  */
+
+static void
+propagate_loop_info_in_cgraph ()
+{
+  struct cgraph_node *node;
+  bool changed = true;
+
+  for (unsigned iteration = 0; changed; ++iteration)
+    {
+      changed = false;
+      if (dump_file)
+        fprintf (dump_file, "\nIteration %u\n", iteration);
+      FOR_EACH_DEFINED_FUNCTION (node)
+        {
+          const bool has_callers = node->callers || (*icn_map)[node];
+          if (!has_callers)
+            continue;
+          if (update_loop_info_for_cgraph_node (node))
+            changed = true;
+        }
+    }
+
+  if (!dump_file)
+    return;
+
+  fprintf (dump_file, "\nList of nodes in loops:\n");
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (nl_map->count (node) != 0)
+      fprintf (dump_file, "%s: %4.2f\n", node->dump_name (),
+               (*nl_map)[node]);
+
+  fprintf (dump_file, "\nList of callable nodes:\n");
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      if (nn_map->count (node) == 0)
+        continue;
+      node_set *reachable = (*nn_map)[node];
+      fprintf (dump_file, "%s: ", node->dump_name ());
+      for (node_set::const_iterator it = reachable->begin ();
+           it != reachable->end (); ++it)
+        fprintf (dump_file, "%s ", (*it)->dump_name ());
+      fprintf (dump_file, "\n");
+    }
+}
+
+/* Analysis of memory references. */
+
+/* Kinds of memory references.  The enumerators are compared with relational
+   operators by the analysis, so their order is significant.  */
+typedef enum
+{
+  MR_NONE,
+  MR_SIMPLE,
+  MR_POLYNOMIAL,
+  MR_INDIRECT,
+  MR_UNSUPPORTED
+} mr_type;
+
+/* Names of the memory reference kinds for dumps, indexed by mr_type.  */
+const char *const mr_type_str[] =
+  {"none", "simple", "poly", "indirect", "unsupported"};
+
+struct memref_type;
+typedef std::set<memref_type *> memref_set;
+
+/* The last memory reference id handed out; ids start at 1.  */
+static unsigned max_mr_id = 0;
+/* Description of one analyzed memory reference.  */
+typedef struct memref_type
+{
+ /* Unique id of the reference, used in dumps.  */
+ unsigned mr_id = 0;
+ /* Kind of the reference as determined by the analysis.  */
+ mr_type type = MR_NONE;
+ /* The memory reference tree itself.  */
+ tree mem = NULL_TREE;
+ /* Base address candidate found by the analysis, or NULL_TREE.  */
+ tree base = NULL_TREE;
+ /* For a COMPONENT_REF, its second operand; NULL_TREE otherwise.  */
+ tree offset = NULL_TREE;
+ /* Statements recorded while analyzing the reference.  */
+ vec<gimple *, va_heap, vl_ptr> stmts = vNULL;
+ /* Other memory references used in the address evaluation.  */
+ memref_set used_mrs;
+ /* True if the reference is a store, false if it is a load.  */
+ bool is_store = false;
+ /* True if the stored value is the loaded value of the same memory changed
+    by a constant; STEP is then that constant.  */
+ bool is_incr = false;
+ tree step = NULL_TREE;
+} memref_t;
+
+typedef std::map<tree, memref_t *> tree_memref_map;
+typedef std::map<function *, vec<memref_t *> > function_mrs_map;
+typedef std::map<function *, memref_set *> funct_mrs_map;
+typedef std::map<memref_t *, memref_t *> memref_map;
+typedef std::map<memref_t *, tree> memref_tree_map;
+
+typedef std::set<gimple *> stmt_set;
+typedef std::map<tree, tree> tree_map;
+
+/* Memory reference tree -> its description; used to analyze each tree
+   only once.  */
+tree_memref_map *tm_map;
+/* Function -> set of the memory references found in its statements.  */
+funct_mrs_map *fmrs_map;
+/* NOTE(review): the maps below are not used in the code visible here;
+   their roles should be confirmed against the rest of the file.  */
+funct_mrs_map *optimize_mrs_map;
+memref_map *mr_candidate_map;
+tree_map *decl_map;
+
+/* Defined below; declared here because get_memref calls it.  */
+static void analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr);
+
+/* Return the description of memory reference MEM used in STMT.  An already
+   known reference is returned from TM_MAP; otherwise a new description is
+   created with the next id and IS_STORE, registered in TM_MAP and then
+   analyzed.  */
+
+static memref_t*
+get_memref (gimple *stmt, tree mem, bool is_store)
+{
+  tree_memref_map::iterator known = tm_map->find (mem);
+  if (known != tm_map->end ())
+    {
+      memref_t *cached = known->second;
+      if (dump_file)
+        fprintf (dump_file, "Found mr %d for %p.\n",
+                 cached->mr_id, (void *) mem);
+      return cached;
+    }
+
+  memref_t *fresh = new memref_t;
+  fresh->mr_id = ++max_mr_id;
+  fresh->is_store = is_store;
+  fresh->mem = mem;
+  /* Register the reference before analyzing it: the analysis may call
+     get_memref again.  */
+  (*tm_map)[mem] = fresh;
+  if (dump_file)
+    fprintf (dump_file, "Create mr %d for %p.\n",
+             fresh->mr_id, (void *) mem);
+  analyse_mem_ref (stmt, mem, fresh);
+  return fresh;
+}
+
+/* Print to the dump file the optional prefix START followed by the ids of
+   all memory references in MRS and a newline.  */
+
+static void
+print_mrs_ids (memref_set &mrs, const char *start)
+{
+  if (start != NULL)
+    fprintf (dump_file, "%s", start);
+  for (memref_type *entry : mrs)
+    fprintf (dump_file, "%d ", entry->mr_id);
+  fprintf (dump_file, "\n");
+}
+
+/* Print the description of memory reference MR to the dump file: id, kind,
+   load/store flag, memory tree, base, offset, recorded statements, used
+   memory references and, for incremental references, the step.  */
+
+static void
+print_memref (memref_t *mr)
+{
+  const char *access = mr->is_store ? "st" : "ld";
+  fprintf (dump_file, "MR (%d) type: %s (%s) mem: ", mr->mr_id,
+           mr_type_str[mr->type], access);
+  print_generic_expr (dump_file, mr->mem);
+
+  fprintf (dump_file, "\nbase: ");
+  if (!mr->base)
+    fprintf (dump_file, "null");
+  else
+    print_generic_expr (dump_file, mr->base);
+
+  fprintf (dump_file, "\noffset: ");
+  if (!mr->offset)
+    fprintf (dump_file, "null");
+  else
+    print_generic_expr (dump_file, mr->offset);
+
+  fprintf (dump_file, "\nstmts:\n");
+  gimple *recorded;
+  for (unsigned int idx = 0; mr->stmts.iterate (idx, &recorded); ++idx)
+    print_gimple_stmt (dump_file, recorded, 0);
+
+  print_mrs_ids (mr->used_mrs, "\tused memrefs: ");
+  if (mr->is_incr)
+    {
+      fprintf (dump_file, "\tis incremental with step: ");
+      print_generic_expr (dump_file, mr->step);
+    }
+  fprintf (dump_file, "\n");
+}
+
+/* Look for a simple load or store in STMT.  For a load (SSA_NAME = MEM)
+   return the location of the right-hand side and set *IS_STORE to false.
+   For a store (MEM = SSA_NAME or invariant) return the location of the
+   left-hand side and set *IS_STORE to true.  Return NULL, leaving
+   *IS_STORE untouched, for any other statement.
+   TODO: from gcc/tree-ssa-loop-im.c, maybe make it global.  */
+
+static tree *
+simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
+{
+  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
+  if (!gimple_assign_single_p (stmt))
+    return NULL;
+
+  tree *dest = gimple_assign_lhs_ptr (stmt);
+  tree *src = gimple_assign_rhs1_ptr (stmt);
+
+  /* Load: the result is an SSA name and the statement reads memory.  */
+  if (TREE_CODE (*dest) == SSA_NAME && gimple_vuse (stmt))
+    {
+      *is_store = false;
+      return src;
+    }
+
+  /* Store: the statement writes memory from an SSA name or invariant.  */
+  if (!gimple_vdef (stmt))
+    return NULL;
+  if (TREE_CODE (*src) != SSA_NAME && !is_gimple_min_invariant (*src))
+    return NULL;
+
+  *is_store = true;
+  return dest;
+}
+
+/* Check whether the value assigned by STMT is computed from a load of the
+   same memory as MR->mem changed by a constant.  Starting from STMT, the
+   chain of defining assignments is followed through SSA names.  A binary
+   assignment of an SSA name and an INTEGER_CST records the constant as
+   MR->step (the operation code itself is not checked).  When the chain
+   reaches a MEM_REF, COMPONENT_REF or ARRAY_REF equal to MR->mem and a step
+   was recorded, MR->is_incr is set.  If no such load is found, MR->step and
+   MR->is_incr are reset.  */
+
+static void
+analyse_incremental (gimple *stmt, memref_t* mr)
+{
+ if (!gimple_assign_single_p (stmt))
+ return;
+ tree rhs1, rhs2;
+ /* TODO: maybe support other types of stmts. */
+ /* Setting STMT to NULL, or reaching a non-assignment definition, ends
+    the walk.  */
+ while (stmt && is_gimple_assign (stmt))
+ {
+ enum tree_code def_code = gimple_assign_rhs_code (stmt);
+ gimple_rhs_class rhs_class = gimple_assign_rhs_class (stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Incr: in assign (%s)\n",
+ get_tree_code_name (def_code));
+ print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS);
+ }
+ gcc_assert (def_code != ERROR_MARK);
+ switch (rhs_class)
+ {
+ case GIMPLE_TERNARY_RHS:
+ if (dump_file)
+ fprintf (dump_file, "Incr: unsupported trinary rhs\n");
+ stmt = NULL;
+ break;
+ case GIMPLE_UNARY_RHS:
+ case GIMPLE_SINGLE_RHS:
+ rhs1 = gimple_assign_rhs1 (stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Incr: (%s)",
+ get_tree_code_name (TREE_CODE (rhs1)));
+ print_generic_expr (dump_file, rhs1);
+ fprintf (dump_file, "\n");
+ }
+ /* A plain copy of an SSA name: continue with its definition.  */
+ if (def_code == SSA_NAME)
+ stmt = SSA_NAME_DEF_STMT (rhs1);
+ else if (def_code == MEM_REF || def_code == COMPONENT_REF
+ || def_code == ARRAY_REF)
+ {
+ /* If we have dereference in address evaluation,
+ it's indirect memory access. */
+ if (dump_file)
+ {
+ if (operand_equal_p (mr->mem, rhs1))
+ fprintf (dump_file, "Incr: the same MEM\n");
+ else
+ fprintf (dump_file, "Incr: diff MEM\n");
+ print_generic_expr (dump_file, rhs1);
+ fprintf (dump_file, " ");
+ print_generic_expr (dump_file, mr->mem);
+ fprintf (dump_file, "\n");
+ }
+ /* A load of the same memory after a constant step was seen: the
+    reference is incremental.  The walk stops at any load.  */
+ if (operand_equal_p (mr->mem, rhs1) && mr->step)
+ mr->is_incr = true;
+ stmt = NULL;
+ }
+ else
+ {
+ if (dump_file)
+ fprintf (dump_file, "Incr: unsupported unary/single\n");
+ stmt = NULL;
+ }
+ break;
+ case GIMPLE_BINARY_RHS:
+ rhs1 = gimple_assign_rhs1 (stmt);
+ rhs2 = gimple_assign_rhs2 (stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "(%s) (%s)",
+ get_tree_code_name (TREE_CODE (rhs1)),
+ get_tree_code_name (TREE_CODE (rhs2)));
+ print_generic_expr (dump_file, rhs1);
+ fprintf (dump_file, " ");
+ print_generic_expr (dump_file, rhs2);
+ fprintf (dump_file, "\n");
+ }
+ /* TODO: extend for other types of incrementation. */
+ /* Remember the constant operand as the step and continue with the
+    definition of the SSA name operand.  A later constant overwrites
+    an earlier one.  */
+ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST)
+ {
+ stmt = SSA_NAME_DEF_STMT (rhs1);
+ mr->step = rhs2;
+ if (dump_file)
+ {
+ fprintf (dump_file, "Incr: const increment stmt: ");
+ print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS);
+ }
+ }
+ else
+ stmt = NULL;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ /* A step without a matching load (or the reverse) is not an increment:
+    drop the partial result.  */
+ if ((mr->step && !mr->is_incr) || (!mr->step && mr->is_incr))
+ {
+ mr->step = NULL_TREE;
+ mr->is_incr = false;
+ }
+}
+
+/* Compute the kind of memory reference BASE after one step of its address
+   evaluation.  If USED is not NULL, the step is a load described by USED
+   and the result combines the kinds of BASE and USED.  Otherwise the step
+   is an assignment with code CODE and the result depends on CODE and the
+   current kind of BASE.  */
+
+static mr_type
+get_memref_type (memref_t *base, memref_t *used, enum tree_code code)
+{
+  /* TODO: improve memref type detection.  */
+  enum tree_code base_code = TREE_CODE (base->mem);
+  if (dump_file)
+    fprintf (dump_file, "get_memref_type: base=%d,%d used=%d,%d code=%s "
+             "base_code=%s\n", base->mr_id, base->type,
+             used ? used->mr_id : -1, used ? used->type : -1,
+             get_tree_code_name (code), get_tree_code_name (base_code));
+
+  if (used)
+    {
+      /* The kind never decreases.  */
+      if (base->type > used->type)
+        return base->type;
+      switch (used->type)
+        {
+        case MR_SIMPLE:
+          return MR_POLYNOMIAL;
+        case MR_POLYNOMIAL:
+          return base_code == ARRAY_REF ? MR_POLYNOMIAL : MR_INDIRECT;
+        case MR_INDIRECT:
+          return MR_INDIRECT;
+        default:
+          return MR_UNSUPPORTED;
+        }
+    }
+
+  switch (code)
+    {
+    case MEM_REF:
+    case ARRAY_REF:
+    case COMPONENT_REF:
+      return base->type;
+    case POINTER_PLUS_EXPR:
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case MULT_EXPR:
+      /* Arithmetic makes the reference at least polynomial.  */
+      return base->type > MR_POLYNOMIAL ? base->type : MR_POLYNOMIAL;
+    default:
+      /* Any other operation makes the reference at least indirect.  */
+      return base->type < MR_INDIRECT ? MR_INDIRECT : base->type;
+    }
+}
+
+/* Recursively walk defs of src expression and record used stmts and other mrs.
+ Return a base address candidate if it's found.
+
+ SRC is followed only if it is an SSA name.  The kind of MR is updated on
+ the way: a definition that is not an assignment, a ternary assignment or
+ an unhandled unary/single assignment make MR unsupported.  A load in the
+ definition chain is described by its own memory reference, which is added
+ to MR->used_mrs together with the references it uses.  */
+
+static tree
+analyse_addr_eval (tree src, memref_t* mr)
+{
+ if (TREE_CODE (src) != SSA_NAME)
+ return NULL_TREE;
+ gimple *stmt = SSA_NAME_DEF_STMT (src);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Src_stmt: ");
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
+ if (!is_gimple_assign (stmt))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Is not assign, stop analysis: ");
+ print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS);
+ }
+ mr->type = MR_UNSUPPORTED;
+ mr->stmts.safe_push (stmt);
+ return NULL_TREE;
+ }
+ enum tree_code def_code = gimple_assign_rhs_code (stmt);
+ /* Loads are not recorded here: get_memref below records the statement
+    in the description of the loaded reference.  */
+ if (def_code != MEM_REF && def_code != COMPONENT_REF
+ && def_code != ARRAY_REF)
+ mr->stmts.safe_push (stmt);
+ gimple_rhs_class rhs_class = gimple_assign_rhs_class (stmt);
+ tree rhs1, rhs2, base;
+ if (dump_file)
+ fprintf (dump_file, "In assign (%s): ", get_tree_code_name (def_code));
+
+ switch (rhs_class)
+ {
+ case GIMPLE_TERNARY_RHS:
+ if (dump_file)
+ fprintf (dump_file, "Unsupported trinary rhs\n");
+ mr->type = MR_UNSUPPORTED;
+ return NULL_TREE;
+ case GIMPLE_UNARY_RHS:
+ case GIMPLE_SINGLE_RHS:
+ rhs1 = gimple_assign_rhs1 (stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "(%s)",
+ get_tree_code_name (TREE_CODE (rhs1)));
+ print_generic_expr (dump_file, rhs1);
+ fprintf (dump_file, "\n");
+ }
+ /* Look through conversions.  */
+ if (def_code == NOP_EXPR)
+ return analyse_addr_eval (rhs1, mr);
+ else if (def_code == MEM_REF || def_code == COMPONENT_REF
+ || def_code == ARRAY_REF)
+ {
+ /* The address depends on a loaded value: describe the load and
+    inherit its base and its used references.  */
+ memref_t *mr2 = get_memref (stmt, rhs1, false);
+ mr->type = get_memref_type (mr, mr2, def_code);
+ for (memref_set::const_iterator it = mr2->used_mrs.begin ();
+ it != mr2->used_mrs.end (); it++)
+ mr->used_mrs.insert (*it);
+ mr->used_mrs.insert (mr2);
+ return mr2->base;
+ }
+ else
+ {
+ if (dump_file)
+ fprintf (dump_file, "Unsupported unary/single\n");
+ mr->type = MR_UNSUPPORTED;
+ }
+ return NULL_TREE;
+ case GIMPLE_BINARY_RHS:
+ rhs1 = gimple_assign_rhs1 (stmt);
+ rhs2 = gimple_assign_rhs2 (stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "(%s) (%s)",
+ get_tree_code_name (TREE_CODE (rhs1)),
+ get_tree_code_name (TREE_CODE (rhs2)));
+ print_generic_expr (dump_file, rhs1);
+ fprintf (dump_file, " ");
+ print_generic_expr (dump_file, rhs2);
+ fprintf (dump_file, "\n");
+ }
+ /* Both operands are walked, but only the first one may supply the base
+    address candidate.  The kind is updated after both walks.  */
+ base = analyse_addr_eval (rhs1, mr);
+ analyse_addr_eval (rhs2, mr);
+ mr->type = get_memref_type (mr, NULL, def_code);
+ return base;
+ default:
+ gcc_unreachable ();
+ }
+ return NULL_TREE;
+}
+
+/* Return the SSA name used as the address operand of the MEM_REF that is
+   the base of memory reference MEM, or NULL_TREE if there is none.  MEM
+   must be a MEM_REF.  BASE is the base address of MEM if the caller has
+   already computed it, or NULL_TREE to compute it here.  */
+
+static tree
+get_mem_ref_address_ssa_name (tree mem, tree base)
+{
+  gcc_assert (TREE_CODE (mem) == MEM_REF);
+  if (base == NULL_TREE)
+    base = get_base_address (mem);
+  /* The base may still be unknown; do not inspect a NULL tree.  */
+  if (base == NULL_TREE || TREE_CODE (base) != MEM_REF)
+    return NULL_TREE;
+
+  tree base_addr = TREE_OPERAND (base, 0);
+  if (base_addr != NULL_TREE && TREE_CODE (base_addr) == SSA_NAME)
+    return base_addr;
+  return NULL_TREE;
+}
+
+/* Analyze memory reference MEM used in STMT and fill its description MR.
+   A COMPONENT_REF is a simple reference (a store is also checked for being
+   incremental), the index of an ARRAY_REF and the address of a MEM_REF are
+   analyzed through their definitions, anything else is unsupported.  */
+
+static void
+analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr)
+{
+  tree base = get_base_address (mem);
+  if (dump_file)
+    fprintf (dump_file, "Codes: base = %s, mem = %s\n",
+             base ? get_tree_code_name (TREE_CODE (base)) : "null",
+             mem ? get_tree_code_name (TREE_CODE (mem)) : "null");
+
+  mr->stmts.safe_push (stmt);
+  mr->base = base;
+
+  const enum tree_code mem_code = TREE_CODE (mem);
+  if (mem_code == COMPONENT_REF)
+    {
+      if (mr->is_store)
+        analyse_incremental (stmt, mr);
+      mr->type = MR_SIMPLE;
+      mr->offset = TREE_OPERAND (mem, 1);
+      return;
+    }
+  if (mem_code == ARRAY_REF)
+    {
+      analyse_addr_eval (TREE_OPERAND (mem, 1), mr);
+      return;
+    }
+  if (mem_code == MEM_REF)
+    {
+      tree base_addr = get_mem_ref_address_ssa_name (mem, base);
+      if (dump_file)
+        {
+          fprintf (dump_file, "Base addr (%s): ",
+                   base_addr ? get_tree_code_name (TREE_CODE (base_addr))
+                   : "null");
+          if (base_addr)
+            print_generic_expr (dump_file, base_addr);
+          fprintf (dump_file, "\n");
+        }
+      if (base_addr)
+        {
+          mr->base = analyse_addr_eval (base_addr, mr);
+          return;
+        }
+    }
+
+  /* Everything that was not handled above is unsupported.  */
+  mr->type = MR_UNSUPPORTED;
+  mr->base = NULL_TREE;
+}
+
+/* If STMT contains a simple memory reference (as reported by
+   simple_mem_ref_in_stmt), obtain its memref descriptor with get_memref
+   and add it to the memref set of the current function.  */
+
+static void
+analyse_stmt (gimple *stmt)
+{
+  bool is_store;
+  tree *mem_p = simple_mem_ref_in_stmt (stmt, &is_store);
+  if (mem_p == NULL)
+    return;
+
+  if (dump_file)
+    {
+      const char *kind = is_store ? "store" : "load";
+      fprintf (dump_file, "\n%s: mr is found in stmt (%s): ",
+               function_name (cfun), kind);
+      print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS);
+    }
+
+  memref_t *found_mr = get_memref (stmt, *mem_p, is_store);
+  memref_set *fn_mrs = (*fmrs_map)[cfun];
+  fn_mrs->insert (found_mr);
+  if (dump_file)
+    print_memref (found_mr);
+}
+
+/* Scan all statements of the function of node N and record the memory
+   references found in them.  A fresh memref set is registered in fmrs_map
+   for the function before the scan; N must have a gimple body.  */
+
+static void
+collect_memrefs_for_cgraph_node (struct cgraph_node *n)
+{
+  if (dump_file)
+    fprintf (dump_file, "\nCollect indirect ptr info in %s\n", n->dump_name ());
+  n->get_body ();
+  function *fn = DECL_STRUCT_FUNCTION (n->decl);
+  gcc_assert (fn && n->has_gimple_body_p ());
+
+  /* analyse_stmt works on cfun, so switch to FN for the walk.  */
+  push_cfun (fn);
+  (*fmrs_map)[fn] = new memref_set;
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fn)
+    {
+      gimple_stmt_iterator gsi = gsi_start_bb (bb);
+      while (!gsi_end_p (gsi))
+        {
+          analyse_stmt (gsi_stmt (gsi));
+          gsi_next (&gsi);
+        }
+    }
+  pop_cfun ();
+}
+
+/* Walk the defined functions that are present in nl_map and have a gimple
+   body, and collect their memory references.  With a dump file, print the
+   collected memrefs of each such function afterwards.  */
+
+static void
+collect_memory_references ()
+{
+  struct cgraph_node *node;
+  /* TODO: collect info only for loops and functions in loops.  */
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      if (nl_map->count (node) == 0 || !node->has_gimple_body_p ())
+        continue;
+      collect_memrefs_for_cgraph_node (node);
+    }
+
+  if (!dump_file)
+    return;
+
+  fprintf (dump_file, "\n\nDump mem references:\n");
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      if (nl_map->count (node) == 0 || !node->has_gimple_body_p ())
+        continue;
+      function *fn = DECL_STRUCT_FUNCTION (node->decl);
+      fprintf (dump_file, "\nIn function %s (%s):\n", function_name (fn),
+               nl_map->count (node) != 0 ? "in loop" : "");
+      memref_set *fn_mrs = (*fmrs_map)[fn];
+      for (memref_set::const_iterator it = fn_mrs->begin ();
+           it != fn_mrs->end (); ++it)
+        print_memref (*it);
+    }
+}
+
+/* Analysis of loops. */
+
+/* Incremental memrefs (is_incr set) with loop-invariant base gathered for
+ the loop currently processed by collect_memrefs_for_loop, which allocates
+ and deletes the set; collect_memref fills it. */
+memref_set *current_incr_mrs;
+/* Likewise for memrefs of type MR_INDIRECT. */
+memref_set *current_indirect_mrs;
+
+/* Classify MR with respect to LOOP.  If CHECK_LOOP, MR is ignored unless
+   its first statement is inside LOOP.  MR is added to current_indirect_mrs
+   and/or current_incr_mrs when its base is invariant in LOOP and it is of
+   type MR_INDIRECT and/or is incremental, respectively.  */
+
+static void
+collect_memref (memref_t *mr, class loop *loop, bool check_loop)
+{
+  gimple *first_stmt = mr->stmts[0];
+  gcc_assert (first_stmt);
+  if (check_loop && !flow_bb_inside_loop_p (loop, gimple_bb (first_stmt)))
+    return;
+
+  /* TODO: Improve base invariant analysis for memrefs which are not local
+     (located in called functions).  */
+  const bool base_invariant
+    = mr->base != NULL_TREE && expr_invariant_in_loop_p (loop, mr->base);
+  const bool indirect = mr->type == MR_INDIRECT;
+
+  if (dump_file && (indirect || mr->is_incr))
+    {
+      fprintf (dump_file, "%s MR (%d): ", mr->is_incr ? "INCR" : "INDIRECT",
+               mr->mr_id);
+      print_generic_expr (dump_file, mr->mem);
+      fprintf (dump_file, "\twith base: ");
+      if (mr->base)
+        print_generic_expr (dump_file, mr->base);
+      else
+        fprintf (dump_file, "null");
+      fprintf (dump_file, " (is_inv=%d)\n", base_invariant);
+    }
+
+  if (!base_invariant)
+    return;
+  if (indirect)
+    current_indirect_mrs->insert (mr);
+  if (mr->is_incr)
+    current_incr_mrs->insert (mr);
+}
+
+/* Classify all memrefs recorded for the function of node N with respect
+   to LOOP.  The memrefs are not required to be located inside LOOP.  */
+
+static void
+analyse_callable_function (struct cgraph_node *n, class loop *loop)
+{
+  if (dump_file)
+    fprintf (dump_file, "Callable (%s):\n", n->dump_name ());
+
+  function *fn = DECL_STRUCT_FUNCTION (n->decl);
+  if (fmrs_map->count (fn) == 0)
+    return;
+
+  memref_set *fn_mrs = (*fmrs_map)[fn];
+  for (memref_set::const_iterator it = fn_mrs->begin ();
+       it != fn_mrs->end (); ++it)
+    collect_memref (*it, loop, false);
+}
+
+/* Add N to S together with every node recorded for N in nn_map.  */
+
+static void
+insert_node_with_callable_nodes (node_set &s, struct cgraph_node *n)
+{
+  s.insert (n);
+  if (nn_map->count (n) != 0)
+    {
+      node_set *callables = (*nn_map)[n];
+      for (node_set::const_iterator it = callables->begin ();
+           it != callables->end (); ++it)
+        s.insert (*it);
+    }
+}
+
+/* Return true if MR1 and MR2 both have a base, MR2 has an offset and the
+   main variants of the types of the two bases are the same.  In that case
+   COMPATIBLE_OFFSET is set to whether MR1 has an offset identical to the
+   one of MR2; it is left untouched when false is returned.  */
+
+static bool
+compatible_memrefs_p (memref_t *mr1, memref_t *mr2, bool &compatible_offset)
+{
+  if (mr1->base == NULL_TREE || mr2->base == NULL_TREE
+      || mr2->offset == NULL_TREE)
+    return false;
+
+  if (TYPE_MAIN_VARIANT (TREE_TYPE (mr1->base))
+      != TYPE_MAIN_VARIANT (TREE_TYPE (mr2->base)))
+    return false;
+
+  compatible_offset
+    = (mr1->offset != NULL_TREE && mr1->offset == mr2->offset);
+  return true;
+}
+
+/* Check whether the indirect memref MR can be optimized using the
+   incremental memref MR2 and record the result in mr_candidate_map.
+
+   MR is a candidate when it is compatible with MR2 (see
+   compatible_memrefs_p) and either MR itself or one of the memrefs it uses
+   has the same offset as MR2.  The first matching MR2 is stored as the
+   candidate for MR.  If a later match has a different offset, the entry is
+   set to NULL, which disables the optimization for MR; a disabled entry
+   stays disabled for all following calls.  */
+
+static void
+compare_memrefs (memref_t* mr, memref_t* mr2)
+{
+  /* TODO: improve analysis of memrefs from different functions: take into
+     account data flow and context.  */
+  bool compatible_offset = false;
+  if (!compatible_memrefs_p (mr, mr2, compatible_offset))
+    return;
+  if (!compatible_offset)
+    {
+      for (memref_set::const_iterator it = mr->used_mrs.begin ();
+           it != mr->used_mrs.end (); it++)
+        if ((*it)->offset && (*it)->offset == mr2->offset)
+          {
+            compatible_offset = true;
+            if (dump_file)
+              fprintf (dump_file, "Used MR (%d) and INC MR have "
+                       "the same offset\n", (*it)->mr_id);
+            break;
+          }
+    }
+  if (!compatible_offset)
+    return;
+  if (dump_file)
+    {
+      fprintf (dump_file, "MR (%d) is optimization candidate with offset: ",
+               mr->mr_id);
+      print_generic_expr (dump_file, mr2->offset);
+      fprintf (dump_file, "\n");
+    }
+
+  if (!mr_candidate_map->count (mr))
+    {
+      (*mr_candidate_map)[mr] = mr2;
+      return;
+    }
+
+  /* A NULL entry means an earlier conflict already disabled MR.  It must
+     not be dereferenced (the original code crashed here on the next
+     match) and it must not be re-enabled.  */
+  memref_t *prev_mr = (*mr_candidate_map)[mr];
+  if (prev_mr == NULL)
+    {
+      if (dump_file)
+        fprintf (dump_file, "The optimization is already disabled for "
+                 "MR (%d)\n", mr->mr_id);
+      return;
+    }
+
+  /* TODO: support analysis with incrementation of different fields.  */
+  if (prev_mr->offset != mr2->offset)
+    {
+      if (dump_file)
+        {
+          fprintf (dump_file, "It conflicts with previously found MR (%d) "
+                   "with offset ", prev_mr->mr_id);
+          print_generic_expr (dump_file, prev_mr->offset);
+          fprintf (dump_file, ", disable the optimization\n");
+        }
+      (*mr_candidate_map)[mr] = NULL;
+    }
+}
+
+/* In the given loop and all functions called from the loop, collect
+   indirect/incremental memrefs with invariant base address and inductive
+   offset.
+
+   LOOP belongs to function FN of call graph node N.  The memrefs are
+   gathered in current_incr_mrs and current_indirect_mrs, which live only
+   for the duration of this call.  If the loop contains calls and both sets
+   end up non-empty, every indirect memref is compared with every
+   incremental one (see compare_memrefs).  */
+
+static void
+collect_memrefs_for_loop (class loop *loop, struct cgraph_node *n,
+                          function *fn)
+{
+  current_incr_mrs = new memref_set;
+  current_indirect_mrs = new memref_set;
+
+  if (dump_file)
+    fprintf (dump_file, "Loop %d\n", loop->num);
+  if (fmrs_map->count (fn))
+    for (memref_set::const_iterator it = (*fmrs_map)[fn]->begin ();
+         it != (*fmrs_map)[fn]->end (); it++)
+      collect_memref (*it, loop, true);
+
+  /* Collect the set of functions called in the loop.  */
+  node_set callables;
+  struct cgraph_edge *e;
+  struct cgraph_node *n2;
+  for (e = n->callees; e; e = e->next_callee)
+    {
+      gcall *stmt = e->call_stmt;
+      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
+        continue;
+      insert_node_with_callable_nodes (callables, e->callee);
+    }
+  for (e = n->indirect_calls; e; e = e->next_callee)
+    {
+      gcall *stmt = e->call_stmt;
+      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))
+          || !e->indirect_info->targets)
+        continue;
+      for (unsigned i = 0; e->indirect_info->targets->iterate (i, &n2); ++i)
+        insert_node_with_callable_nodes (callables, n2);
+    }
+
+  /* Loops without calls are not analysed further.  This used to be an
+     early return that leaked both memref sets; now control always reaches
+     the cleanup below.  */
+  if (!callables.empty ())
+    {
+      if (dump_file)
+        fprintf (dump_file, "Go inside all callables of %s\n",
+                 n->dump_name ());
+
+      for (node_set::const_iterator it = callables.begin ();
+           it != callables.end (); it++)
+        analyse_callable_function (*it, loop);
+
+      if (!current_incr_mrs->empty () && !current_indirect_mrs->empty ())
+        {
+          if (dump_file)
+            {
+              fprintf (dump_file, "Loop has both incr and indirect memrefs\n"
+                       "Incr: ");
+              for (memref_set::const_iterator it = current_incr_mrs->begin ();
+                   it != current_incr_mrs->end (); it++)
+                fprintf (dump_file, "%d ", (*it)->mr_id);
+              fprintf (dump_file, "\nIndirect: ");
+              for (memref_set::const_iterator it
+                     = current_indirect_mrs->begin ();
+                   it != current_indirect_mrs->end (); it++)
+                fprintf (dump_file, "%d ", (*it)->mr_id);
+              fprintf (dump_file, "\n");
+            }
+          /* Check if indirect memref has a base address similar to one of
+             incremental memref.  */
+          for (memref_set::const_iterator it = current_indirect_mrs->begin ();
+               it != current_indirect_mrs->end (); it++)
+            for (memref_set::const_iterator it2 = current_incr_mrs->begin ();
+                 it2 != current_incr_mrs->end (); it2++)
+              compare_memrefs (*it, *it2);
+        }
+    }
+
+  /* Do not leave the globals pointing to freed sets.  */
+  delete current_incr_mrs;
+  delete current_indirect_mrs;
+  current_incr_mrs = NULL;
+  current_indirect_mrs = NULL;
+}
+
+/* Analyse the outermost loops of the function of node N.  Dominance and
+   loop information are computed for the duration of the analysis and
+   released afterwards.  N must have a gimple body.  */
+
+static void
+analyse_loops_in_cgraph_node (struct cgraph_node *n)
+{
+  if (dump_file)
+    fprintf (dump_file, "\nAnalyse loops in %s\n", n->dump_name ());
+
+  n->get_body ();
+  function *fn = DECL_STRUCT_FUNCTION (n->decl);
+  gcc_assert (fn && n->has_gimple_body_p ());
+
+  push_cfun (fn);
+  calculate_dominance_info (CDI_DOMINATORS);
+  loop_optimizer_init (LOOPS_NORMAL);
+
+  for (auto loop : loops_list (cfun, 0))
+    {
+      /* Walk only outermost loops, i.e. those whose outer loop has
+         number 0.  */
+      if (loop_outer (loop)->num == 0)
+        collect_memrefs_for_loop (loop, n, fn);
+    }
+
+  free_dominance_info (CDI_DOMINATORS);
+  loop_optimizer_finalize ();
+  pop_cfun ();
+}
+
+/* Analyse loops of all defined functions accepted by can_be_optimized and
+   then fill optimize_mrs_map: for each such function with recorded memrefs
+   it receives the memrefs of that function which have a non-null candidate
+   in mr_candidate_map.  */
+
+static void
+analyse_loops ()
+{
+  if (dump_file)
+    fprintf (dump_file, "\n\nLoops: procesing functions\n");
+  cgraph_node *n;
+  FOR_EACH_DEFINED_FUNCTION (n)
+    {
+      if (can_be_optimized (n))
+        analyse_loops_in_cgraph_node (n);
+      else if (dump_file)
+        fprintf (dump_file, "Skip the function\n");
+    }
+
+  if (dump_file)
+    fprintf (dump_file, "\n\nList of optimization candidates:\n");
+
+  FOR_EACH_DEFINED_FUNCTION (n)
+    {
+      function *fn = DECL_STRUCT_FUNCTION (n->decl);
+      if (!can_be_optimized (n) || !fmrs_map->count (fn))
+        continue;
+
+      /* Pick the candidates that belong to FN.  */
+      memref_set *fn_mrs = (*fmrs_map)[fn];
+      for (memref_map::iterator it = mr_candidate_map->begin ();
+           it != mr_candidate_map->end (); ++it)
+        {
+          memref_t *candidate = it->first;
+          if (it->second == NULL || !fn_mrs->count (candidate))
+            continue;
+          if (!optimize_mrs_map->count (fn))
+            (*optimize_mrs_map)[fn] = new memref_set;
+          (*optimize_mrs_map)[fn]->insert (candidate);
+        }
+
+      if (!dump_file || !optimize_mrs_map->count (fn))
+        continue;
+
+      fprintf (dump_file, "Function %s\n", n->dump_name ());
+      memref_set *to_optimize = (*optimize_mrs_map)[fn];
+      for (memref_set::const_iterator it = to_optimize->begin ();
+           it != to_optimize->end (); ++it)
+        {
+          memref_t *mr = *it;
+          memref_t *incr_mr = (*mr_candidate_map)[mr];
+          fprintf (dump_file, "MRs %d,%d with incremental offset ",
+                   mr->mr_id, incr_mr->mr_id);
+          print_generic_expr (dump_file, incr_mr->offset);
+          fprintf (dump_file, "\n");
+        }
+    }
+}
+
+/* Reduce the set filtering out memrefs with the same memory references,
+   return the result vector of memrefs.
+
+   The memrefs of SET are appended to VEC so that VEC holds at most one
+   memref for each group of memrefs whose MEMs are equal according to
+   operand_equal_p.  Among equal memrefs a store is preferred to a load and
+   otherwise the one with more statements; ties keep the memref seen
+   first.  */
+
+static void
+reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
+{
+  for (memref_set::const_iterator it = set->begin ();
+       it != set->end (); it++)
+    {
+      memref_t *mr1 = *it;
+      /* Set when VEC already has a memref with the same MEM.  Previously
+         MR1 was appended whenever it did not replace the existing entry,
+         so duplicates survived the filtering.  */
+      bool duplicate = false;
+      for (unsigned int i = 0; i < vec.length (); i++)
+        {
+          memref_t *mr2 = vec[i];
+          if (!operand_equal_p (mr1->mem, mr2->mem))
+            continue;
+          duplicate = true;
+          if (dump_file)
+            fprintf (dump_file, "The same mems in MRs %d and %d\n",
+                     mr1->mr_id, mr2->mr_id);
+          /* TODO: maybe build new memref which include stmts of both
+             mr1 and mr2.  */
+          if ((mr1->is_store && !mr2->is_store)
+              || mr1->stmts.length () > mr2->stmts.length ())
+            vec[i] = mr1;
+          /* Entries of VEC have distinct MEMs, no need to look further.  */
+          break;
+        }
+      if (!duplicate)
+        vec.safe_push (mr1);
+    }
+  if (dump_file)
+    {
+      fprintf (dump_file, "MRs (%d) after filtering: ", vec.length ());
+      for (unsigned int i = 0; i < vec.length (); i++)
+        fprintf (dump_file, "%d ", vec[i]->mr_id);
+      fprintf (dump_file, "\n");
+    }
+}
+
+/* Update DOM to the nearest common dominator of DOM and the basic blocks
+   of all statements of MR.  A null DOM on entry means that no block has
+   been seen yet.  Dominance info must be available.  */
+
+static void
+find_nearest_common_dominator (memref_t *mr, basic_block &dom)
+{
+  for (unsigned int i = 0; i < mr->stmts.length (); i++)
+    {
+      basic_block stmt_bb = gimple_bb (mr->stmts[i]);
+      gcc_assert (stmt_bb);
+      if (dom == NULL)
+        dom = stmt_bb;
+      else if (dom != stmt_bb)
+        dom = nearest_common_dominator (CDI_DOMINATORS, dom, stmt_bb);
+    }
+}
+
+/* Return true if DECL is a parameter or a SSA_NAME for a parameter.
+   An SSA_NAME without an underlying variable is not a parameter.
+   TODO: from gcc/tree-inline.c, maybe make it global.  */
+
+static bool
+is_parm (tree decl)
+{
+  tree var = decl;
+  if (TREE_CODE (decl) == SSA_NAME)
+    var = SSA_NAME_VAR (decl);
+
+  return var != NULL_TREE && TREE_CODE (var) == PARM_DECL;
+}
+
+/* TODO: the following functions are inspired by remap in gcc/tree-inline.c,
+ maybe we can share some functionality. */
+
+/* Return the tree that replaces NAME in a copied statement STMT.
+ If decl_map already has a replacement for NAME, an unshared copy of it
+ is returned. Otherwise NAME is returned unchanged unless IS_LHS is set
+ and NAME is an SSA_NAME; in that case a new SSA name of the same type is
+ created with STMT passed to make_ssa_name, recorded in decl_map and
+ returned. */
+
+static tree
+remap_name (tree name, gimple *stmt, bool is_lhs)
+{
+ tree new_tree = NULL_TREE;
+ /* Reuse an existing mapping. */
+ if (decl_map->count (name))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Find map: ");
+ print_generic_expr (dump_file, name);
+ fprintf (dump_file, " ");
+ print_generic_expr (dump_file, (*decl_map)[name]);
+ fprintf (dump_file, "\n");
+ }
+ return unshare_expr ((*decl_map)[name]);
+ }
+ /* Unmapped uses are kept as they are; only definitions get new names. */
+ if (!is_lhs)
+ return name;
+ if (TREE_CODE (name) == SSA_NAME)
+ {
+ /* Remap anonymous SSA names or SSA names of anonymous decls. */
+ tree var = SSA_NAME_VAR (name);
+ if (!var
+ || (!SSA_NAME_IS_DEFAULT_DEF (name)
+ && VAR_P (var) && !VAR_DECL_IS_VIRTUAL_OPERAND (var)
+ && DECL_ARTIFICIAL (var) && DECL_IGNORED_P (var)
+ && !DECL_NAME (var)))
+ {
+ new_tree = make_ssa_name (TREE_TYPE (name), stmt);
+ /* Carry over the identifier and the abnormal-PHI flag of NAME. */
+ if (!var && SSA_NAME_IDENTIFIER (name))
+ SET_SSA_NAME_VAR_OR_IDENTIFIER (new_tree,
+ SSA_NAME_IDENTIFIER (name));
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree)
+ = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name);
+ /* So can range-info. */
+ if (!POINTER_TYPE_P (TREE_TYPE (name))
+ && SSA_NAME_RANGE_INFO (name))
+ duplicate_ssa_name_range_info (new_tree,
+ SSA_NAME_RANGE_TYPE (name),
+ SSA_NAME_RANGE_INFO (name));
+ /* TODO: maybe correct the insertion. */
+ (*decl_map)[name] = new_tree;
+ if (dump_file)
+ {
+ fprintf (dump_file, "New map (no var): ");
+ print_generic_expr (dump_file, name);
+ fprintf (dump_file, " ");
+ print_generic_expr (dump_file, new_tree);
+ fprintf (dump_file, "\n");
+ }
+ return new_tree;
+ }
+ /* Any other SSA name defined by the statement also gets a fresh
+ name, but without copying the extra properties above. */
+ /* TODO: maybe remap_name or do the same as before for SSA_NAME_VAR. */
+ new_tree = make_ssa_name (TREE_TYPE (name), stmt);
+ (*decl_map)[name] = new_tree;
+ if (dump_file)
+ {
+ fprintf (dump_file, "New map: ");
+ print_generic_expr (dump_file, name);
+ fprintf (dump_file, " ");
+ print_generic_expr (dump_file, new_tree);
+ fprintf (dump_file, "\n");
+ }
+ }
+ else if (VAR_P (name) || TREE_CODE (name) == PARM_DECL)
+ {
+ /* Variables and parameters are not duplicated. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "VAR/PARM: ");
+ print_generic_expr (dump_file, name);
+ fprintf (dump_file, "\n");
+ }
+ return name;
+ }
+ else
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Unsupported: ");
+ print_generic_expr (dump_file, name);
+ fprintf (dump_file, "\n");
+ }
+ /* NOTE(review): gcc_unreachable () is commented out here, so any other
+ kind of tree is returned unchanged. */
+ //gcc_unreachable ();
+ return name;
+ }
+ return new_tree;
+}
+
+/* Passed to walk_tree. Copies the node pointed to, if appropriate.
+ *TP is replaced by a copy made with copy_node when it is an expression,
+ TREE_LIST, TREE_VEC, TYPE_DECL or OMP_CLAUSE. CONSTRUCTOR and
+ STATEMENT_LIST are not expected. For types, declarations and constants
+ *WALK_SUBTREES is cleared and the node is left shared. DATA is unused.
+ Always returns NULL_TREE. */
+
+static tree
+ipa_copy_tree_r (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
+{
+ enum tree_code code = TREE_CODE (*tp);
+ enum tree_code_class cl = TREE_CODE_CLASS (code);
+
+ /* We make copies of most nodes. */
+ if (IS_EXPR_CODE_CLASS (cl)
+ || code == TREE_LIST
+ || code == TREE_VEC
+ || code == TYPE_DECL
+ || code == OMP_CLAUSE)
+ {
+ /* Because the chain gets clobbered when we make a copy, we save it
+ here. */
+ tree chain = NULL_TREE, new_tree;
+
+ if (CODE_CONTAINS_STRUCT (code, TS_COMMON))
+ chain = TREE_CHAIN (*tp);
+
+ /* Copy the node. */
+ new_tree = copy_node (*tp);
+
+ *tp = new_tree;
+
+ /* Now, restore the chain, if appropriate. That will cause
+ walk_tree to walk into the chain as well. */
+ /* NOTE(review): PARM_DECL is not among the codes accepted by the
+ enclosing condition, so that alternative cannot trigger here. */
+ if (code == PARM_DECL
+ || code == TREE_LIST
+ || code == OMP_CLAUSE)
+ TREE_CHAIN (*tp) = chain;
+
+ /* For now, we don't update BLOCKs when we make copies. So, we
+ have to nullify all BIND_EXPRs. */
+ if (TREE_CODE (*tp) == BIND_EXPR)
+ BIND_EXPR_BLOCK (*tp) = NULL_TREE;
+ }
+ else if (code == CONSTRUCTOR || code == STATEMENT_LIST)
+ gcc_unreachable ();
+ else if (TREE_CODE_CLASS (code) == tcc_type
+ || TREE_CODE_CLASS (code) == tcc_declaration
+ || TREE_CODE_CLASS (code) == tcc_constant)
+ *walk_subtrees = 0;
+ return NULL_TREE;
+}
+
+/* Remap the GIMPLE operand pointed to by *TP. DATA is really a
+ 'struct walk_stmt_info *'. DATA->INFO is a 'gimple *'.
+ WALK_SUBTREES is used to indicate walk_gimple_op whether to keep
+ recursing into the children nodes of *TP.
+ SSA names and automatic variables of the current function are replaced
+ through remap_name, MEM_REFs are rebuilt around their remapped address,
+ other nodes are copied with ipa_copy_tree_r. Always returns NULL. */
+
+static tree
+remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
+{
+ struct walk_stmt_info *wi_p = (struct walk_stmt_info *) data;
+ gimple *stmt = (gimple *) wi_p->info;
+
+ /* For recursive invocations this is no longer the LHS itself. */
+ bool is_lhs = wi_p->is_lhs;
+ wi_p->is_lhs = false;
+
+ if (TREE_CODE (*tp) == SSA_NAME)
+ {
+ *tp = remap_name (*tp, stmt, is_lhs);
+ *walk_subtrees = 0;
+ /* NOTE(review): this relies on wi_p->stmt having been set by the
+ walker; the caller in this file only sets wi.info -- confirm. */
+ if (is_lhs)
+ SSA_NAME_DEF_STMT (*tp) = wi_p->stmt;
+ return NULL;
+ }
+ else if (auto_var_in_fn_p (*tp, cfun->decl))
+ {
+ /* Local variables and labels need to be replaced by equivalent
+ variables. We don't want to copy static variables; there's
+ only one of those, no matter how many times we inline the
+ containing function. Similarly for globals from an outer
+ function. */
+ tree new_decl;
+
+ /* Remap the declaration. */
+ new_decl = remap_name (*tp, stmt, is_lhs);
+ gcc_assert (new_decl);
+ /* Replace this variable with the copy. */
+ STRIP_TYPE_NOPS (new_decl);
+ /* ??? The C++ frontend uses void * pointer zero to initialize
+ any other type. This confuses the middle-end type verification.
+ As cloned bodies do not go through gimplification again the fixup
+ there doesn't trigger. */
+ if (TREE_CODE (new_decl) == INTEGER_CST
+ && !useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (new_decl)))
+ new_decl = fold_convert (TREE_TYPE (*tp), new_decl);
+ *tp = new_decl;
+ *walk_subtrees = 0;
+ }
+ else if (TREE_CODE (*tp) == STATEMENT_LIST || TREE_CODE (*tp) == SAVE_EXPR)
+ {
+ /* These trees are not expected in the statements being copied. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "Unexpected tree: ");
+ print_generic_expr (dump_file, *tp);
+ fprintf (dump_file, "\n");
+ }
+ gcc_unreachable ();
+ }
+ else
+ {
+ /* Otherwise, just copy the node. Note that copy_tree_r already
+ knows not to copy VAR_DECLs, etc., so this is safe. */
+
+ if (TREE_CODE (*tp) == MEM_REF)
+ {
+ /* We need to re-canonicalize MEM_REFs from inline substitutions
+ that can happen when a pointer argument is an ADDR_EXPR.
+ Recurse here manually to allow that. */
+ tree ptr = TREE_OPERAND (*tp, 0);
+ tree type = TREE_TYPE (*tp);
+ tree old = *tp;
+ walk_tree (&ptr, remap_gimple_op_r, data, NULL);
+ *tp = fold_build2 (MEM_REF, type, ptr, TREE_OPERAND (*tp, 1));
+ /* Carry the flags of the old reference over to the new one. */
+ TREE_THIS_VOLATILE (*tp) = TREE_THIS_VOLATILE (old);
+ TREE_SIDE_EFFECTS (*tp) = TREE_SIDE_EFFECTS (old);
+ TREE_NO_WARNING (*tp) = TREE_NO_WARNING (old);
+ /* TODO: maybe support this case. */
+ gcc_assert (MR_DEPENDENCE_CLIQUE (old) == 0);
+ /* We cannot propagate the TREE_THIS_NOTRAP flag if we have
+ remapped a parameter as the property might be valid only
+ for the parameter itself. */
+ if (TREE_THIS_NOTRAP (old) && (!is_parm (TREE_OPERAND (old, 0))))
+ TREE_THIS_NOTRAP (*tp) = 1;
+ REF_REVERSE_STORAGE_ORDER (*tp) = REF_REVERSE_STORAGE_ORDER (old);
+ *walk_subtrees = 0;
+ return NULL;
+ }
+
+ /* Here is the "usual case". Copy this tree node, and then
+ tweak some special cases. */
+ ipa_copy_tree_r (tp, walk_subtrees, NULL);
+ gcc_assert (!(TREE_CODE (*tp) == TARGET_EXPR && TREE_OPERAND (*tp, 3)));
+ if (TREE_CODE (*tp) == ADDR_EXPR)
+ {
+ /* TODO: If this used to be invariant, but is not any longer,
+ then regimplification is probably needed. */
+ walk_tree (&TREE_OPERAND (*tp, 0), remap_gimple_op_r, data, NULL);
+ recompute_tree_invariant_for_addr_expr (*tp);
+ *walk_subtrees = 0;
+ }
+ }
+ /* TODO: maybe we need to update TREE_BLOCK (*tp). */
+
+ /* Keep iterating. */
+ return NULL_TREE;
+}
+
+/* Create a call graph edge from node N for the call statement STMT and
+   make sure the edge has an entry in ipa_call_summaries.  Nothing is done
+   when the call has no known function declaration.  */
+
+static void
+create_cgraph_edge (cgraph_node *n, gimple *stmt)
+{
+  gcall *call = dyn_cast <gcall *> (stmt);
+  basic_block call_bb = gimple_bb (stmt);
+  tree callee_decl = gimple_call_fndecl (call);
+  if (callee_decl == NULL_TREE)
+    return;
+
+  cgraph_node *callee = cgraph_node::get_create (callee_decl);
+  struct cgraph_edge *new_edge = n->create_edge (callee, call,
+                                                 call_bb->count);
+  /* TODO: maybe we need to store ipa_call_summary result.  */
+  ipa_call_summaries->get_create (new_edge);
+}
+
+/* Insert prefetch intrinsics in this function, return nonzero on success.
+
+   N is the call graph node of FN; FN must have an entry in
+   optimize_mrs_map and be the current function with dominance info
+   computed.  The function is handled only when all its memrefs to be
+   optimized share one incremental memref whose step fits an unsigned
+   HOST_WIDE_INT.  In that case the block dominating the memrefs is split
+   and the following is inserted into it: the incremented variable advanced
+   by step * param_ipa_prefetch_distance_factor, copies of the statements
+   that evaluate the addresses from it, and a call to the prefetch builtin
+   for every distinct memref.  New call graph edges are created for the
+   inserted calls.  Returns 0 if nothing was changed.  */
+
+static int
+optimize_function (cgraph_node *n, function *fn)
+{
+  /* In a given function, optimize only indirect memrefs with
+     the same incremental memref.
+     TODO: implement the optimization for other cases.  */
+  bool different_incrementals = false;
+  memref_t *first_mr = NULL;
+  memref_set used_mrs;
+  for (memref_set::const_iterator it = (*optimize_mrs_map)[fn]->begin ();
+       it != (*optimize_mrs_map)[fn]->end (); it++)
+    {
+      memref_t *mr = *it;
+      if (!first_mr)
+        first_mr = mr;
+      else if ((*mr_candidate_map)[first_mr] != (*mr_candidate_map)[mr])
+        {
+          different_incrementals = true;
+          break;
+        }
+      for (memref_set::const_iterator it2 = mr->used_mrs.begin ();
+           it2 != mr->used_mrs.end (); it2++)
+        used_mrs.insert (*it2);
+    }
+  if (different_incrementals)
+    {
+      if (dump_file)
+        fprintf (dump_file, "It contains memrefs with different "
+                 "incrementals. Skip the case.\n");
+      return 0;
+    }
+  memref_t *inc_mr = (*mr_candidate_map)[first_mr];
+  if (!inc_mr->stmts[0] || !gimple_assign_single_p (inc_mr->stmts[0]))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Incremental MR with unexpected stmt. "
+                 "Skip the case.\n");
+      return 0;
+    }
+  /* The prefetch distance below is computed with tree_to_uhwi, which
+     aborts on anything but a constant fitting an unsigned HOST_WIDE_INT.
+     Give up on other steps here, before the function is modified.  */
+  tree step = inc_mr->step;
+  if (step == NULL_TREE || !tree_fits_uhwi_p (step))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Incremental MR with unsupported step. "
+                 "Skip the case.\n");
+      return 0;
+    }
+  if (dump_file && !used_mrs.empty ())
+    print_mrs_ids (used_mrs, "Common list of used mrs:\n");
+
+  /* Find a memref in used mrs which corresponds to the found incremental
+     memref.  */
+  memref_t *comp_mr = NULL;
+  for (memref_set::const_iterator it = used_mrs.begin ();
+       it != used_mrs.end (); it++)
+    {
+      bool c_offset;
+      if ((*it)->type != MR_SIMPLE || inc_mr->type != MR_SIMPLE
+          || !compatible_memrefs_p (*it, inc_mr, c_offset))
+        continue;
+      if (c_offset)
+        {
+          if (dump_file)
+            fprintf (dump_file, "Found compatible used MR (%d) and "
+                     "incr MR (%d)\n", (*it)->mr_id, inc_mr->mr_id);
+          comp_mr = (*it);
+        }
+    }
+  if (!comp_mr || !comp_mr->stmts[0]
+      || !gimple_assign_single_p (comp_mr->stmts[0]))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Compatible MR in this function is not found "
+                 " or it has unexpected stmt. Skip the case.\n");
+      return 0;
+    }
+
+  /* Filter out memrefs with the same memory references.
+     TODO: maybe do the same with used mrs.  */
+  vec<memref_t *> vmrs = vNULL;
+  reduce_memref_set ((*optimize_mrs_map)[fn], vmrs);
+
+  /* Find insertion place.  Create new BB.  */
+  /* TODO: maybe it is useful to process also used_mrs.  */
+  basic_block dom_bb = NULL;
+  for (unsigned int i = 0; i < vmrs.length (); i++)
+    find_nearest_common_dominator (vmrs[i], dom_bb);
+
+  if (!dom_bb)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Dominator bb for MRs is not found. "
+                 "Skip the case.\n");
+      vmrs.release ();
+      return 0;
+    }
+  else if (dump_file)
+    fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index);
+
+  split_block (dom_bb, (gimple *) NULL);
+  gimple_stmt_iterator gsi = gsi_last_bb (dom_bb);
+
+  /* Create new inc var.  Insert new_var = old_var + step * factor.  */
+  decl_map = new tree_map;
+  gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
+  tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
+  gimple_seq stmts = NULL;
+  tree var_type = TREE_TYPE (inc_var);
+  enum tree_code inc_code;
+  if (TREE_CODE (var_type) == POINTER_TYPE)
+    inc_code = POINTER_PLUS_EXPR;
+  else
+    inc_code = PLUS_EXPR;
+  /* Keep the full width of the step instead of truncating to unsigned.  */
+  unsigned HOST_WIDE_INT dist_val
+    = tree_to_uhwi (step) * param_ipa_prefetch_distance_factor;
+  tree dist = build_int_cst (TREE_TYPE (step), (HOST_WIDE_INT) dist_val);
+  tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist);
+  (*decl_map)[inc_var] = new_inc_var;
+
+  /* Create other new vars.  Insert new stmts.  */
+  struct walk_stmt_info wi;
+  stmt_set processed_stmts;
+  memref_tree_map mr_new_trees;
+  for (memref_set::const_iterator it = used_mrs.begin ();
+       it != used_mrs.end (); it++)
+    {
+      memref_t *mr = *it;
+      gimple *last_stmt = NULL;
+      if (mr == comp_mr)
+        continue;
+      for (int i = mr->stmts.length () - 1; i >= 0 ; i--)
+        {
+          if (processed_stmts.count (mr->stmts[i]))
+            continue;
+          processed_stmts.insert (mr->stmts[i]);
+          if (dump_file)
+            {
+              fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
+                       i, mr->mr_id);
+              print_gimple_stmt (dump_file, mr->stmts[i], 0);
+            }
+          /* Create a new copy of STMT and duplicate STMT's virtual
+             operands.  */
+          gimple *copy = gimple_copy (mr->stmts[i]);
+          gcc_checking_assert (!is_gimple_debug (copy));
+
+          /* Remap all the operands in COPY.  */
+          memset (&wi, 0, sizeof (wi));
+          last_stmt = copy;
+          wi.info = copy;
+          walk_gimple_op (copy, remap_gimple_op_r, &wi);
+          if (dump_file)
+            {
+              fprintf (dump_file, "Stmt %d after remap:\n",i);
+              print_gimple_stmt (dump_file, copy, 0);
+            }
+          gimple_seq_add_stmt (&stmts, copy);
+        }
+      /* NOTE(review): this fires if all statements of MR were already
+         copied for another used memref -- confirm that cannot happen.  */
+      gcc_assert (last_stmt);
+      mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
+      if (dump_file)
+        {
+          fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id);
+          print_generic_expr (dump_file, gimple_assign_lhs (last_stmt));
+          fprintf (dump_file, "\n");
+        }
+    }
+  /* On new load check page fault.  */
+  /* Insert prefetch instructions.  */
+  if (dump_file)
+    fprintf (dump_file, "Evaluate addresses and insert prefetch insn.\n");
+
+  vec<gimple *> pcalls = vNULL;
+  tree local;
+  switch (param_ipa_prefetch_locality)
+    {
+    case 0:
+      local = integer_zero_node;
+      break;
+    case 1:
+      local = integer_one_node;
+      break;
+    case 2:
+      local = build_int_cst (integer_type_node, 2);
+      break;
+    default:
+    case 3:
+      local = integer_three_node;
+      break;
+    }
+  for (unsigned int j = 0; j < vmrs.length (); j++)
+    {
+      memref_t *mr = vmrs[j];
+      /* The prefetch address is the SSA name addressing MR's MEM_REF.
+         Without one there is nothing to prefetch: skip MR rather than
+         tripping the MEM_REF assert or building a call with a null
+         argument.  */
+      tree addr = NULL_TREE;
+      if (TREE_CODE (mr->mem) == MEM_REF)
+        addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
+      if (addr == NULL_TREE)
+        {
+          if (dump_file)
+            fprintf (dump_file, "MR (%d) has no SSA address to prefetch. "
+                     "Skip it.\n", mr->mr_id);
+          continue;
+        }
+      /* Don't need to copy the last stmt, since we insert prefetch insn
+         instead of it.  */
+      for (int i = mr->stmts.length () - 1; i >= 1 ; i--)
+        {
+          if (processed_stmts.count (mr->stmts[i]))
+            continue;
+          processed_stmts.insert (mr->stmts[i]);
+
+          gimple *copy = gimple_copy (mr->stmts[i]);
+          gcc_checking_assert (!is_gimple_debug (copy));
+
+          /* Remap all the operands in COPY.  */
+          memset (&wi, 0, sizeof (wi));
+          wi.info = copy;
+          walk_gimple_op (copy, remap_gimple_op_r, &wi);
+          if (dump_file)
+            {
+              fprintf (dump_file, "Stmt %d after remap:\n",i);
+              print_gimple_stmt (dump_file, copy, 0);
+            }
+          gimple_seq_add_stmt (&stmts, copy);
+        }
+      gimple *last_stmt = mr->stmts[0];
+      gcc_assert (last_stmt);
+      mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
+      tree write_p = mr->is_store ? integer_one_node : integer_zero_node;
+      /* Use the remapped address if the copies above created one.  */
+      if (decl_map->count (addr))
+        addr = (*decl_map)[addr];
+      last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
+                                     3, addr, write_p, local);
+      pcalls.safe_push (last_stmt);
+      gimple_seq_add_stmt (&stmts, last_stmt);
+    }
+
+  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+  delete decl_map;
+  decl_map = NULL;
+
+  /* Modify cgraph inserting calls to prefetch intrinsics.  */
+  for (unsigned i = 0; i < pcalls.length (); i++)
+    create_cgraph_edge (n, pcalls[i]);
+  ipa_update_overall_fn_summary (n);
+
+  /* The vectors own heap storage that is not freed automatically.  */
+  pcalls.release ();
+  vmrs.release ();
+
+  return 1;
+}
+
+/* Run optimize_function on every defined function that has an entry in
+   optimize_mrs_map, with dominance info computed for it.  Return the
+   bitwise OR of the results, i.e. nonzero if any function was changed.  */
+
+static int
+insert_prefetch ()
+{
+  int changed = 0;
+  cgraph_node *n;
+  FOR_EACH_DEFINED_FUNCTION (n)
+    {
+      function *fn = DECL_STRUCT_FUNCTION (n->decl);
+      if (optimize_mrs_map->count (fn) == 0)
+        continue;
+      if (dump_file)
+        fprintf (dump_file, "Optimize function %s\n", n->dump_name ());
+
+      push_cfun (fn);
+      calculate_dominance_info (CDI_DOMINATORS);
+      changed |= optimize_function (n, fn);
+      free_dominance_info (CDI_DOMINATORS);
+      pop_cfun ();
+    }
+  return changed;
+}
+
+/* Entry point of the pass.  Does nothing if the target has no prefetch
+   support.  Otherwise allocates the global analysis maps, makes sure the
+   prefetch builtin is declared, runs the analyses, inserts prefetches if
+   any candidates were found, and frees the maps together with the objects
+   they own.  Returns the result of insert_prefetch combined with
+   TODO_verify_all.  */
+
+static unsigned int
+ipa_prefetch (void)
+{
+  if (!targetm.have_prefetch ())
+    {
+      if (dump_file)
+        fprintf (dump_file, "Prefetch is not supported by the target.\n");
+      return 0;
+    }
+
+  unsigned int ret = 0;
+  el_map = new edge_in_loop;
+  nl_map = new node_in_loop;
+  icn_map = new node_to_iedge_map;
+  nn_map = new node_to_node_map;
+  tm_map = new tree_memref_map;
+  fmrs_map = new funct_mrs_map;
+  mr_candidate_map = new memref_map;
+  optimize_mrs_map = new funct_mrs_map;
+
+  max_mr_id = 0;
+  /* TODO: check if we really need this init.  */
+  if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH))
+    {
+      tree type = build_function_type_list (void_type_node,
+                                            const_ptr_type_node, NULL_TREE);
+      tree decl = add_builtin_function ("__builtin_prefetch", type,
+                                        BUILT_IN_PREFETCH, BUILT_IN_NORMAL,
+                                        NULL, NULL_TREE);
+      DECL_IS_NOVOPS (decl) = true;
+      set_builtin_decl (BUILT_IN_PREFETCH, decl, false);
+    }
+
+  analyse_cgraph ();
+  prepare_indirect_call_info ();
+  propagate_loop_info_in_cgraph ();
+  collect_memory_references ();
+  analyse_loops ();
+
+  /* TODO: implement some specific heuristics.  */
+  if (!optimize_mrs_map->empty ())
+    ret = insert_prefetch ();
+
+  delete el_map;
+  delete nl_map;
+  for (node_to_iedge_map::iterator it = icn_map->begin ();
+       it != icn_map->end (); ++it)
+    delete it->second;
+  delete icn_map;
+  for (node_to_node_map::iterator it = nn_map->begin ();
+       it != nn_map->end (); ++it)
+    delete it->second;
+  delete nn_map;
+  for (tree_memref_map::iterator it = tm_map->begin ();
+       it != tm_map->end (); ++it)
+    delete it->second;
+  delete tm_map;
+  for (funct_mrs_map::iterator it = fmrs_map->begin ();
+       it != fmrs_map->end (); ++it)
+    delete it->second;
+  delete fmrs_map;
+  delete mr_candidate_map;
+  /* The per-function sets of optimize_mrs_map are allocated by
+     analyse_loops and have to be freed as well; they used to leak.  */
+  for (funct_mrs_map::iterator it = optimize_mrs_map->begin ();
+       it != optimize_mrs_map->end (); ++it)
+    delete it->second;
+  delete optimize_mrs_map;
+
+  /* TODO: maybe add other todos.  */
+  return ret | TODO_verify_all;
+}
+
+/* Static description of the IPA prefetch pass: a simple IPA pass named
+ "ipa_prefetch" timed under TV_IPA_PREFETCH, with no property
+ requirements and no TODO flags of its own. */
+const pass_data pass_data_ipa_prefetch =
+{
+ SIMPLE_IPA_PASS, // type
+ "ipa_prefetch", // name
+ OPTGROUP_NONE, // optinfo_flags
+ TV_IPA_PREFETCH, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0, // todo_flags_finish
+};
+
+/* The IPA prefetch pass object.  Its gate is defined out of line below;
+   execution ignores the function argument and runs ipa_prefetch.  */
+
+class pass_ipa_prefetch : public simple_ipa_opt_pass
+{
+public:
+  pass_ipa_prefetch (gcc::context *ctxt)
+    : simple_ipa_opt_pass (pass_data_ipa_prefetch, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) override;
+
+  unsigned int execute (function *) override
+  {
+    return ipa_prefetch ();
+  }
+}; // class pass_ipa_prefetch
+
+/* Return true if the pass should run: at -O3 or above with
+   flag_ipa_prefetch set, no errors seen, a single LTO partition, and
+   either LTO or whole-program mode.  */
+
+bool
+pass_ipa_prefetch::gate (function *)
+{
+  if (optimize < 3 || !flag_ipa_prefetch)
+    return false;
+  /* Don't bother doing anything if the program has errors.  */
+  if (seen_error ())
+    return false;
+  if (flag_lto_partition != LTO_PARTITION_ONE)
+    return false;
+  /* Only enable the pass in lto or whole_program.  */
+  return in_lto_p || flag_whole_program;
+}
+
+} // anon namespace
+
+/* Create a new instance of the IPA prefetch pass for context CTXT. The
+ returned object is allocated with new. */
+simple_ipa_opt_pass *
+make_pass_ipa_prefetch (gcc::context *ctxt)
+{
+ return new pass_ipa_prefetch (ctxt);
+}
diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 261a72085..5355cf2f4 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -3033,6 +3033,14 @@ process_edge_to_unknown_caller (cgraph_edge *cs)
gcc_checking_assert (from_ifs);
isra_call_summary *csum = call_sums->get (cs);
+ /* TODO: implement better support for call edges inserted after summary
+ collection but before sra wpa invocation. */
+ if (!csum)
+ {
+ csum = call_sums->get_create (cs);
+ csum->m_return_ignored = true;
+ }
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
cs->caller->dump_name ());
diff --git a/gcc/params.opt b/gcc/params.opt
index 7e5c119cf..5c07e3986 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -309,6 +309,14 @@ Maximum pieces that IPA-SRA tracks per formal parameter, as a consequence, also
Common Joined UInteger Var(param_ipa_sra_ptr_growth_factor) Init(2) Param Optimization
Maximum allowed growth of number and total size of new parameters that ipa-sra replaces a pointer to an aggregate with.
+-param=ipa-prefetch-distance-factor=
+Common Joined UInteger Var(param_ipa_prefetch_distance_factor) Init(4) Param Optimization
+The factor represents the number of induction variable increments used to evaluate an indirect memory address for IPA prefetch.
+
+-param=ipa-prefetch-locality=
+Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) Param Optimization
+The parameter represents temporal locality values in the following way: 0:pstl1strm, 1:pstl3keep, 2:pstl2keep, 3:pstl1keep.
+
-param=ira-loop-reserved-regs=
Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization
The number of registers in each class kept unused by loop invariant motion.
diff --git a/gcc/passes.def b/gcc/passes.def
index b7d4f7b4e..4c1436766 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -158,6 +158,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_ipa_icf);
NEXT_PASS (pass_ipa_devirt);
NEXT_PASS (pass_ipa_icp);
+ NEXT_PASS (pass_ipa_prefetch);
NEXT_PASS (pass_ipa_cp);
NEXT_PASS (pass_ipa_sra);
NEXT_PASS (pass_ipa_cdtor_merge);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 18a9f62cc..810ae20fd 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp")
DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics")
DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
+DEFTIMEVAR (TV_IPA_PREFETCH , "ipa prefetch")
DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 1733931c3..63f1192ae 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -529,6 +529,7 @@ extern ipa_opt_pass_d *make_pass_ipa_icp (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_prefetch (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化