加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch 9.71 KB
一键复制 编辑 原始数据 按行查看 历史
From 19ded9dad06b22b9b7aa9e3902e3e7a38a2256ab Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Sat, 29 Jul 2023 18:27:10 +0800
Subject: [PATCH 18/22] [ccmp] Add another optimization opportunity for ccmp
instruction
Add flag -fccmp2.
Enables the use of the ccmp instruction by creating new conflict
relationships in cases where temporary expression replacement
cannot otherwise be applied effectively.
---
gcc/ccmp.cc | 33 ++++
gcc/ccmp.h | 1 +
gcc/common.opt | 4 +
gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 15 ++
gcc/tree-ssa-coalesce.cc | 197 ++++++++++++++++++++++
5 files changed, 250 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
index 3db0a264e..e34f3bcc6 100644
--- a/gcc/ccmp.cc
+++ b/gcc/ccmp.cc
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfgexpand.h"
#include "ccmp.h"
#include "predict.h"
+#include "gimple-iterator.h"
/* Check whether T is a simple boolean variable or a SSA name
set by a comparison operator in the same basic block. */
@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
return false;
}
+/* Return true if BB ends in a GIMPLE_COND that compares a boolean SSA name
+   against an integer constant with == or !=, and the GIMPLE_ASSIGN that
+   defines that SSA name is accepted by ccmp_candidate_p.  Return false
+   in every other case, including when BB has no last statement.  */
+bool
+check_ccmp_candidate (basic_block bb)
+{
+  gimple *cond = gsi_stmt (gsi_last_bb (bb));
+  if (!cond || gimple_code (cond) != GIMPLE_COND)
+    return false;
+
+  /* Only equality and inequality tests are considered.  */
+  enum tree_code cmp = gimple_cond_code (cond);
+  if (cmp != NE_EXPR && cmp != EQ_EXPR)
+    return false;
+
+  tree lhs = gimple_cond_lhs (cond);
+  tree rhs = gimple_cond_rhs (cond);
+  if (TREE_CODE (lhs) != SSA_NAME
+      || TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE
+      || TREE_CODE (rhs) != INTEGER_CST)
+    return false;
+
+  /* The tested boolean must be computed by an assignment.  */
+  gimple *def = SSA_NAME_DEF_STMT (lhs);
+  if (!def || gimple_code (def) != GIMPLE_ASSIGN)
+    return false;
+
+  return ccmp_candidate_p (def);
+}
+
/* Extract the comparison we want to do from the tree. */
void
get_compare_parts (tree t, int *up, rtx_code *rcode,
diff --git a/gcc/ccmp.h b/gcc/ccmp.h
index 1799d5fed..efe3a1c14 100644
--- a/gcc/ccmp.h
+++ b/gcc/ccmp.h
@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. If not see
#define GCC_CCMP_H
extern rtx expand_ccmp_expr (gimple *, machine_mode);
+extern bool check_ccmp_candidate (basic_block bb);
#endif /* GCC_CCMP_H */
diff --git a/gcc/common.opt b/gcc/common.opt
index 4d91ce8cf..0aa516719 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2017,6 +2017,10 @@ fira-verbose=
Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
-fira-verbose=<number> Control IRA's level of diagnostic messages.
+fccmp2
+Common Var(flag_ccmp2) Init(0) Optimization
+Optimize potential ccmp instruction in complex scenarios.
+
fivopts
Common Var(flag_ivopts) Init(1) Optimization
Optimize induction variables on trees.
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
new file mode 100644
index 000000000..b509ba810
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
+
+/* Loop whose only exit test ORs two comparisons, one of them on a counter
+   that is decremented on every iteration.  Compiled with -fccmp2 (see
+   dg-options above); the dg-final directive below expects exactly one
+   ccmp instruction in the generated assembly.  */
+int func (int a, int b, int c)
+{
+ while(1)
+ {
+ if(a-- == 0 || b >= c)
+ {
+ return 1;
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
diff --git a/gcc/tree-ssa-coalesce.cc b/gcc/tree-ssa-coalesce.cc
index dccf41ab8..195e06428 100644
--- a/gcc/tree-ssa-coalesce.cc
+++ b/gcc/tree-ssa-coalesce.cc
@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see
#include "explow.h"
#include "tree-dfa.h"
#include "stor-layout.h"
+#include "ccmp.h"
+#include "target.h"
+#include "tree-outof-ssa.h"
/* This set of routines implements a coalesce_list. This is an object which
is used to track pairs of ssa_names which are desirable to coalesce
@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
bitmap_clear (&ptr->live_base_var);
}
+/* Return true if GS is an assignment for which gimple_assign_copy_p holds
+   and whose LHS and RHS1 are both SSA_NAMEs.  */
+
+static inline bool
+gimple_is_assign_copy_p (gimple *gs)
+{
+  if (!is_gimple_assign (gs) || !gimple_assign_copy_p (gs))
+    return false;
+
+  tree dst = gimple_assign_lhs (gs);
+  tree src = gimple_assign_rhs1 (gs);
+  return TREE_CODE (dst) == SSA_NAME && TREE_CODE (src) == SSA_NAME;
+}
+
+#define MAX_CCMP_CONFLICT_NUM 5
+
+/* Count the non-null entries in CONFLICT_GRAPH's conflict vector and, if
+   there are at least MAX_CCMP_CONFLICT_NUM of them, release the vector.
+   Otherwise leave CONFLICT_GRAPH untouched.  */
+
+static void
+remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
+{
+  int populated = 0;
+  unsigned idx;
+  bitmap entry;
+
+  FOR_EACH_VEC_ELT (conflict_graph->conflicts, idx, entry)
+    populated += entry ? 1 : 0;
+
+  if (populated < MAX_CCMP_CONFLICT_NUM)
+    return;
+
+  conflict_graph->conflicts.release ();
+}
+
+/* Extend CONFLICT_GRAPH with the statements of BB that follow the first
+   statement marked visited.  BB is walked backwards from its last
+   statement and the walk stops at (without processing) that first visited
+   statement.  For every other non-debug statement the defs are passed to
+   live_track_process_def and the uses to live_track_process_use on LIVE;
+   for an SSA-to-SSA copy the source is first cleared from LIVE.
+   Nothing is done when no statement of BB is marked visited.  */
+
+static void
+process_add_graph (live_track *live, basic_block bb,
+                   ssa_conflicts *conflict_graph)
+{
+  tree use, def;
+  ssa_op_iter iter;
+  gimple *first_visit_stmt = NULL;
+
+  /* Locate the earliest visited statement; the backward walk ends there.  */
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      if (gimple_visited_p (gsi_stmt (gsi)))
+        {
+          first_visit_stmt = gsi_stmt (gsi);
+          break;
+        }
+    }
+  if (!first_visit_stmt)
+    return;
+
+  for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
+       gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+
+      /* Debug statements must not contribute uses to LIVE, otherwise the
+         conflicts recorded here would depend on whether debug info is
+         being generated.  The test used to be
+         "gimple_visited_p (stmt) && is_gimple_debug (stmt)", which never
+         fired because the visited flag is only set on assignments.
+         NOTE(review): visited statements other than FIRST_VISIT_STMT are
+         still processed again here, exactly as before; if skipping them
+         was the intent, that needs to be confirmed separately.  */
+      if (is_gimple_debug (stmt))
+        continue;
+
+      /* A copy does not make its source conflict with its destination.  */
+      if (gimple_is_assign_copy_p (stmt))
+        live_track_clear_var (live, gimple_assign_rhs1 (stmt));
+
+      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
+        live_track_process_def (live, def, conflict_graph);
+
+      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+        live_track_process_use (live, use);
+    }
+}
+
+/* Populate CONFLICT_GRAPH for BB, whose last statement must have a
+   condition LHS (asserted below; the caller in this file only gets here
+   after check_ccmp_candidate succeeded).
+
+   A live tracker for MAP is seeded with BB's live-on-exit set from
+   LIVEINFO.  Starting at the condition's LHS, SSA definitions are followed
+   through assignments that are located in BB and satisfy
+   ssa_is_replaceable_p; each such statement is marked visited and its defs
+   and uses are fed to the live tracker.  process_add_graph then handles
+   the rest of BB, the tracker is freed, and
+   remove_high_cost_graph_for_ccmp is applied to CONFLICT_GRAPH.  */
+
+static void
+add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
+ tree_live_info_p liveinfo, var_map map, basic_block bb)
+{
+ live_track *live;
+ tree use, def;
+ ssa_op_iter iter;
+ live = new_live_track (map);
+ live_track_init (live, live_on_exit (liveinfo, bb));
+
+ gimple *last_stmt = gsi_stmt (gsi_last_bb (bb));
+ gcc_assert (gimple_cond_lhs (last_stmt));
+
+ /* Worklist of SSA operands whose defining statements are still to be
+ examined; it starts with the operand tested by the condition. */
+ auto_vec<tree> stack;
+ stack.safe_push (gimple_cond_lhs (last_stmt));
+ while (!stack.is_empty ())
+ {
+ tree op = stack.pop ();
+ gimple *op_stmt = SSA_NAME_DEF_STMT (op);
+ /* Only follow assignments in BB that ssa_is_replaceable_p accepts. */
+ if (!op_stmt || gimple_bb (op_stmt) != bb
+ || !is_gimple_assign (op_stmt)
+ || !ssa_is_replaceable_p (op_stmt))
+ {
+ continue;
+ }
+ /* For an SSA-to-SSA copy, drop the source from the live set before
+ the definition is processed. */
+ if (gimple_is_assign_copy_p (op_stmt))
+ {
+ live_track_clear_var (live, gimple_assign_rhs1 (op_stmt));
+ }
+ /* The visited flag is what process_add_graph later uses to find the
+ first statement of the chain. */
+ gimple_set_visited (op_stmt, true);
+ FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF)
+ {
+ live_track_process_def (live, def, conflict_graph);
+ }
+ FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE)
+ {
+ stack.safe_push (use);
+ live_track_process_use (live, use);
+ }
+ }
+
+ process_add_graph (live, bb, conflict_graph);
+ delete_live_track (live);
+ remove_high_cost_graph_for_ccmp (conflict_graph);
+}
+
+/* Add to GRAPH the extra conflicts computed for BB as a conditional
+   compare candidate.  Nothing is done unless -fccmp2 is enabled, the
+   target provides gen_ccmp_first and check_ccmp_candidate accepts BB.
+
+   For example:
+
+   ;; basic block 3, loop depth 1
+   ;; pred: 2
+   ;; 3
+   # ivtmp.5_10 = PHI <ivtmp.5_12 (2), ivtmp.5_11 (3)>
+   _7 = b_4 (D) >= c_5 (D);
+   _8 = ivtmp.5_10 == 0;
+   _9 = _7 | _8;
+   ivtmp.5_11 = ivtmp.5_10 - 1;
+   if (_9 != 0)
+     goto <bb 4>; [10.70%]
+   else
+     goto <bb 3>; [89.30%]
+
+   In the above loop the following expressions are replaced:
+
+     _7 replaced by b_4 (D) >= c_5 (D)
+     _8 replaced by ivtmp.5_10 == 0
+
+   For this case to use the ccmp instruction,
+
+     _9 must also be replaceable by _7 | _8
+
+   which requires ivtmp.5_11 and ivtmp.5_10 to be placed in different
+   partitions.
+
+   To achieve that, a temporary conflict graph is built for BB with
+   add_ccmp_conflict_graph.  For every partition X that has a conflict
+   bitmap in the temporary graph, the conflict between X and the first set
+   bit Y of that bitmap is added to GRAPH unless GRAPH already records it.
+   LIVEINFO and MAP are passed through to add_ccmp_conflict_graph.  */
+
+static void
+determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo,
+                                   var_map map, ssa_conflicts *graph)
+{
+  if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb))
+    return;
+
+  /* Reset the visited flags; add_ccmp_conflict_graph uses them to mark
+     the statements it follows.  */
+  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
+       gsi_next (&bsi))
+    gimple_set_visited (gsi_stmt (bsi), false);
+
+  /* The temporary graph is dereferenced unconditionally while it is being
+     built, so no null check is needed afterwards.  */
+  ssa_conflicts *ccmp_conflict_graph
+    = ssa_conflicts_new (num_var_partitions (map));
+  add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb);
+
+  unsigned x;
+  bitmap b;
+  FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b)
+    {
+      if (!b)
+        continue;
+
+      /* Only the first conflicting partition of X is propagated.  */
+      unsigned y = bitmap_first_set_bit (b);
+      if (graph->conflicts[x] && bitmap_bit_p (graph->conflicts[x], y))
+        continue;
+
+      ssa_conflicts_add (graph, x, y);
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, "potential ccmp: add additional "
+                 "conflict-ssa : bb[%d] %u:%u\n",
+                 bb->index, x, y);
+    }
+  ssa_conflicts_delete (ccmp_conflict_graph);
+}
/* Build a conflict graph based on LIVEINFO. Any partitions which are in the
partition view of the var_map liveinfo is based on get entries in the
@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
live_track_process_use (live, var);
}
+ determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph);
+
/* If result of a PHI is unused, looping over the statements will not
record any conflicts since the def was never live. Since the PHI node
is going to be translated out of SSA form, it will insert a copy.
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化