加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch 4.44 KB
一键复制 编辑 原始数据 按行查看 历史
From 547ab9b3e073ef389e5fd89d961bb1e3e6934ae9 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Wed, 9 Nov 2022 17:04:13 +0800
Subject: [PATCH 07/22] [MULL64 2/3] Fold series of instructions into mul
Fold the instruction sequence that computes the low part of a 64-bit multiplication from 32-bit halves into a single mul.
gcc/
* match.pd: Add simplification for the low part of a 64-bit mul.
* common.opt (fmerge-mull): New option, enabled at -O2 and above.
* opts.cc (default_options_table): Enable -fmerge-mull at -O2 and above.
gcc/testsuite/
* g++.dg/tree-ssa/mull64.C: New test.
---
gcc/common.opt | 4 +++
gcc/match.pd | 27 ++++++++++++++++++++
gcc/opts.cc | 1 +
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
4 files changed, 66 insertions(+)
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
diff --git a/gcc/common.opt b/gcc/common.opt
index 8a0dafc52..e365a48bc 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2126,6 +2126,10 @@ fmerge-debug-strings
Common Var(flag_merge_debug_strings) Init(1)
Attempt to merge identical debug strings across compilation units.
+fmerge-mull
+Common Var(flag_merge_mull) Init(0) Optimization
+Attempt to merge series instructions into mul.
+
fmessage-length=
Common RejectNegative Joined UInteger
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
diff --git a/gcc/match.pd b/gcc/match.pd
index fd0857fc9..2092e6959 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4301,6 +4301,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+/* Fold the low 64 bits of a multiply open-coded from 32-bit halves into one
+   MULT_EXPR (mostly seen in FORWPROP1); @2 must be 2^32-1 and @3 must be 32:
+     In0Lo = In0(D) & 4294967295;
+     In0Hi = In0(D) >> 32;
+     In1Lo = In1(D) & 4294967295;
+     In1Hi = In1(D) >> 32;
+     Addc = In0Lo * In1Hi + In0Hi * In1Lo;
+     addc32 = Addc << 32;
+     ResLo = In0Lo * In1Lo + addc32  */
+(simplify
+ (plus:c (mult @4 @5)
+  (lshift
+   (plus:c
+    (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
+    (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
+   INTEGER_CST@3))
+ (if (flag_merge_mull
+      && INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) == 64
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+      /* a*b mod 2^64 == lo*lo + ((lo*hi + hi*lo) << 32) holds only for a
+         32-bit mask and a 32-bit shift; any other constants must not fold.  */
+      && compare_tree_int (@2, 0xffffffff) == 0
+      && compare_tree_int (@3, 32) == 0)
+  (mult (convert:type @0) (convert:type @1))))
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
diff --git a/gcc/opts.cc b/gcc/opts.cc
index a97630d1c..eae71ed20 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -647,6 +647,7 @@ static const struct default_options default_options_table[] =
VECT_COST_MODEL_VERY_CHEAP },
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
/* -O2 and above optimizations, but not -Os or -Og. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
new file mode 100644
index 000000000..2a3b74604
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -0,0 +1,34 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+
+# define BN_BITS4 32 /* Shift count used to reach the upper half of a word.  */
+# define BN_MASK2 (0xffffffffffffffffL) /* All 64 bits set.  */
+# define BN_MASK2l (0xffffffffL) /* Low 32 bits set.  */
+# define BN_MASK2h (0xffffffff00000000L) /* High 32 bits set; not used below.  */
+# define BN_MASK2h1 (0xffffffff80000000L) /* High 33 bits set; not used below.  */
+# define LBITS(a) ((a)&BN_MASK2l) /* Low 32 bits of A.  */
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) /* Bits 32..63 of A, shifted down.  */
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) /* A shifted up by 32 bits.  */
+/* Multiply in0 by in1 via 32-bit halves (assumes 64-bit unsigned long); results go to retLo/retHi.  */
+void mul64(unsigned long in0, unsigned long in1,
+ unsigned long &retLo, unsigned long &retHi) {
+ unsigned long m00, m01, m10, m11, al, ah, bl, bh;
+ unsigned long Addc, addc32, low;
+ al = LBITS(in0); /* Low 32 bits of in0.  */
+ ah = HBITS(in0); /* Bits 32..63 of in0.  */
+ bl = LBITS(in1); /* Low 32 bits of in1.  */
+ bh = HBITS(in1); /* Bits 32..63 of in1.  */
+ m10 = bh * al; /* Cross product hi(in1) * lo(in0).  */
+ m00 = bl * al; /* lo * lo partial product.  */
+ m01 = bl * ah; /* Cross product lo(in1) * hi(in0).  */
+ m11 = bh * ah; /* hi * hi partial product; accumulates into retHi.  */
+ Addc = (m10 + m01) & BN_MASK2; /* Sum of the two cross products.  */
+ if (Addc < m01) m11 += L2HBITS((unsigned long)1); /* Cross sum wrapped: add 1 << 32 to the high part.  */
+ m11 += HBITS(Addc);
+ addc32 = L2HBITS(Addc); /* Low half of the cross sum moved to the upper 32 bits.  */
+ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++; /* Propagate the carry out of the low sum.  */
+ retLo = low;
+ retHi = m11;
+}
+
+/* { dg-final { scan-tree-dump "gimple_simplified to low_\[0-9\]+ = in0_\[0-9\]+" "forwprop1" } } */
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化