加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch 3.90 KB
一键复制 编辑 原始数据 按行查看 历史
From 4e536dbb4a08925cea259be13962969efcc0f3c1 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Fri, 11 Nov 2022 11:30:37 +0800
Subject: [PATCH 08/22] [MULL64 3/3] Fold series of instructions into umulh
Merge the series of instructions that compute the high part into umulh
gcc/
* match.pd: Add simplifications for high part of umulh
gcc/testsuite/
* g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
---
gcc/match.pd | 56 ++++++++++++++++++++++++++
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++-
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 2092e6959..b7e3588e8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4301,6 +4301,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP4 to move some operations outside
+   of the if statements.  They should be done late because that gives jump
+   threading and a few other passes a chance to simplify the code first.  */
+/* Mul64 is a multiplication algorithm that multiplies two 64-bit integers
+   into one 128-bit integer.  Try to match the high part of the mul pattern
+   after the low part of the mul pattern is simplified.  The fold is only
+   valid for unsigned 64-bit operands split with mask 0xffffffff (@2) and
+   shift count 32 (@3), so both constants are checked explicitly.  The
+   following scenario should be matched:
+   (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
+     In0Lo = In0(D) & 4294967295;  -- bit_and@4 SSA_NAME@0 INTEGER_CST@2
+     In0Hi = In0(D) >> 32;         -- rshift@5 SSA_NAME@0 INTEGER_CST@3
+     In1Lo = In1(D) & 4294967295;  -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
+     In1Hi = In1(D) >> 32;         -- rshift@7 SSA_NAME@1 INTEGER_CST@3
+     Mull_01 = In0Hi * In1Lo;      -- mult@8 @5 @6
+     Addc = In0Lo * In1Hi + Mull_01;  -- plus@9 (mult @4 @7) @8
+     AddH = (Addc >> 32) + In0Hi * In1Hi;  -- plus@11 (rshift @9 @3) (mult @5 @7)
+     addc32 = Addc << 32;          -- lshift@10 @9 @3
+     ResLo = In0(D) * In1(D);      -- mult @0 @1
+     ResHi = ((long unsigned int) (addc32 > ResLo)) +
+	     (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
+   } */
+(simplify
+ (plus:c
+  (plus:c
+   (convert
+    (gt (lshift@10 @9 @3)
+	(mult:c @0 @1)))
+   (lshift
+    (convert
+     (gt @8 @9))
+    @3))
+  (plus:c@11
+   (rshift
+    (plus:c@9
+     (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
+     (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
+    @3)
+   (mult:c (rshift@5 SSA_NAME@0 @3)
+	   (rshift@7 SSA_NAME@1 INTEGER_CST@3))))
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+      && TYPE_PRECISION (type) == 64
+      && compare_tree_int (@2, 0xffffffff) == 0
+      && compare_tree_int (@3, 32) == 0)
+  (with {
+    tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
+    tree shift = build_int_cst (integer_type_node, 64);
+   }
+   (convert:type (rshift
+		  (mult (convert:i128_type @0)
+			(convert:i128_type @1))
+		  { shift; })))))
+#endif
+
#if GIMPLE
/* These patterns are mostly used by FORWPROP1 to fold some operations into more
simple IR. The following scenario should be matched:
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
index 2a3b74604..f61cf5e6f 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
# define BN_BITS4 32
# define BN_MASK2 (0xffffffffffffffffL)
@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
retHi = m11;
}
-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化