diff --git a/0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch b/0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch new file mode 100644 index 0000000000000000000000000000000000000000..edfab318a3cf992dda211f09442473e9a0c23eb2 --- /dev/null +++ b/0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch @@ -0,0 +1,246 @@ +From 60ff801d1ea96ab964039cc1ed42e1dca0a63d54 Mon Sep 17 00:00:00 2001 +From: Anton Sidorenko +Date: Tue, 6 Feb 2024 12:02:06 +0300 +Subject: [PATCH] [SimplifyLibCalls] Merge sqrt into the power of exp (#79146) + +Under fast-math flags it's possible to convert `sqrt(exp(X)) `into +`exp(X * 0.5)`. I suppose that this transformation is always profitable. +This is similar to the optimization existing in GCC. +--- + .../llvm/Transforms/Utils/SimplifyLibCalls.h | 1 + + .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 67 ++++++++++ + llvm/test/Transforms/InstCombine/sqrt.ll | 120 ++++++++++++++++++ + 3 files changed, 188 insertions(+) + +diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +index eb10545ee149..1aad0b298845 100644 +--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h ++++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +@@ -201,6 +201,7 @@ private: + Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B); + Value *optimizeLog(CallInst *CI, IRBuilderBase &B); + Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B); ++ Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B); + Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B); + Value *optimizeTan(CallInst *CI, IRBuilderBase &B); + // Wrapper for all floating point library call optimizations +diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +index 3ad97613fe7a..dd5bbdaaf6d3 100644 +--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp ++++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +@@ -2539,6 
+2539,70 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { + return Ret; + } + ++// sqrt(exp(X)) -> exp(X * 0.5) ++Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) { ++ if (!CI->hasAllowReassoc()) ++ return nullptr; ++ ++ Function *SqrtFn = CI->getCalledFunction(); ++ CallInst *Arg = dyn_cast(CI->getArgOperand(0)); ++ if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse()) ++ return nullptr; ++ Intrinsic::ID ArgID = Arg->getIntrinsicID(); ++ LibFunc ArgLb = NotLibFunc; ++ TLI->getLibFunc(*Arg, ArgLb); ++ ++ LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb; ++ ++ if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb)) ++ switch (SqrtLb) { ++ case LibFunc_sqrtf: ++ ExpLb = LibFunc_expf; ++ Exp2Lb = LibFunc_exp2f; ++ Exp10Lb = LibFunc_exp10f; ++ break; ++ case LibFunc_sqrt: ++ ExpLb = LibFunc_exp; ++ Exp2Lb = LibFunc_exp2; ++ Exp10Lb = LibFunc_exp10; ++ break; ++ case LibFunc_sqrtl: ++ ExpLb = LibFunc_expl; ++ Exp2Lb = LibFunc_exp2l; ++ Exp10Lb = LibFunc_exp10l; ++ break; ++ default: ++ return nullptr; ++ } ++ else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) { ++ if (CI->getType()->getScalarType()->isFloatTy()) { ++ ExpLb = LibFunc_expf; ++ Exp2Lb = LibFunc_exp2f; ++ Exp10Lb = LibFunc_exp10f; ++ } else if (CI->getType()->getScalarType()->isDoubleTy()) { ++ ExpLb = LibFunc_exp; ++ Exp2Lb = LibFunc_exp2; ++ Exp10Lb = LibFunc_exp10; ++ } else ++ return nullptr; ++ } else ++ return nullptr; ++ ++ if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb && ++ ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2) ++ return nullptr; ++ ++ IRBuilderBase::InsertPointGuard Guard(B); ++ B.SetInsertPoint(Arg); ++ auto *ExpOperand = Arg->getOperand(0); ++ auto *FMul = ++ B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5), ++ CI, "merged.sqrt"); ++ ++ Arg->setOperand(0, FMul); ++ return Arg; ++} ++ + Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + Function *Callee = 
CI->getCalledFunction(); +@@ -2551,6 +2615,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) { + Callee->getIntrinsicID() == Intrinsic::sqrt)) + Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); + ++ if (Value *Opt = mergeSqrtToExp(CI, B)) ++ return Opt; ++ + if (!CI->isFast()) + return Ret; + +diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll +index 004df3e30c72..f72fe5a6a581 100644 +--- a/llvm/test/Transforms/InstCombine/sqrt.ll ++++ b/llvm/test/Transforms/InstCombine/sqrt.ll +@@ -88,7 +88,127 @@ define float @sqrt_call_fabs_f32(float %x) { + ret float %sqrt + } + ++define double @sqrt_exp(double %x) { ++; CHECK-LABEL: @sqrt_exp( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01 ++; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]]) ++; CHECK-NEXT: ret double [[E]] ++; ++ %e = call reassoc double @llvm.exp.f64(double %x) ++ %res = call reassoc double @llvm.sqrt.f64(double %e) ++ ret double %res ++} ++ ++define double @sqrt_exp_2(double %x) { ++; CHECK-LABEL: @sqrt_exp_2( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01 ++; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]]) ++; CHECK-NEXT: ret double [[E]] ++; ++ %e = call reassoc double @exp(double %x) ++ %res = call reassoc double @sqrt(double %e) ++ ret double %res ++} ++ ++define double @sqrt_exp2(double %x) { ++; CHECK-LABEL: @sqrt_exp2( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01 ++; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp2(double [[MERGED_SQRT]]) ++; CHECK-NEXT: ret double [[E]] ++; ++ %e = call reassoc double @exp2(double %x) ++ %res = call reassoc double @sqrt(double %e) ++ ret double %res ++} ++ ++define double @sqrt_exp10(double %x) { ++; CHECK-LABEL: @sqrt_exp10( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01 ++; CHECK-NEXT: [[E:%.*]] = 
call reassoc double @exp10(double [[MERGED_SQRT]]) ++; CHECK-NEXT: ret double [[E]] ++; ++ %e = call reassoc double @exp10(double %x) ++ %res = call reassoc double @sqrt(double %e) ++ ret double %res ++} ++ ++; Negative test ++define double @sqrt_exp_nofast_1(double %x) { ++; CHECK-LABEL: @sqrt_exp_nofast_1( ++; CHECK-NEXT: [[E:%.*]] = call double @llvm.exp.f64(double [[X:%.*]]) ++; CHECK-NEXT: [[RES:%.*]] = call reassoc double @llvm.sqrt.f64(double [[E]]) ++; CHECK-NEXT: ret double [[RES]] ++; ++ %e = call double @llvm.exp.f64(double %x) ++ %res = call reassoc double @llvm.sqrt.f64(double %e) ++ ret double %res ++} ++ ++; Negative test ++define double @sqrt_exp_nofast_2(double %x) { ++; CHECK-LABEL: @sqrt_exp_nofast_2( ++; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[X:%.*]]) ++; CHECK-NEXT: [[RES:%.*]] = call double @llvm.sqrt.f64(double [[E]]) ++; CHECK-NEXT: ret double [[RES]] ++; ++ %e = call reassoc double @llvm.exp.f64(double %x) ++ %res = call double @llvm.sqrt.f64(double %e) ++ ret double %res ++} ++ ++define double @sqrt_exp_merge_constant(double %x, double %y) { ++; CHECK-LABEL: @sqrt_exp_merge_constant( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc nsz double [[X:%.*]], 5.000000e+00 ++; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]]) ++; CHECK-NEXT: ret double [[E]] ++; ++ %mul = fmul reassoc nsz double %x, 10.0 ++ %e = call reassoc double @llvm.exp.f64(double %mul) ++ %res = call reassoc nsz double @llvm.sqrt.f64(double %e) ++ ret double %res ++} ++ ++define double @sqrt_exp_intr_and_libcall(double %x) { ++; CHECK-LABEL: @sqrt_exp_intr_and_libcall( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01 ++; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]]) ++; CHECK-NEXT: ret double [[E]] ++; ++ %e = call reassoc double @exp(double %x) ++ %res = call reassoc double @llvm.sqrt.f64(double %e) ++ ret double %res ++} ++ ++define double 
@sqrt_exp_intr_and_libcall_2(double %x) { ++; CHECK-LABEL: @sqrt_exp_intr_and_libcall_2( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01 ++; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]]) ++; CHECK-NEXT: ret double [[E]] ++; ++ %e = call reassoc double @llvm.exp.f64(double %x) ++ %res = call reassoc double @sqrt(double %e) ++ ret double %res ++} ++ ++define <2 x float> @sqrt_exp_vec(<2 x float> %x) { ++; CHECK-LABEL: @sqrt_exp_vec( ++; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], ++; CHECK-NEXT: [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]]) ++; CHECK-NEXT: ret <2 x float> [[E]] ++; ++ %e = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> %x) ++ %res = call reassoc <2 x float> @llvm.sqrt.v2f32(<2 x float> %e) ++ ret <2 x float> %res ++} ++ + declare i32 @foo(double) + declare double @sqrt(double) readnone + declare float @sqrtf(float) + declare float @llvm.fabs.f32(float) ++declare double @llvm.exp.f64(double) ++declare double @llvm.sqrt.f64(double) ++declare double @exp(double) ++declare double @exp2(double) ++declare double @exp10(double) ++declare <2 x float> @llvm.exp.v2f32(<2 x float>) ++declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) +-- +2.38.1.windows.1 + diff --git a/0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch b/0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch new file mode 100644 index 0000000000000000000000000000000000000000..81f9f17f4cb51cf411e14551cb622e494dca38a2 --- /dev/null +++ b/0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch @@ -0,0 +1,187 @@ +From fdbf1bd9f1bdec32384eda47f419d895d11a1c50 Mon Sep 17 00:00:00 2001 +From: XingYuShuai <1150775134@qq.com> +Date: Wed, 15 May 2024 14:42:27 +0800 +Subject: [PATCH] [LICM] Solve runtime error caused by the signal function. 
+ +Using the option enable-signal to control whether to solve the +runtime error caused by the signal function when lto is turned on. +--- + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 ++++ + llvm/lib/Transforms/Scalar/LICM.cpp | 47 +++++++++++++++++++ + .../Transforms/LICM/signal-before-loop-2.ll | 25 ++++++++++ + .../Transforms/LICM/signal-before-loop.ll | 25 ++++++++++ + llvm/test/lit.site.cfg.py.in | 1 + + 5 files changed, 106 insertions(+) + create mode 100644 llvm/test/Transforms/LICM/signal-before-loop-2.ll + create mode 100644 llvm/test/Transforms/LICM/signal-before-loop.ll + +diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake +index b8e9dbe29d88..8be5d4ba52c2 100644 +--- a/llvm/cmake/modules/HandleLLVMOptions.cmake ++++ b/llvm/cmake/modules/HandleLLVMOptions.cmake +@@ -120,6 +120,14 @@ else() + set(LLVM_ENABLE_AUTOTUNER 0) + endif() + ++option(LLVM_BUILD_FOR_COMMON "" ON) ++if(LLVM_BUILD_FOR_COMMON) ++ set(LLVM_BUILD_FOR_COMMON 1) ++ add_definitions( -DBUILD_FOR_COMMON ) ++else() ++ set(LLVM_BUILD_FOR_COMMON 0) ++endif() ++ + if(LLVM_ENABLE_EXPENSIVE_CHECKS) + add_compile_definitions(EXPENSIVE_CHECKS) + +diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp +index f8fab03f151d..2feec759f240 100644 +--- a/llvm/lib/Transforms/Scalar/LICM.cpp ++++ b/llvm/lib/Transforms/Scalar/LICM.cpp +@@ -44,6 +44,9 @@ + #include "llvm/Analysis/AliasSetTracker.h" + #include "llvm/Analysis/AssumptionCache.h" + #include "llvm/Analysis/CaptureTracking.h" ++#ifdef BUILD_FOR_COMMON ++#include "llvm/Analysis/CFG.h" ++#endif // BUILD_FOR_COMMON + #include "llvm/Analysis/GuardUtils.h" + #include "llvm/Analysis/LazyBlockFrequencyInfo.h" + #include "llvm/Analysis/Loads.h" +@@ -122,6 +125,13 @@ static cl::opt + SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false), + cl::desc("Force thread model single in LICM pass")); + ++#ifdef BUILD_FOR_COMMON ++static cl::opt 
DisableMovStoreInsOutsideOfLoopInSigFun( ++ "disable-move-store-ins-outside-of-loop", ++ cl::Hidden, cl::init(true), cl::desc("Disable move store instruction" ++ "outside of loop in signal function.")); ++#endif // BUILD_FOR_COMMON ++ + static cl::opt MaxNumUsesTraversed( + "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), + cl::desc("Max num uses visited for identifying load " +@@ -2075,8 +2085,45 @@ bool llvm::promoteLoopAccessesToScalars( + for (Use &U : ASIV->uses()) { + // Ignore instructions that are outside the loop. + Instruction *UI = dyn_cast(U.getUser()); ++ #if defined(BUILD_FOR_COMMON) ++ if (DisableMovStoreInsOutsideOfLoopInSigFun) { ++ if (!UI) ++ continue; ++ ++ // In the following scenario, there will be a loop index store ++ // instruction that is moved outside the loop and when the termination ++ // loop is triggered by the signal function, the store instruction is not ++ // executed. However, the function registered by the signal will read the ++ // data stored by the store instruction, so the data read is incorrect. ++ // Solution: Prevent the store instruction from going outside the loop. ++ // NOTE: The __sysv_signal function takes the same arguments and performs ++ // the same task as signal. They all belong to glibc. 
++ if(StoreSafety == StoreSafe && !CurLoop->contains(UI)) { ++ if(LoadInst *NotCurLoopLoad = dyn_cast(UI)) { ++ Function *NotCurLoopFun = UI->getParent()->getParent(); ++ for (Use &UseFun : NotCurLoopFun->uses()) { ++ CallInst *Call = dyn_cast(UseFun.getUser()); ++ if (Call && Call->getCalledFunction() && ++ (Call->getCalledFunction()->getName() == "__sysv_signal" || ++ Call->getCalledFunction()->getName() == "signal") && ++ isPotentiallyReachable(Call->getParent(), ++ CurLoop->getLoopPreheader(),NULL,DT, ++ LI)) ++ return false; ++ } ++ } ++ } ++ ++ if (!CurLoop->contains(UI)) ++ continue; ++ } else { ++ if (!UI || !CurLoop->contains(UI)) ++ continue; ++ } ++#else + if (!UI || !CurLoop->contains(UI)) + continue; ++#endif // BUILD_FOR_COMMON + + // If there is an non-load/store instruction in the loop, we can't promote + // it. +diff --git a/llvm/test/Transforms/LICM/signal-before-loop-2.ll b/llvm/test/Transforms/LICM/signal-before-loop-2.ll +new file mode 100644 +index 000000000000..da878c6c691b +--- /dev/null ++++ b/llvm/test/Transforms/LICM/signal-before-loop-2.ll +@@ -0,0 +1,25 @@ ++; REQUIRES: enable_build_for_common ++; RUN:opt -disable-move-store-ins-outside-of-loop=true -S < %s | FileCheck %s ++ ++@Run_Index = external global i64 ++ ++declare ptr @signal(ptr) ++ ++define void @report() { ++entry: ++ %0 = load i64, ptr @Run_Index, align 8 ++ unreachable ++} ++ ++define i32 @main() { ++if.end: ++ %call.i4 = call ptr @signal(ptr @report) ++ br label %for.cond ++ ++; CHECK-LABEL: for.cond ++; CHECK: store ++for.cond: ++ %0 = load i64, ptr @Run_Index, align 8 ++ store i64 %0, ptr @Run_Index, align 8 ++ br label %for.cond ++} +diff --git a/llvm/test/Transforms/LICM/signal-before-loop.ll b/llvm/test/Transforms/LICM/signal-before-loop.ll +new file mode 100644 +index 000000000000..cfae4e87db56 +--- /dev/null ++++ b/llvm/test/Transforms/LICM/signal-before-loop.ll +@@ -0,0 +1,25 @@ ++; REQUIRES: enable_build_for_common ++; RUN:opt 
-disable-move-store-ins-outside-of-loop=true -S < %s | FileCheck %s ++ ++@Run_Index = external global i64 ++ ++declare ptr @__sysv_signal(ptr) ++ ++define void @report() { ++entry: ++ %0 = load i64, ptr @Run_Index, align 8 ++ unreachable ++} ++ ++define i32 @main() { ++if.end: ++ %call.i4 = call ptr @__sysv_signal(ptr @report) ++ br label %for.cond ++ ++; CHECK-LABEL: for.cond ++; CHECK: store ++for.cond: ++ %0 = load i64, ptr @Run_Index, align 8 ++ store i64 %0, ptr @Run_Index, align 8 ++ br label %for.cond ++} +diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in +index 0e9396e3b014..20c1ecca1d43 100644 +--- a/llvm/test/lit.site.cfg.py.in ++++ b/llvm/test/lit.site.cfg.py.in +@@ -63,6 +63,7 @@ config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@ + config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ + config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ + config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@ ++config.enable_build_for_common = @LLVM_BUILD_FOR_COMMON@ + + import lit.llvm + lit.llvm.initialize(lit_config, config) +-- +2.38.1.windows.1 + diff --git a/llvm.spec b/llvm.spec index 31604cff75b0f46038d2790122fa6fc91457af52..7e0866f0e3689af388518fd2a64d2698d69de755 100644 --- a/llvm.spec +++ b/llvm.spec @@ -44,7 +44,7 @@ Name: %{pkg_name} Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 23 +Release: 25 Summary: The Low Level Virtual Machine License: NCSA @@ -83,6 +83,8 @@ Patch25: 0025-Backport-Simple-check-to-ignore-Inline-asm-fwait-insertion.patc Patch26: 0026-Add-arch-restriction-for-BiSheng-Autotuner.patch Patch27: 0027-AArch64-Delete-hip09-macro.patch Patch28: 0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch +Patch29: 0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch +Patch30: 0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch BuildRequires: binutils-devel BuildRequires: cmake @@ -379,6 +381,12 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build 
%{install_includedir}/llvm-gmock %changelog +* Wed Nov 20 2024 eastb233 - 17.0.6-25 +- [LICM] Solve runtime error caused by the signal function. + +* Wed Nov 20 2024 eastb233 - 17.0.6-24 +- [SimplifyLibCalls] Merge sqrt into the power of exp (#79146) + * Tue Nov 19 2024 xiajingze - 17.0.6-23 - [backport][Clang] Fix crash with -fzero-call-used-regs