diff --git a/0027-AArch64-Delete-hip09-macro.patch b/0027-AArch64-Delete-hip09-macro.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a3e8d3ee4872c3d857bd245a5906a8af87a6f0d --- /dev/null +++ b/0027-AArch64-Delete-hip09-macro.patch @@ -0,0 +1,514 @@ +From 42b0d16ab1ced5720e017fa9f6059c32489ab1bd Mon Sep 17 00:00:00 2001 +From: xiajingze +Date: Wed, 9 Oct 2024 17:13:49 +0800 +Subject: [PATCH] [AArch64] Delete hip09 macro + +Signed-off-by: xiajingze +--- + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 -- + .../llvm/TargetParser/AArch64TargetParser.h | 2 - + llvm/lib/Target/AArch64/AArch64.td | 8 -- + .../lib/Target/AArch64/AArch64MacroFusion.cpp | 8 -- + llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 - + llvm/lib/Target/AArch64/AArch64Subtarget.h | 6 -- + llvm/lib/Target/CMakeLists.txt | 4 - + llvm/lib/TargetParser/Host.cpp | 2 - + llvm/test/CodeGen/AArch64/cpus-hip09.ll | 11 --- + llvm/test/CodeGen/AArch64/cpus.ll | 1 + + .../CodeGen/AArch64/macro-fusion-mvnclz.mir | 1 - + .../AArch64/misched-fusion-lit-hip09.ll | 73 -------------- + .../CodeGen/AArch64/misched-fusion-lit.ll | 7 ++ + llvm/test/CodeGen/AArch64/remat-hip09.ll | 18 ---- + llvm/test/CodeGen/AArch64/remat.ll | 1 + + llvm/test/lit.site.cfg.py.in | 4 - + llvm/unittests/TargetParser/Host.cpp | 2 - + .../TargetParser/TargetParserTest.cpp | 6 -- + 18 files changed, 9 insertions(+), 155 deletions(-) + delete mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll + delete mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll + delete mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll + +diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake +index 74e68e25d85c..8be5d4ba52c2 100644 +--- a/llvm/cmake/modules/HandleLLVMOptions.cmake ++++ b/llvm/cmake/modules/HandleLLVMOptions.cmake +@@ -112,14 +112,6 @@ else() + set(LLVM_ENABLE_AUTOTUNER 0) + endif() + +-option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON) +-if(LLVM_ENABLE_AARCH64_HIP09) +- set(LLVM_ENABLE_AARCH64_HIP09 1) +- add_definitions( -DENABLE_AARCH64_HIP09 ) +-else() +- set(LLVM_ENABLE_AARCH64_HIP09 0) +-endif() +- + if(LLVM_ENABLE_EXPENSIVE_CHECKS) + add_compile_definitions(EXPENSIVE_CHECKS) + +diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h +index 07cd2fcbb68d..8b25cce0abdc 100644 +--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h ++++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h +@@ -542,13 +542,11 @@ inline constexpr CpuInfo CpuInfos[] = { + (AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 | + AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES | + AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)}, +-#if defined(ENABLE_AARCH64_HIP09) + {"hip09", ARMV8_5A, + (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 | + AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE | + AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM | + AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)}, +-#endif + }; + + // An alias for a CPU. +diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td +index c8bfd770f55f..fdb931a0fe6c 100644 +--- a/llvm/lib/Target/AArch64/AArch64.td ++++ b/llvm/lib/Target/AArch64/AArch64.td +@@ -296,11 +296,9 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature< + "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true", + "CPU fuses (a + b + 1) and (a - b - 1)">; + +-#ifdef ENABLE_AARCH64_HIP09 + def FeatureFuseMvnClz : SubtargetFeature< + "fuse-mvn-clz", "HasFuseMvnClz", "true", + "CPU fuses mvn+clz operations">; +-#endif + + def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", +@@ -1211,7 +1209,6 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", + FeatureFuseAES, + FeaturePostRAScheduler]>; + +-#ifdef ENABLE_AARCH64_HIP09 + def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09", + "HiSilicon HIP-09 processors", [ + FeatureCustomCheapAsMoveHandling, +@@ -1224,7 +1221,6 @@ def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09", + FeatureFuseLiterals, + FeatureFuseMvnClz, + FeaturePostRAScheduler]>; +-#endif + + def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", + "Ampere Computing Ampere-1 processors", [ +@@ -1380,14 +1376,12 @@ def ProcessorFeatures { + list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd]; +-#ifdef ENABLE_AARCH64_HIP09 + list HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8, + FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64, + FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd, + FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4, + FeatureSVE]; +-#endif + list Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureSSBS, FeatureRandGen, FeatureSB, + FeatureSHA2, FeatureSHA3, FeatureAES]; +@@ -1497,11 +1491,9 @@ def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, + // HiSilicon Processors. + def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, + [TuneTSV110]>; +-#ifdef ENABLE_AARCH64_HIP09 + // FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57. + def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09, + [TuneHIP09]>; +-#endif + + // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. + def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, +diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +index 4963ec350db2..44daa06468c5 100644 +--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp ++++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +@@ -51,12 +51,10 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI, + case AArch64::SUBSXrr: + case AArch64::BICSWrr: + case AArch64::BICSXrr: +-#if defined(ENABLE_AARCH64_HIP09) + case AArch64::ADCSWr: + case AArch64::ADCSXr: + case AArch64::SBCSWr: + case AArch64::SBCSXr: +-#endif + return true; + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: +@@ -189,7 +187,6 @@ static bool isLiteralsPair(const MachineInstr *FirstMI, + SecondMI.getOperand(3).getImm() == 16)) + return true; + +-#if defined(ENABLE_AARCH64_HIP09) + // 32 bit immediate. + if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) && + (SecondMI.getOpcode() == AArch64::MOVKWi && +@@ -201,7 +198,6 @@ static bool isLiteralsPair(const MachineInstr *FirstMI, + (SecondMI.getOpcode() == AArch64::MOVKWi && + SecondMI.getOperand(3).getImm() == 16)) + return true; +-#endif + + // Upper half of 64 bit immediate. + if ((FirstMI == nullptr || +@@ -457,7 +453,6 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI, + return false; + } + +-#if defined(ENABLE_AARCH64_HIP09) + static bool isMvnClzPair(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + // HIP09 supports fusion of MVN + CLZ. +@@ -486,7 +481,6 @@ static bool isMvnClzPair(const MachineInstr *FirstMI, + + return false; + } +-#endif + + /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused + /// together. Given SecondMI, when FirstMI is unspecified, then check if +@@ -523,10 +517,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + if (ST.hasFuseAddSub2RegAndConstOne() && + isAddSub2RegAndConstOnePair(FirstMI, SecondMI)) + return true; +-#if defined(ENABLE_AARCH64_HIP09) + if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI)) + return true; +-#endif + + return false; + } +diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +index ddf22364c78e..1aff7e30a0cf 100644 +--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp ++++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +@@ -266,7 +266,6 @@ void AArch64Subtarget::initializeProperties() { + PrefFunctionAlignment = Align(16); + PrefLoopAlignment = Align(4); + break; +-#if defined(ENABLE_AARCH64_HIP09) + case HIP09: + CacheLineSize = 64; + PrefFunctionAlignment = Align(16); +@@ -274,7 +273,6 @@ void AArch64Subtarget::initializeProperties() { + VScaleForTuning = 2; + DefaultSVETFOpts = TailFoldingOpts::Simple; + break; +-#endif + case ThunderX3T110: + CacheLineSize = 64; + PrefFunctionAlignment = Align(16); +diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h +index 5f481f4f976a..8a1cebe96894 100644 +--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h ++++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h +@@ -88,9 +88,7 @@ public: + ThunderXT88, + ThunderX3T110, + TSV110, +-#if defined(ENABLE_AARCH64_HIP09) + HIP09 +-#endif + }; + + protected: +@@ -242,11 +240,7 @@ public: + bool hasFusion() const { + return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || + hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || +-#if defined(ENABLE_AARCH64_HIP09) + hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz(); +-#else +- hasFuseAdrpAdd() || hasFuseLiterals(); +-#endif + } + + unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } +diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt +index 501ce1f2fe53..2739233f9ccb 100644 +--- a/llvm/lib/Target/CMakeLists.txt ++++ b/llvm/lib/Target/CMakeLists.txt +@@ -2,10 +2,6 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen) + + list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target) + +-if(LLVM_ENABLE_AARCH64_HIP09) +- list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09") +-endif() +- + add_llvm_component_library(LLVMTarget + Target.cpp + TargetIntrinsicInfo.cpp +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index 8b23be02edc0..8b1191a5b442 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -257,9 +257,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { + // contents are specified in the various processor manuals. + return StringSwitch(Part) + .Case("0xd01", "tsv110") +-#if defined(ENABLE_AARCH64_HIP09) + .Case("0xd02", "hip09") +-#endif + .Default("generic"); + + if (Implementer == "0x51") // Qualcomm Technologies, Inc. +diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll +deleted file mode 100644 +index dcf32e4dca89..000000000000 +--- a/llvm/test/CodeGen/AArch64/cpus-hip09.ll ++++ /dev/null +@@ -1,11 +0,0 @@ +-; REQUIRES: enable_enable_aarch64_hip09 +-; This tests that llc accepts all valid AArch64 CPUs +- +-; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s +- +-; CHECK-NOT: {{.*}} is not a recognized processor for this target +-; INVALID: {{.*}} is not a recognized processor for this target +- +-define i32 @f(i64 %z) { +- ret i32 0 +-} +diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll +index b24866064efa..56772f6c6049 100644 +--- a/llvm/test/CodeGen/AArch64/cpus.ll ++++ b/llvm/test/CodeGen/AArch64/cpus.ll +@@ -33,6 +33,7 @@ + ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx2t99 2>&1 | FileCheck %s + ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx3t110 2>&1 | FileCheck %s + ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=tsv110 2>&1 | FileCheck %s ++; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s + ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=apple-latest 2>&1 | FileCheck %s + ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=a64fx 2>&1 | FileCheck %s + ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1 2>&1 | FileCheck %s +diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir +index 64bf159370f9..26ba76ef0af5 100644 +--- a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir ++++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir +@@ -1,4 +1,3 @@ +-# REQUIRES: enable_enable_aarch64_hip09 + # RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION + # RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION + --- +diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll +deleted file mode 100644 +index d67fa5b4374c..000000000000 +--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll ++++ /dev/null +@@ -1,73 +0,0 @@ +-; REQUIRES: enable_enable_aarch64_hip09 +-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09 +- +-@g = common local_unnamed_addr global ptr null, align 8 +- +-define dso_local ptr @litp(i32 %a, i32 %b) { +-entry: +- %add = add nsw i32 %b, %a +- %idx.ext = sext i32 %add to i64 +- %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext +- store ptr %add.ptr, ptr @g, align 8 +- ret ptr %add.ptr +- +-; CHECK-LABEL: litp: +-; CHECK: adrp [[R:x[0-9]+]], litp +-; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp +-} +- +-define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" { +-entry: +- %add = add nsw i32 %b, %a +- %idx.ext = sext i32 %add to i64 +- %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext +- store ptr %add.ptr, ptr @g, align 8 +- ret ptr %add.ptr +- +-; CHECK-LABEL: litp_tune_generic: +-; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic +-; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic +-} +- +-define dso_local i32 @liti(i32 %a, i32 %b) { +-entry: +- %add = add i32 %a, -262095121 +- %add1 = add i32 %add, %b +- ret i32 %add1 +- +-; CHECK-LABEL: liti: +-; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}} +-; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +-; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16 +-; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16 +-} +- +-; Function Attrs: norecurse nounwind readnone +-define dso_local i64 @litl(i64 %a, i64 %b) { +-entry: +- %add = add i64 %a, 2208998440489107183 +- %add1 = add i64 %add, %b +- ret i64 %add1 +- +-; CHECK-LABEL: litl: +-; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}} +-; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +-; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16 +-; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32 +-; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48 +-} +- +-; Function Attrs: norecurse nounwind readnone +-define dso_local double @litf() { +-entry: +- ret double 0x400921FB54442D18 +- +-; CHECK-LABEL: litf: +-; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]] +-; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}} +-; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544 +-; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16 +-; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32 +-; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48 +-; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]] +-} +diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll +index ad244d30df11..67cc7aa503b6 100644 +--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll ++++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll +@@ -7,6 +7,7 @@ + ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE + ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE + ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n1 | FileCheck %s --check-prefix=CHECKFUSE-NEOVERSE ++; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09 + + @g = common local_unnamed_addr global ptr null, align 8 + +@@ -59,6 +60,7 @@ entry: + ; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}} + ; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16 ++; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16 + } + + ; Function Attrs: norecurse nounwind readnone +@@ -89,4 +91,9 @@ entry: + ; CHECK-FUSE: movk [[R]], #8699, lsl #32 + ; CHECK-FUSE: movk [[R]], #16393, lsl #48 + ; CHECK-FUSE: fmov {{d[0-9]+}}, [[R]] ++; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544 ++; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16 ++; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32 ++; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48 ++; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]] + } +diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll +deleted file mode 100644 +index aec0d18ae73f..000000000000 +--- a/llvm/test/CodeGen/AArch64/remat-hip09.ll ++++ /dev/null +@@ -1,18 +0,0 @@ +-; REQUIRES: enable_enable_aarch64_hip09 +-; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s +- +-%X = type { i64, i64, i64 } +-declare void @f(ptr) +-define void @t() { +-entry: +- %tmp = alloca %X +- call void @f(ptr %tmp) +-; CHECK: add x0, sp, #8 +-; CHECK-NOT: mov +-; CHECK-NEXT: bl f +- call void @f(ptr %tmp) +-; CHECK: add x0, sp, #8 +-; CHECK-NOT: mov +-; CHECK-NEXT: bl f +- ret void +-} +diff --git a/llvm/test/CodeGen/AArch64/remat.ll b/llvm/test/CodeGen/AArch64/remat.ll +index 483c4d71ee21..fa039246c7f5 100644 +--- a/llvm/test/CodeGen/AArch64/remat.ll ++++ b/llvm/test/CodeGen/AArch64/remat.ll +@@ -22,6 +22,7 @@ + ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s + ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx2t99 -o - %s | FileCheck %s + ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=tsv110 -o - %s | FileCheck %s ++; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s + ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s + ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx3t110 -o - %s | FileCheck %s + ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=ampere1 -o - %s | FileCheck %s +diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in +index 6145a514f008..20c1ecca1d43 100644 +--- a/llvm/test/lit.site.cfg.py.in ++++ b/llvm/test/lit.site.cfg.py.in +@@ -63,14 +63,10 @@ config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@ + config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ + config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ + config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@ +-config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@ + + import lit.llvm + lit.llvm.initialize(lit_config, config) + +-if config.enable_enable_aarch64_hip09: +- config.available_features.add("enable_enable_aarch64_hip09") +- + # Let the main config do the real work. + lit_config.load_config( + config, os.path.join(config.llvm_src_root, "test/lit.cfg.py")) +diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp +index 4b4c81514896..cfc41486b173 100644 +--- a/llvm/unittests/TargetParser/Host.cpp ++++ b/llvm/unittests/TargetParser/Host.cpp +@@ -250,11 +250,9 @@ CPU part : 0x0a1 + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n" + "CPU part : 0xd01"), + "tsv110"); +-#if defined(ENABLE_AARCH64_HIP09) + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n" + "CPU part : 0xd02"), + "hip09"); +-#endif + + // Verify A64FX. + const std::string A64FXProcCpuInfo = R"( +diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp +index 94e0047e567b..daa38474004e 100644 +--- a/llvm/unittests/TargetParser/TargetParserTest.cpp ++++ b/llvm/unittests/TargetParser/TargetParserTest.cpp +@@ -1421,7 +1421,6 @@ INSTANTIATE_TEST_SUITE_P( + AArch64::AEK_PROFILE | AArch64::AEK_FP16 | + AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD, + "8.2-A"), +-#if defined(ENABLE_AARCH64_HIP09) + ARMCPUTestParams( + "hip09", "armv8.5-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD | +@@ -1432,7 +1431,6 @@ INSTANTIATE_TEST_SUITE_P( + AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM | + AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16, + "8.5-A"), +-#endif + ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_AES | + AArch64::AEK_SHA2 | AArch64::AEK_FP | +@@ -1449,11 +1447,7 @@ INSTANTIATE_TEST_SUITE_P( + "8.2-A"))); + + // Note: number of CPUs includes aliases. +-#if defined(ENABLE_AARCH64_HIP09) + static constexpr unsigned NumAArch64CPUArchs = 63; +-#else +-static constexpr unsigned NumAArch64CPUArchs = 62; +-#endif + + TEST(TargetParserTest, testAArch64CPUArchList) { + SmallVector List; +-- +2.43.0 + diff --git a/0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch b/0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e3136975642cd9f5d697545b886ed676b5f1f45 --- /dev/null +++ b/0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch @@ -0,0 +1,72 @@ +From 28e3fc80336935bc8bed372e78616ef5be9f4908 Mon Sep 17 00:00:00 2001 +From: Arthur Eubanks +Date: Thu, 27 Jul 2023 13:27:58 -0700 +Subject: [PATCH] Don't zero out noreg operands + +A tail call may have $noreg operands. + +Fixes a crash. + +Reviewed By: xgupta + +Differential Revision: https://reviews.llvm.org/D156485 +--- + llvm/lib/CodeGen/PrologEpilogInserter.cpp | 9 +++++++-- + llvm/test/CodeGen/X86/zero-call-used-regs.ll | 14 ++++++++++++++ + 2 files changed, 21 insertions(+), 2 deletions(-) + +diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp +index e323aaaeefaf..49047719fdaa 100644 +--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp ++++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp +@@ -1285,6 +1285,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { + continue; + + MCRegister Reg = MO.getReg(); ++ if (!Reg) ++ continue; + + // This picks up sibling registers (e.q. %al -> %ah). + for (MCRegUnit Unit : TRI.regunits(Reg)) +@@ -1308,8 +1310,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { + if (!MO.isReg()) + continue; + +- for (const MCPhysReg &Reg : +- TRI.sub_and_superregs_inclusive(MO.getReg())) ++ MCRegister Reg = MO.getReg(); ++ if (!Reg) ++ continue; ++ ++ for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg)) + RegsToZero.reset(Reg); + } + } +diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll +index 63d51c916bb9..97ad5ce9c8cb 100644 +--- a/llvm/test/CodeGen/X86/zero-call-used-regs.ll ++++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll +@@ -241,6 +241,20 @@ entry: + ret i32 %x + } + ++define dso_local void @tailcall(ptr %p) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" { ++; I386-LABEL: tailcall: ++; I386: # %bb.0: ++; I386-NEXT: movl {{[0-9]+}}(%esp), %eax ++; I386-NEXT: jmpl *(%eax) # TAILCALL ++; ++; X86-64-LABEL: tailcall: ++; X86-64: # %bb.0: ++; X86-64-NEXT: jmpq *(%rdi) # TAILCALL ++ %c = load ptr, ptr %p ++ tail call void %c() ++ ret void ++} ++ + ; Don't emit zeroing registers in "main" function. + define dso_local i32 @main() local_unnamed_addr #1 { + ; I386-LABEL: main: +-- +2.43.0 + diff --git a/llvm.spec b/llvm.spec index f3f937129c32e92dad560588dc2864c537a0a9a0..31604cff75b0f46038d2790122fa6fc91457af52 100644 --- a/llvm.spec +++ b/llvm.spec @@ -44,7 +44,7 @@ Name: %{pkg_name} Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 21 +Release: 23 Summary: The Low Level Virtual Machine License: NCSA @@ -81,6 +81,8 @@ Patch23: 0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch Patch24: 0024-Backport-LoongArch-fix-and-add-some-new-support.patch Patch25: 0025-Backport-Simple-check-to-ignore-Inline-asm-fwait-insertion.patch Patch26: 0026-Add-arch-restriction-for-BiSheng-Autotuner.patch +Patch27: 0027-AArch64-Delete-hip09-macro.patch +Patch28: 0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch BuildRequires: binutils-devel BuildRequires: cmake @@ -377,6 +379,12 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build %{install_includedir}/llvm-gmock %changelog +* Tue Nov 19 2024 xiajingze - 17.0.6-23 +- [backport][Clang] Fix crash with -fzero-call-used-regs + +* Mon Nov 18 2024 xiajingze - 17.0.6-22 +- [AArch64] Delete hip09 macro + * Mon Nov 18 2024 liyunfei - 17.0.6-21 - Add arch restriction for BiSheng Autotuner