master

分支 (36)

标签 (20)

管理

管理

master

Multi-Version_openEuler-24.03-LTS-SP1_LLVM18

openEuler-24.03-LTS-SP1

openEuler-24.03-LTS-Next

Multi-Version_openEuler-20.03-LTS-SP4_LLVM17

Multi-Version_openEuler-22.03-LTS-SP3_LLVM17

Multi-Version_openEuler-22.03-LTS-SP3_LLVM18

Multi-Version_openEuler-22.03-LTS-SP4_LLVM17

Multi-Version_openEuler-22.03-LTS-SP4_LLVM18

Multi-Version_openEuler-24.03-LTS_LLVM18

openEuler-24.09

openEuler-24.03-LTS

revert-4f42987

openEuler-22.03-LTS-SP3

openEuler-22.03-LTS-SP2

openEuler-22.03-LTS-SP1

openEuler-22.03-LTS

openEuler-22.03-LTS-Next

openEuler-22.03-LTS-SP4

openEuler-20.03-LTS-SP4

openEuler-22.03-LTS-SP4-release

openEuler-24.09-release

openEuler-24.03-LTS-release

openEuler-22.03-LTS-SP3-release

openEuler-23.09-rc5

openEuler-22.03-LTS-SP1-release

openEuler-22.09-release

openEuler-22.09-rc5

openEuler-22.09-20220829

openEuler-22.03-LTS-20220331

openEuler-22.03-LTS-round5

openEuler-22.03-LTS-round3

openEuler-22.03-LTS-round2

openEuler-22.03-LTS-round1

openEuler-20.03-LTS-SP3-release

openEuler-20.03-LTS-SP2-20210624

openEuler-21.03-20210330

openEuler-20.09-20200929

openEuler-20.03-LTS-20200606

openEuler-20.03-LTS-tag

llvm
/
0023-AArch64-Support-HiSilicon-s-HIP0...

From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001
From: xiajingze <xiajingze1@huawei.com>
Date: Wed, 31 Jul 2024 18:37:29 +0800
Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor

Signed-off-by: xiajingze <xiajingze1@huawei.com>
---
 llvm/cmake/modules/HandleLLVMOptions.cmake    |  8 ++
 .../llvm/TargetParser/AArch64TargetParser.h   |  7 ++
 llvm/lib/Target/AArch64/AArch64.td            | 36 +++++++
 .../lib/Target/AArch64/AArch64MacroFusion.cpp | 55 +++++++++++
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |  9 ++
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |  9 +-
 llvm/lib/Target/CMakeLists.txt                |  4 +
 llvm/lib/TargetParser/Host.cpp                |  3 +
 llvm/test/CodeGen/AArch64/cpus-hip09.ll       | 11 +++
 .../CodeGen/AArch64/macro-fusion-mvnclz.mir   | 20 ++++
 .../AArch64/misched-fusion-lit-hip09.ll       | 73 ++++++++++++++
 llvm/test/CodeGen/AArch64/remat-hip09.ll      | 18 ++++
 llvm/test/lit.site.cfg.py.in                  |  4 +
 llvm/unittests/TargetParser/Host.cpp          |  5 +
 .../TargetParser/TargetParserTest.cpp         | 16 +++
 15 files changed, 277 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll
 create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
 create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
 create mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll

diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 8be5d4ba5..74e68e25d 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -112,6 +112,14 @@ else()
   set(LLVM_ENABLE_AUTOTUNER 0)
 endif()

+option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON)
+if(LLVM_ENABLE_AARCH64_HIP09)
+  set(LLVM_ENABLE_AARCH64_HIP09 1)
+  add_definitions( -DENABLE_AARCH64_HIP09 )
+else()
+  set(LLVM_ENABLE_AARCH64_HIP09 0)
+endif()
+
 if(LLVM_ENABLE_EXPENSIVE_CHECKS)
   add_compile_definitions(EXPENSIVE_CHECKS)

diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index dc4cdfa8e..07cd2fcbb 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -542,6 +542,13 @@ inline constexpr CpuInfo CpuInfos[] = {
      (AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 |
       AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES |
       AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)},
+#if defined(ENABLE_AARCH64_HIP09)
+    {"hip09", ARMV8_5A,
+     (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 |
+      AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
+      AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
+      AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)},
+#endif
 };

 // An alias for a CPU.
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 8f50af4b7..c8bfd770f 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -296,6 +296,12 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
    "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
    "CPU fuses (a + b + 1) and (a - b - 1)">;

+#ifdef ENABLE_AARCH64_HIP09
+def FeatureFuseMvnClz : SubtargetFeature<
+    "fuse-mvn-clz", "HasFuseMvnClz", "true",
+    "CPU fuses mvn+clz operations">;
+#endif
+
 def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
@@ -1205,6 +1211,21 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
                                   FeatureFuseAES,
                                   FeaturePostRAScheduler]>;

+#ifdef ENABLE_AARCH64_HIP09
+def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
+                                   "HiSilicon HIP-09 processors", [
+                                   FeatureCustomCheapAsMoveHandling,
+                                   FeatureExperimentalZeroingPseudos,
+                                   FeatureFuseAES,
+                                   FeatureLSLFast,
+                                   FeatureAscendStoreAddress,
+                                   FeatureCmpBccFusion,
+                                   FeatureArithmeticBccFusion,
+                                   FeatureFuseLiterals,
+                                   FeatureFuseMvnClz,
+                                   FeaturePostRAScheduler]>;
+#endif
+
 def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
                                    "Ampere Computing Ampere-1 processors", [
                                    FeaturePostRAScheduler,
@@ -1359,6 +1380,14 @@ def ProcessorFeatures {
   list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
                                    FeatureNEON, FeaturePerfMon, FeatureSPE,
                                    FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
+#ifdef ENABLE_AARCH64_HIP09
+  list<SubtargetFeature> HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8,
+                                  FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64,
+                                  FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE,
+                                  FeatureFullFP16, FeatureFP16FML, FeatureDotProd,
+                                  FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4,
+                                  FeatureSVE];
+#endif
   list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
                                     FeatureSSBS, FeatureRandGen, FeatureSB,
                                     FeatureSHA2, FeatureSHA3, FeatureAES];
@@ -1464,8 +1493,15 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model,
 // Marvell ThunderX3T110 Processors.
 def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
                      ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>;
+
+// HiSilicon Processors.
 def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
                      [TuneTSV110]>;
+#ifdef ENABLE_AARCH64_HIP09
+// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57.
+def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09,
+                     [TuneHIP09]>;
+#endif

 // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
 def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 05d60872b..4963ec350 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -51,6 +51,12 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
   case AArch64::SUBSXrr:
   case AArch64::BICSWrr:
   case AArch64::BICSXrr:
+#if defined(ENABLE_AARCH64_HIP09)
+  case AArch64::ADCSWr:
+  case AArch64::ADCSXr:
+  case AArch64::SBCSWr:
+  case AArch64::SBCSXr:
+#endif
     return true;
   case AArch64::ADDSWrs:
   case AArch64::ADDSXrs:
@@ -183,6 +189,20 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
       SecondMI.getOperand(3).getImm() == 16))
     return true;

+#if defined(ENABLE_AARCH64_HIP09)
+  // 32 bit immediate materialized as MOVN + MOVK (W-register forms).
+  if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) &&
+      (SecondMI.getOpcode() == AArch64::MOVKWi &&
+       SecondMI.getOperand(3).getImm() == 16))
+    return true;
+
+  // Lower half of 64 bit immediate (MOVNXi pairs with the 64-bit MOVKXi).
+  if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNXi) &&
+      (SecondMI.getOpcode() == AArch64::MOVKXi &&
+       SecondMI.getOperand(3).getImm() == 16))
+    return true;
+#endif
+
   // Upper half of 64 bit immediate.
   if ((FirstMI == nullptr ||
        (FirstMI->getOpcode() == AArch64::MOVKXi &&
@@ -437,6 +457,37 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
   return false;
 }

+#if defined(ENABLE_AARCH64_HIP09)
+static bool isMvnClzPair(const MachineInstr *FirstMI,
+                         const MachineInstr &SecondMI) {
+  // HIP09 supports fusion of MVN + CLZ.
+  // A CLZ can be fused with the MVN preceding it, making execution faster.
+  // Fusion is not allowed when the MVN uses a shifted register operand.
+  //
+  // Instruction alias info:
+  // 1. MVN <Wd>, <Wm>{, <shift> #<amount>} is equivalent to
+  //    ORN <Wd>, WZR, <Wm>{, <shift> #<amount>}
+  // 2. MVN <Xd>, <Xm>{, <shift> #<amount>} is equivalent to
+  //    ORN <Xd>, XZR, <Xm>{, <shift> #<amount>}
+  // Assume the 1st instr to be a wildcard if it is unspecified.
+  if ((FirstMI == nullptr ||
+       ((FirstMI->getOpcode() == AArch64::ORNWrs) &&
+        (FirstMI->getOperand(1).getReg() == AArch64::WZR) &&
+        (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
+      (SecondMI.getOpcode() == AArch64::CLZWr))
+    return true;
+
+  if ((FirstMI == nullptr ||
+       ((FirstMI->getOpcode() == AArch64::ORNXrs) &&
+        (FirstMI->getOperand(1).getReg() == AArch64::XZR) &&
+        (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
+      (SecondMI.getOpcode() == AArch64::CLZXr))
+    return true;
+
+  return false;
+}
+#endif
+
 /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
@@ -472,6 +523,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   if (ST.hasFuseAddSub2RegAndConstOne() &&
       isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
     return true;
+#if defined(ENABLE_AARCH64_HIP09)
+  if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI))
+    return true;
+#endif

   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 450e27b8a..ddf22364c 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -266,6 +266,15 @@ void AArch64Subtarget::initializeProperties() {
     PrefFunctionAlignment = Align(16);
     PrefLoopAlignment = Align(4);
     break;
+#if defined(ENABLE_AARCH64_HIP09)
+  case HIP09:
+    CacheLineSize = 64;
+    PrefFunctionAlignment = Align(16);
+    PrefLoopAlignment = Align(4);
+    VScaleForTuning = 2;
+    DefaultSVETFOpts = TailFoldingOpts::Simple;
+    break;
+#endif
   case ThunderX3T110:
     CacheLineSize = 64;
     PrefFunctionAlignment = Align(16);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5e20d1646..5f481f4f9 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -87,7 +87,10 @@ public:
     ThunderXT83,
     ThunderXT88,
     ThunderX3T110,
-    TSV110
+    TSV110,
+#if defined(ENABLE_AARCH64_HIP09)
+    HIP09
+#endif
   };

 protected:
@@ -239,7 +242,11 @@ public:
   bool hasFusion() const {
     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
+#if defined(ENABLE_AARCH64_HIP09)
+           hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz();
+#else
            hasFuseAdrpAdd() || hasFuseLiterals();
+#endif
   }

   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
index 2739233f9..501ce1f2f 100644
--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
@@ -2,6 +2,10 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen)

 list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target)

+if(LLVM_ENABLE_AARCH64_HIP09)
+  list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09")
+endif()
+
 add_llvm_component_library(LLVMTarget
   Target.cpp
   TargetIntrinsicInfo.cpp
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index d11dc605e..8b23be02e 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -257,6 +257,9 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
     // contents are specified in the various processor manuals.
     return StringSwitch<const char *>(Part)
       .Case("0xd01", "tsv110")
+#if defined(ENABLE_AARCH64_HIP09)
+      .Case("0xd02", "hip09")
+#endif
       .Default("generic");

   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
new file mode 100644
index 000000000..dcf32e4dc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
@@ -0,0 +1,11 @@
+; REQUIRES: enable_enable_aarch64_hip09
+; This tests that llc accepts the hip09 AArch64 CPU
+
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
+
+; CHECK-NOT: {{.*}}  is not a recognized processor for this target
+; INVALID: {{.*}}  is not a recognized processor for this target
+
+define i32 @f(i64 %z) {
+	ret i32 0
+}
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
new file mode 100644
index 000000000..64bf15937
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
@@ -0,0 +1,20 @@
+# REQUIRES: enable_enable_aarch64_hip09
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
+---
+# CHECK-LABEL: name: fuse-mvn-clz
+# CHECK: $w2 = ORNWrs $wzr, $w1, 0
+# FUSION: $w0 = CLZWr killed renamable $w2
+# CHECK: $w3 = ADDWri killed renamable $w1, 1, 0
+# NOFUSION: $w0 = CLZWr killed renamable $w2
+name: fuse-mvn-clz
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2, $w3
+
+    $w2 = ORNWrs $wzr, $w1, 0
+    $w3 = ADDWri killed renamable $w1, 1, 0
+    $w0 = CLZWr killed renamable $w2
+    RET undef $lr, implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
new file mode 100644
index 000000000..d67fa5b43
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
@@ -0,0 +1,73 @@
+; REQUIRES: enable_enable_aarch64_hip09
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09           | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
+
+@g = common local_unnamed_addr global ptr null, align 8
+
+define dso_local ptr @litp(i32 %a, i32 %b) {
+entry:
+  %add = add nsw i32 %b, %a
+  %idx.ext = sext i32 %add to i64
+  %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext
+  store ptr %add.ptr, ptr @g, align 8
+  ret ptr %add.ptr
+
+; CHECK-LABEL: litp:
+; CHECK: adrp [[R:x[0-9]+]], litp
+; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp
+}
+
+define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" {
+entry:
+  %add = add nsw i32 %b, %a
+  %idx.ext = sext i32 %add to i64
+  %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext
+  store ptr %add.ptr, ptr @g, align 8
+  ret ptr %add.ptr
+
+; CHECK-LABEL: litp_tune_generic:
+; CHECK:         adrp [[R:x[0-9]+]], litp_tune_generic
+; CHECK-NEXT:    add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic
+}
+
+define dso_local i32 @liti(i32 %a, i32 %b) {
+entry:
+  %add = add i32 %a, -262095121
+  %add1 = add i32 %add, %b
+  ret i32 %add1
+
+; CHECK-LABEL: liti:
+; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
+; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
+; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
+}
+
+; Function Attrs: norecurse nounwind readnone
+define dso_local i64 @litl(i64 %a, i64 %b) {
+entry:
+  %add = add i64 %a, 2208998440489107183
+  %add1 = add i64 %add, %b
+  ret i64 %add1
+
+; CHECK-LABEL: litl:
+; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}}
+; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
+; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48
+}
+
+; Function Attrs: norecurse nounwind readnone
+define dso_local double @litf() {
+entry:
+  ret double 0x400921FB54442D18
+
+; CHECK-LABEL: litf:
+; CHECK-DONT:      adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
+; CHECK-DONT-NEXT: ldr  {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
+; CHECKFUSE-HIP09:    mov  [[R:x[0-9]+]], #11544
+; CHECKFUSE-HIP09:    movk [[R]], #21572, lsl #16
+; CHECKFUSE-HIP09:    movk [[R]], #8699, lsl #32
+; CHECKFUSE-HIP09:    movk [[R]], #16393, lsl #48
+; CHECKFUSE-HIP09:    fmov {{d[0-9]+}}, [[R]]
+}
diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll
new file mode 100644
index 000000000..aec0d18ae
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/remat-hip09.ll
@@ -0,0 +1,18 @@
+; REQUIRES: enable_enable_aarch64_hip09
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
+
+%X = type { i64, i64, i64 }
+declare void @f(ptr)
+define void @t() {
+entry:
+  %tmp = alloca %X
+  call void @f(ptr %tmp)
+; CHECK: add x0, sp, #8
+; CHECK-NOT: mov
+; CHECK-NEXT: bl f
+  call void @f(ptr %tmp)
+; CHECK: add x0, sp, #8
+; CHECK-NOT: mov
+; CHECK-NEXT: bl f
+  ret void
+}
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
index 20c1ecca1..6145a514f 100644
--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
@@ -64,9 +64,13 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
 config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
 config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
+config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@

 import lit.llvm
 lit.llvm.initialize(lit_config, config)

+if config.enable_enable_aarch64_hip09:
+    config.available_features.add("enable_enable_aarch64_hip09")
+
 # Let the main config do the real work.
 lit_config.load_config(
     config, os.path.join(config.llvm_src_root, "test/lit.cfg.py"))
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 452d0326c..4b4c81514 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -250,6 +250,11 @@ CPU part	: 0x0a1
   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
                                               "CPU part        : 0xd01"),
             "tsv110");
+#if defined(ENABLE_AARCH64_HIP09)
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
+                                              "CPU part        : 0xd02"),
+            "hip09");
+#endif

   // Verify A64FX.
   const std::string A64FXProcCpuInfo = R"(
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 741d5a2d4..94e0047e5 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1421,6 +1421,18 @@ INSTANTIATE_TEST_SUITE_P(
                              AArch64::AEK_PROFILE | AArch64::AEK_FP16 |
                              AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD,
                          "8.2-A"),
+#if defined(ENABLE_AARCH64_HIP09)
+        ARMCPUTestParams(
+            "hip09", "armv8.5-a", "crypto-neon-fp-armv8",
+            AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD |
+                AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM |
+                AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_AES |
+                AArch64::AEK_SM4 | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+                AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
+                AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
+                AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16,
+            "8.5-A"),
+#endif
         ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8",
                          AArch64::AEK_CRC | AArch64::AEK_AES |
                              AArch64::AEK_SHA2 | AArch64::AEK_FP |
@@ -1437,7 +1449,11 @@ INSTANTIATE_TEST_SUITE_P(
                          "8.2-A")));

 // Note: number of CPUs includes aliases.
+#if defined(ENABLE_AARCH64_HIP09)
+static constexpr unsigned NumAArch64CPUArchs = 63;
+#else
 static constexpr unsigned NumAArch64CPUArchs = 62;
+#endif

 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector<StringRef, NumAArch64CPUArchs> List;
--
2.19.1