diff --git a/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch b/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea61cb374fb876873228fdb42eba8562cce3f68a --- /dev/null +++ b/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch @@ -0,0 +1,9915 @@ +From a9863e2b6e6783aa9be0b9d1d187084fd4b32a3a Mon Sep 17 00:00:00 2001 +From: Muhammad Asif Manzoor +Date: Thu, 21 Mar 2024 12:50:38 -0400 +Subject: Add BiSheng Autotuner support for LLVM compiler + +Automatic tuning is an automatic iterative process that optimizes a given +program by manipulating compilation options for optimal performance. +BiSheng Autotuner provides a resumable interface for tuning process. BiSheng +Autotuner can tune 1) individual code segments/blocks (fine grain tuning) like +loops, callsites, instructions, etc. and 2) entire modules/programs (coarse +grain tuning) for compiler flags, pass ordering, etc. +This patch enables LLVM compiler to extract tuneable code regions and then apply +suggested configuration (by Autotuner) to find out the optimal configurations. 
+--- + llvm/cmake/modules/CrossCompile.cmake | 1 + + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 + + llvm/include/llvm/Analysis/AutotuningDump.h | 75 ++ + llvm/include/llvm/Analysis/LoopInfo.h | 13 + + llvm/include/llvm/Analysis/Passes.h | 10 + + llvm/include/llvm/AutoTuner/AutoTuning.h | 486 ++++++++++++ + .../llvm/AutoTuner/AutoTuningRemarkManager.h | 43 ++ + .../llvm/AutoTuner/AutoTuningRemarkStreamer.h | 47 ++ + llvm/include/llvm/CodeGen/MachineBasicBlock.h | 13 + + llvm/include/llvm/IR/Function.h | 37 + + llvm/include/llvm/IR/InstrTypes.h | 24 + + llvm/include/llvm/IR/Instructions.h | 24 + + llvm/include/llvm/IR/Module.h | 3 + + llvm/include/llvm/IR/StructuralHash.h | 14 + + llvm/include/llvm/InitializePasses.h | 5 + + llvm/include/llvm/LinkAllPasses.h | 8 + + llvm/include/llvm/Remarks/Remark.h | 32 + + llvm/include/llvm/Support/CommandLine.h | 17 + + llvm/include/llvm/Transforms/Scalar.h | 17 + + .../Transforms/Scalar/AutoTuningCompile.h | 170 +++++ + .../llvm/Transforms/Utils/UnrollLoop.h | 4 + + llvm/lib/Analysis/AutotuningDump.cpp | 265 +++++++ + llvm/lib/Analysis/CMakeLists.txt | 2 + + llvm/lib/Analysis/InlineAdvisor.cpp | 18 + + llvm/lib/Analysis/InlineCost.cpp | 29 + + llvm/lib/Analysis/LoopInfo.cpp | 52 ++ + llvm/lib/AutoTuner/AutoTuning.cpp | 705 ++++++++++++++++++ + .../lib/AutoTuner/AutoTuningRemarkManager.cpp | 299 ++++++++ + .../AutoTuner/AutoTuningRemarkStreamer.cpp | 55 ++ + llvm/lib/AutoTuner/CMakeLists.txt | 11 + + llvm/lib/CMakeLists.txt | 1 + + llvm/lib/CodeGen/CMakeLists.txt | 1 + + llvm/lib/CodeGen/CalcSpillWeights.cpp | 30 + + llvm/lib/CodeGen/MachineBasicBlock.cpp | 36 + + llvm/lib/CodeGen/MachineScheduler.cpp | 44 ++ + llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 19 + + llvm/lib/IR/AsmWriter.cpp | 151 ++++ + llvm/lib/IR/CMakeLists.txt | 1 + + llvm/lib/IR/Function.cpp | 34 + + llvm/lib/IR/Instructions.cpp | 86 +++ + llvm/lib/IR/StructuralHash.cpp | 114 +++ + llvm/lib/Passes/PassBuilder.cpp | 5 + + 
llvm/lib/Passes/PassBuilderPipelines.cpp | 46 ++ + llvm/lib/Passes/PassRegistry.def | 13 + + llvm/lib/Passes/StandardInstrumentations.cpp | 23 + + .../lib/Remarks/BitstreamRemarkSerializer.cpp | 8 + + llvm/lib/Remarks/RemarkStreamer.cpp | 4 + + llvm/lib/Remarks/YAMLRemarkParser.cpp | 122 +++ + llvm/lib/Remarks/YAMLRemarkParser.h | 6 + + llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 84 +++ + llvm/lib/Support/CommandLine.cpp | 41 + + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + + llvm/lib/Transforms/IPO/Inliner.cpp | 36 + + llvm/lib/Transforms/IPO/SampleProfile.cpp | 14 + + .../Transforms/Instrumentation/CMakeLists.txt | 1 + + .../Instrumentation/PGOInstrumentation.cpp | 8 + + .../Transforms/Scalar/AutoTuningCompile.cpp | 334 +++++++++ + llvm/lib/Transforms/Scalar/CMakeLists.txt | 2 + + llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 187 +++++ + llvm/lib/Transforms/Scalar/Scalar.cpp | 4 + + llvm/lib/Transforms/Scalar/Sink.cpp | 5 + + llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + + llvm/lib/Transforms/Utils/LCSSA.cpp | 5 + + llvm/lib/Transforms/Utils/LoopSimplify.cpp | 8 + + llvm/lib/Transforms/Utils/LoopUnroll.cpp | 3 + + llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + + .../Vectorize/LoopVectorizationLegality.cpp | 12 + + .../Transforms/Vectorize/LoopVectorize.cpp | 34 + + .../Inputs/unroll_template.yaml | 8 + + .../AutotuningDump/create-data-dir.ll | 65 ++ + llvm/test/AutoTuning/AutotuningDump/unroll.ll | 35 + + .../autotune_datadir/baseline_config.yaml | 9 + + .../autotune_datadir/random_config.yaml | 9 + + .../AutoTuning/BaselineConfig/Inputs/test.ll | 117 +++ + .../BaselineConfig/apply_baseline_config.ll | 11 + + llvm/test/AutoTuning/BaselineConfig/opp.ll | 67 ++ + .../CodeRegionFilter/function-filtering.ll | 62 ++ + .../Error/Inputs/invalid-format.yaml | 3 + + .../AutoTuning/Error/Inputs/template.yaml | 10 + + .../AutoTuning/Error/file-not-found-error.ll | 29 + + .../AutoTuning/Error/invalid-yaml-error.ll | 27 + + .../AutoTuning/Error/malformed-input-error.ll 
| 136 ++++ + llvm/test/AutoTuning/Error/output-error.ll | 28 + + llvm/test/AutoTuning/Error/valid-input.ll | 27 + + .../Inputs/template.yaml | 9 + + .../inc-compile-parse-input.ll | 103 +++ + .../AutoTuning/Inline/Inputs/template.yaml | 9 + + .../Inline/Inputs/template_no_metadata.yaml | 7 + + .../test/AutoTuning/Inline/duplicate-calls.ll | 96 +++ + llvm/test/AutoTuning/Inline/force-inline.ll | 84 +++ + .../AutoTuning/Inline/inline-attribute.ll | 85 +++ + llvm/test/AutoTuning/Inline/opp.ll | 64 ++ + .../LoopUnroll/Inputs/debug_loc_template.yaml | 10 + + .../LoopUnroll/Inputs/loop_nest.yaml | 10 + + .../LoopUnroll/Inputs/loop_peel.yaml | 9 + + .../Inputs/unroll_raw_template.yaml | 10 + + .../LoopUnroll/Inputs/unroll_template.yaml | 10 + + .../Inputs/unroll_template_no_metadata.yaml | 8 + + llvm/test/AutoTuning/LoopUnroll/debug_loc.ll | 161 ++++ + .../AutoTuning/LoopUnroll/dynamic_config.ll | 56 ++ + llvm/test/AutoTuning/LoopUnroll/loop_nest.ll | 136 ++++ + llvm/test/AutoTuning/LoopUnroll/loop_peel.ll | 53 ++ + .../AutoTuning/LoopUnroll/unroll-pragma.ll | 129 ++++ + llvm/test/AutoTuning/LoopUnroll/unroll.ll | 101 +++ + llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll | 113 +++ + .../Inputs/vectorize_template.yaml | 9 + + .../vectorize_template_no_metadata.yaml | 7 + + .../LoopVectorize/force-vector-interleave.ll | 88 +++ + .../Inputs/misched_x86_template.yaml | 10 + + .../misched_x86_bidirectional.ll | 73 ++ + .../MachineScheduler/misched_x86_bottomup.ll | 72 ++ + .../MachineScheduler/misched_x86_topdown.ll | 72 ++ + .../AutoTuning/MetaData/structural_hash.ll | 234 ++++++ + .../AutoTuning/MetaData/write_no_metadata.ll | 191 +++++ + .../MetaData/write_with_metadata.ll | 204 +++++ + .../AutoTuning/PGO/Inputs/pgo-instr.proftext | 17 + + .../PGO/Inputs/pgo-sample-cold.prof | 7 + + .../AutoTuning/PGO/Inputs/pgo-sample-hot.prof | 7 + + llvm/test/AutoTuning/PGO/pgo-instr-filters.ll | 61 ++ + .../test/AutoTuning/PGO/pgo-sample-filters.ll | 138 ++++ + 
.../Inputs/pass_invocation.yaml | 10 + + .../PassInvocation/pass_invocation_read.ll | 64 ++ + .../PassInvocation/pass_invocation_write.ll | 67 ++ + .../PhaseOrdering/Inputs/template.yaml | 8 + + .../AutoTuning/PhaseOrdering/pass-order.ll | 65 ++ + .../AutoTuning/SwitchLowering/switch-opp.ll | 47 ++ + llvm/test/AutoTuning/lit.local.cfg | 2 + + llvm/test/AutoTuning/opt-opp.ll | 315 ++++++++ + llvm/test/lit.site.cfg.py.in | 1 + + llvm/tools/llc/llc.cpp | 19 + + llvm/tools/opt/NewPMDriver.cpp | 42 ++ + llvm/tools/opt/opt.cpp | 53 ++ + 132 files changed, 7801 insertions(+) + create mode 100644 llvm/include/llvm/Analysis/AutotuningDump.h + create mode 100644 llvm/include/llvm/AutoTuner/AutoTuning.h + create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h + create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h + create mode 100644 llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h + create mode 100644 llvm/lib/Analysis/AutotuningDump.cpp + create mode 100644 llvm/lib/AutoTuner/AutoTuning.cpp + create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp + create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp + create mode 100644 llvm/lib/AutoTuner/CMakeLists.txt + create mode 100644 llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp + create mode 100644 llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml + create mode 100644 llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll + create mode 100644 llvm/test/AutoTuning/AutotuningDump/unroll.ll + create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml + create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml + create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll + create mode 100644 llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll + create mode 100644 llvm/test/AutoTuning/BaselineConfig/opp.ll + create mode 100644 
llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll + create mode 100644 llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml + create mode 100644 llvm/test/AutoTuning/Error/Inputs/template.yaml + create mode 100644 llvm/test/AutoTuning/Error/file-not-found-error.ll + create mode 100644 llvm/test/AutoTuning/Error/invalid-yaml-error.ll + create mode 100644 llvm/test/AutoTuning/Error/malformed-input-error.ll + create mode 100644 llvm/test/AutoTuning/Error/output-error.ll + create mode 100644 llvm/test/AutoTuning/Error/valid-input.ll + create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml + create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll + create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template.yaml + create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml + create mode 100644 llvm/test/AutoTuning/Inline/duplicate-calls.ll + create mode 100644 llvm/test/AutoTuning/Inline/force-inline.ll + create mode 100644 llvm/test/AutoTuning/Inline/inline-attribute.ll + create mode 100644 llvm/test/AutoTuning/Inline/opp.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/debug_loc.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_nest.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_peel.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll + create mode 100644 
llvm/test/AutoTuning/LoopUnroll/unroll.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll + create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml + create mode 100644 llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll + create mode 100644 llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml + create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll + create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll + create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll + create mode 100644 llvm/test/AutoTuning/MetaData/structural_hash.ll + create mode 100644 llvm/test/AutoTuning/MetaData/write_no_metadata.ll + create mode 100644 llvm/test/AutoTuning/MetaData/write_with_metadata.ll + create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext + create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof + create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof + create mode 100644 llvm/test/AutoTuning/PGO/pgo-instr-filters.ll + create mode 100644 llvm/test/AutoTuning/PGO/pgo-sample-filters.ll + create mode 100644 llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml + create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll + create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll + create mode 100644 llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml + create mode 100644 llvm/test/AutoTuning/PhaseOrdering/pass-order.ll + create mode 100644 llvm/test/AutoTuning/SwitchLowering/switch-opp.ll + create mode 100644 llvm/test/AutoTuning/lit.local.cfg + create mode 100644 llvm/test/AutoTuning/opt-opp.ll + +diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake +index 6af47b51d4c6..1a9fb4b2dddc 
100644 +--- a/llvm/cmake/modules/CrossCompile.cmake ++++ b/llvm/cmake/modules/CrossCompile.cmake +@@ -82,6 +82,7 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype) + -DLLVM_ENABLE_PROJECTS="${llvm_enable_projects_arg}" + -DLLVM_EXTERNAL_PROJECTS="${llvm_external_projects_arg}" + -DLLVM_ENABLE_RUNTIMES="${llvm_enable_runtimes_arg}" ++ -DLLVM_ENABLE_AUTOTUNER="${LLVM_ENABLE_AUTOTUNER}" + ${external_project_source_dirs} + -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN="${LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN}" + -DLLVM_INCLUDE_BENCHMARKS=OFF +diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake +index 62a1a64d37d4..b8e9dbe29d88 100644 +--- a/llvm/cmake/modules/HandleLLVMOptions.cmake ++++ b/llvm/cmake/modules/HandleLLVMOptions.cmake +@@ -112,6 +112,14 @@ else() + set(BUILD_FOR_OPENEULER 0) + endif() + ++option(LLVM_ENABLE_AUTOTUNER "Enable BiSheng Auto-Tuning features" OFF) ++if (LLVM_ENABLE_AUTOTUNER) ++ set(LLVM_ENABLE_AUTOTUNER 1) ++ add_definitions( -DENABLE_AUTOTUNER ) ++else() ++ set(LLVM_ENABLE_AUTOTUNER 0) ++endif() ++ + if(LLVM_ENABLE_EXPENSIVE_CHECKS) + add_compile_definitions(EXPENSIVE_CHECKS) + +diff --git a/llvm/include/llvm/Analysis/AutotuningDump.h b/llvm/include/llvm/Analysis/AutotuningDump.h +new file mode 100644 +index 000000000000..fb973f05323e +--- /dev/null ++++ b/llvm/include/llvm/Analysis/AutotuningDump.h +@@ -0,0 +1,75 @@ ++#if defined(ENABLE_AUTOTUNER) ++// ===-- AutotuningDump.h - Auto-Tuning-----------------------------------===// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++// ===--------------------------------------------------------------------===// ++// ++// This file contains pass collecting IR of tuned regions and storing them into ++// predetermined locations, to be used later by autotuning ML guidance ++// ++// ===--------------------------------------------------------------------===// ++ ++#include "llvm/Analysis/LoopInfo.h" ++#include "llvm/Analysis/LoopPass.h" ++#include "llvm/IR/PassManager.h" ++#include "llvm/Transforms/Scalar/LoopPassManager.h" ++#include ++ ++namespace llvm { ++class AutotuningDump { ++public: ++ AutotuningDump(bool IncrementalCompilation = false); ++ bool run(Module &F, function_ref GetLI); ++ ++private: ++ std::string AutoTuneDirPath; ++ std::unique_ptr createFile(const Twine &File); ++ int getConfigNumber(); ++ void dumpToStream(llvm::raw_ostream &os, const Loop &L) const; ++ void dumpToStream(llvm::raw_ostream &os, const Function &F) const; ++ void dumpFunctions(llvm::Module &M); ++ void dumpLoops(llvm::Module &M, function_ref GetLI); ++ void dumpModule(llvm::Module &M); ++ std::string getDirectoryName(const std::string File) const; ++ std::string getFileName(std::string FilePath); ++ ++ bool IsIncrementalCompilation; ++}; ++ ++class AutotuningDumpLegacy : public ModulePass { ++public: ++ static char ID; ++ AutotuningDumpLegacy(bool IncrementalCompilation = false); ++ StringRef getPassName() const override; ++ bool runOnModule(Module &M) override; ++ void getAnalysisUsage(AnalysisUsage &AU) const override; ++ ++private: ++ bool IsIncrementalCompilation; ++}; ++ ++class AutotuningDumpAnalysis ++ : public AnalysisInfoMixin { ++ friend AnalysisInfoMixin; ++ static AnalysisKey Key; ++ ++public: ++ AutotuningDumpAnalysis(bool IncrementalCompilation = false) { ++ IsIncrementalCompilation = IncrementalCompilation; ++ } ++ ++ // This pass only prints IRs of selected function or loops without doing any ++ // real analyses, thus the return value is meaningless. 
To avoid leaking data ++ // or memory, we typedef Result to Optional to avoid having to return an ++ // AutotuningDump object. ++ using Result = std::optional; ++ Result run(Module &M, ModuleAnalysisManager &AM); ++ ++private: ++ bool IsIncrementalCompilation; ++}; ++} // namespace llvm ++#endif +\ No newline at end of file +diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h +index 3434630c27cf..9be3e056cf76 100644 +--- a/llvm/include/llvm/Analysis/LoopInfo.h ++++ b/llvm/include/llvm/Analysis/LoopInfo.h +@@ -26,6 +26,9 @@ + #include + #include + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#endif + + namespace llvm { + +@@ -44,7 +47,12 @@ extern template class LoopBase; + + /// Represents a single loop in the control flow graph. Note that not all SCCs + /// in the CFG are necessarily loops. ++#if defined(ENABLE_AUTOTUNER) ++class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase, ++ public autotuning::Container { ++#else + class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase { ++#endif + public: + /// A range representing the start and end location of a loop. + class LocRange { +@@ -395,6 +403,11 @@ public: + return ""; + } + ++#if defined(ENABLE_AUTOTUNER) ++ void initCodeRegion() override; ++ uint64_t computeStructuralHash() override; ++#endif ++ + private: + Loop() = default; + +diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h +index ac1bc3549910..65f566cc75de 100644 +--- a/llvm/include/llvm/Analysis/Passes.h ++++ b/llvm/include/llvm/Analysis/Passes.h +@@ -58,6 +58,16 @@ namespace llvm { + // in a function and builds the region hierarchy. + // + FunctionPass *createRegionInfoPass(); ++ ++#if defined(ENABLE_AUTOTUNER) ++ //===--------------------------------------------------------------------===// ++ // ++ // createAutotuningDumpPass - This pass collects IR of tuned regions ++ // and stores them into predetermined locations. 
++ // for the purpose of autotuning ML guidance ++ // ++ ModulePass *createAutotuningDumpPass(); ++#endif + } + + #endif +diff --git a/llvm/include/llvm/AutoTuner/AutoTuning.h b/llvm/include/llvm/AutoTuner/AutoTuning.h +new file mode 100644 +index 000000000000..0f1f276306ec +--- /dev/null ++++ b/llvm/include/llvm/AutoTuner/AutoTuning.h +@@ -0,0 +1,486 @@ ++#if defined(ENABLE_AUTOTUNER) ++//===-- AutoTuning.h - Auto-Tuning-----------------------------------------===// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines Auto Tuning related functions, models and interfaces. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_AUTOTUNER_AUTOTUNING_H_ ++#define LLVM_AUTOTUNER_AUTOTUNING_H_ ++ ++#include "llvm/ADT/DenseMapInfo.h" ++#include "llvm/ADT/Hashing.h" ++#include "llvm/ADT/SetVector.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/IR/DebugInfoMetadata.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/Support/Casting.h" ++#include ++#include ++#include ++#include ++#include ++ ++// Options for AutoTuner incremental compilation. ++enum AutoTuningCompileOpt { ++ Inactive, // Disabled incremental compilation. ++ CoarseGrain, // For tuning LLVMParam. ++ FineGrain, // For tuning default code regions (Loop, CallSite, Function). ++ Basic // Same as CoarseGrain but can be applied for any code region. ++ // Can be used with ImpactRanker. ++}; ++ ++namespace autotuning { ++// Constant definition for AutoTuner incremental compilation. 
++const std::string CompileOptionStart = "start"; ++const std::string CompileOptionEnd = "end"; ++const std::string CompileOptionUnknow = "unknown"; ++const std::string CompileOptionUnroll = "loop-unroll"; ++const std::string CompileOptionVectorize = "loop-vectorize"; ++const std::string CompileOptionInline = "inline"; ++ ++class ParameterBase { ++public: ++ virtual ~ParameterBase() = default; ++ enum ParameterKind { ++ PK_PARAMETER, ++ }; ++ ParameterKind getKind() const { return Kind; } ++ ++ explicit ParameterBase(ParameterKind K) : Kind(K) {} ++ ++private: ++ const ParameterKind Kind; ++}; ++ ++template class Parameter : public ParameterBase { ++public: ++ Parameter(const T &RHS) : ParameterBase(PK_PARAMETER), Value(RHS) {} ++ const T &getValue() const { return Value; } ++ void setValue(const T &RHS) { Value = RHS; } ++ ++ static bool classof(const ParameterBase *P) { ++ return P->getKind() == PK_PARAMETER; ++ } ++ ++private: ++ T Value; ++}; ++ ++/// This class manages parameters of one codeRegion. ++class ParameterManager { ++ ++public: ++ // add a param into this ParameterManager ++ template ++ void add(const std::string &ParamName, const T ParamValue) { ++ std::shared_ptr Param = ++ std::make_shared>(ParamValue); ++ this->Parameters[ParamName] = Param; ++ } ++ ++ // Look up the value of a parameter by name in this ParameterManager. ++ // The found value will be assigned to the reference variable "Value". ++ // Return true if the parameter exists in this ParameterManager, ++ // and false otherwise. 
++ template ++ bool findByName(const std::string &ParamName, T &Value) const { ++ auto Iterator = Parameters.find(ParamName); ++ if (Iterator == Parameters.end()) { ++ return false; ++ } ++ ++ auto ParamPtr = llvm::dyn_cast>(Iterator->second.get()); ++ if (ParamPtr != nullptr) { ++ Value = ParamPtr->getValue(); ++ return true; ++ } else { ++ return false; ++ } ++ } ++ ++private: ++ std::unordered_map> Parameters; ++}; ++ ++/// The debug location used to track a CodeRegion back to the source file. ++struct SourceLocation { ++ /// The source file corresponding to this CodeRegion. ++ std::string SourceFilePath; ++ unsigned SourceLine = 0; ++ unsigned SourceColumn = 0; ++ ++ bool operator==(const SourceLocation &CR) const { ++ return (this->SourceFilePath == CR.SourceFilePath) && ++ (this->SourceLine == CR.SourceLine) && ++ (this->SourceColumn == CR.SourceColumn); ++ }; ++ ++ explicit operator bool() const { ++ return !(SourceFilePath.empty() && SourceLine == 0 && SourceColumn == 0); ++ } ++}; ++ ++enum CodeRegionType { ++ CallSite, // Code region for function inlining. ++ Function, // Used in AutoTuningDump pass for IR writing. ++ LLVMParam, // Compilation flags. Tuned individually for each module. ++ Loop, // Code region for loops. ++ MachineBasicBlock, // Instruction scheduling code region. ++ Other, // Pass ordering code region. ++ ProgramParam, // Compilation flags. Tuned collectively for program. ++ Switch, // Tuning MinJumpTableEntries parameter for switch inst. ++ Empty, // Empty CodeRegion. ++ Invalid // Invalid CodeRegion. ++}; ++ ++enum HotnessType { ++ Unknown, ++ Cold, ++ Hot, ++}; ++ ++/// DynamicOptions represent a map: Arg -> DynamicConfigs. ++/// Where Arg is a tuning parameter on the associated CodeRegion. ++/// And DynamicConfigs is the possible tuning values associated with Arg. 
++typedef std::map> DynamicOptions; ++ ++/// This class represents a region in source code including ++/// its name, function name, type, debug location, and associated pass name. ++class CodeRegion { ++ ++public: ++ // Default constructor ++ CodeRegion(const CodeRegionType Type = CodeRegionType::Other); ++ ~CodeRegion() = default; ++ // Concrete constructors ++ CodeRegion(const std::string &Name, const std::string &FuncName, ++ const CodeRegionType &Type, const llvm::DebugLoc &DL, ++ const DynamicOptions DO = {}); ++ CodeRegion(const std::string &Name, const std::string &FuncName, ++ const CodeRegionType &Type, ++ const SourceLocation &Location = SourceLocation(), ++ const DynamicOptions DO = {}); ++ CodeRegion(const std::string &Name, const std::string &FuncName, ++ const std::string &PassName, const CodeRegionType &Type, ++ const SourceLocation &Location = SourceLocation(), ++ const unsigned int Invocation = 0); ++ ++ bool operator==(const CodeRegion &CR) const; ++ inline bool operator!=(const CodeRegion &CR) const { return !(*this == CR); }; ++ ++ explicit operator bool() const { ++ return !(Name.empty() && FuncName.empty() && PassName.empty()); ++ } ++ ++ static std::string getTypeAsString(CodeRegionType CRType); ++ static std::string getHotnessAsString(HotnessType Hotness); ++ const std::string &getName() const { return Name; } ++ const std::string &getFuncName() const { return FuncName; } ++ const CodeRegionType &getType() const { return Type; } ++ const std::string &getFileName() const { return Location.SourceFilePath; } ++ const std::string &getTypeAsString() const { return StringType; } ++ const SourceLocation &getSourceLoc() const { return Location; } ++ const std::string &getPassName() const { return PassName; } ++ unsigned getSize() const { return Size; }; ++ void setPassName(const std::string &NewPassName); ++ void setSize(unsigned Size) { this->Size = Size; }; ++ void setHotness(HotnessType NewHotness) const { this->Hotness = NewHotness; } ++ 
HotnessType getHotness() const { return this->Hotness; } ++ std::string getHotnessAsString() const { return getHotnessAsString(Hotness); } ++ bool isCold() const { return this->Hotness == Cold; } ++ bool isHot() const { return this->Hotness == Hot; } ++ std::uint64_t getHash() const { return this->Hash; } ++ void setHash(std::uint64_t Hash) { this->Hash = Hash; } ++ DynamicOptions getAutoTunerOptions() const { return this->AutoTunerOptions; } ++ void setInvocation(unsigned int Invocation) { this->Invocation = Invocation; } ++ unsigned int getInvocation() const { return this->Invocation; } ++ ++ /// Add dynamic config options with Code Region for AutoTuner to tune instead ++ /// of using static config options. ++ void addAutoTunerOptions(const std::string ParamName, ++ std::vector Options) const { ++ this->AutoTunerOptions.insert( ++ std::pair>(ParamName, Options)); ++ } ++ static CodeRegion getInvalidInstance(); ++ static CodeRegion getEmptyInstance(); ++ void setBaselineConfig(std::map Value) const { ++ this->BaselineConfig = Value; ++ }; ++ std::map getBaselineConfig() const { ++ return this->BaselineConfig; ++ } ++ ++private: ++ /// Name of the code region. ++ /// For most of cases it's set to the name of a header basic block. ++ std::string Name; ++ /// Function name of this code region if any. ++ std::string FuncName; ++ /// Name of the pass which this code region is associated. ++ std::string PassName; ++ /// Type of this code region. Options are other, function, loop, ++ /// and machine basic block. ++ CodeRegionType Type; ++ /// Source Location. ++ SourceLocation Location; ++ std::string StringType; ++ /// Structural hash for the CodeRegion. ++ std::uint64_t Hash = 0; ++ /// Configs values passed to AutoTuner for dynamic setting of search space ++ /// for code regions. ++ mutable DynamicOptions AutoTunerOptions; ++ /// Configuration values passed to AutoTuner for generating the same binary ++ /// as the baseline. 
++ mutable std::map BaselineConfig; ++ ++ /// Record the order of invocation of an optimization pass during the whole ++ /// compilation pipeline. It is used to differentiate multiple invocations of ++ /// a same optimization pass. ++ /// Currently, Loop Unroll pass is invoked twice during the compilation ++ /// pipeline. 'Invocation' helps to relate a code region with the invocation ++ /// of Loop Unroll pass where the code region is generated. ++ mutable unsigned int Invocation; ++ ++ /// Size of this code region. Usually it refers to the number of instructions ++ /// but could be different based on implementations. ++ unsigned Size = 0; ++ mutable HotnessType Hotness = Unknown; ++ ++ /// A boolean flag to record if a CR is initialized or not. ++ /// It should only be set to true by initContainer(). ++ /// We only add initialized CR to TuningOpps. ++ bool Initialized = false; ++ ++ friend class AutoTuningEngine; ++}; ++ ++/// This class is an interface for classes representing code regions in LLVM ++/// (eg. Loop, Function and MachineBasicBlock) to inherit ++/// so that auto-tuning can be enabled on them. ++/// A Container must contain a CodeRegion. ++class Container { ++ ++public: ++ Container() {} ++ virtual ~Container(){}; ++ ++ /// Abstract method for derived classes to overwrite ++ virtual void initCodeRegion() = 0; ++ virtual uint64_t computeStructuralHash() = 0; ++ ++ /// Get the Container's CodeRegion. ++ const CodeRegion &getCodeRegion() const; ++ /// Set the Container's CodeRegion. ++ void setCodeRegion(const CodeRegion &NewCR); ++ /// This method is to look up the value of a parameter that corresponds to an ++ /// Container. The parameter being looked up is stored in a ParameterManager. ++ template ++ bool lookUpParams(const std::string &ParamsName, T &Value) const; ++ ++ /// Check if the code region is being tuned by config file. 
++ bool requiresIRDump(bool IsFunctionIR = false) const; ++ ++private: ++ CodeRegion CR; ++ friend class AutoTuningEngine; ++}; ++} // end namespace autotuning ++ ++namespace std { ++template <> ++// Implement hash for CodeRegion data type in std namespace. Only using common ++// attributes (with and without using 'OmitAutotuningMetadata' flag) of ++// CodeRegion. Remaining attributes are compared in overloaded == function. ++struct hash { ++ std::size_t operator()(const autotuning::CodeRegion &CR) const { ++ return llvm::hash_combine(CR.getPassName(), CR.getType()); ++ } ++}; ++} // namespace std ++ ++namespace llvm { ++// Forward Declaration. ++class CallBase; ++ ++typedef autotuning::CodeRegion CodeRegion; ++template <> struct DenseMapInfo { ++ static bool isEqual(const CodeRegion &LHS, const CodeRegion &RHS) { ++ return LHS == RHS; ++ } ++ static inline CodeRegion getEmptyKey() { ++ return autotuning::CodeRegion::getEmptyInstance(); ++ } ++ static inline CodeRegion getTombstoneKey() { ++ return autotuning::CodeRegion::getInvalidInstance(); ++ } ++ // Implement hash for CodeRegion data type in llvm namespace. Only using ++ // common attributes (with and without using 'OmitAutotuningMetadata' flag) ++ // of CodeRegion. Remaining attributes are compared in overloaded == ++ // function. ++ static unsigned getHashValue(const CodeRegion &CR) { ++ return llvm::hash_combine(CR.getPassName(), CR.getType()); ++ } ++}; ++} // namespace llvm ++ ++namespace autotuning { ++using namespace llvm; ++typedef std::unordered_map LookUpTable; ++typedef llvm::SetVector CodeRegions; ++ ++/// Structure to store information of CallSite code regions which is used to ++/// get a different SourceLocation for multiple callsites (same callee) in a ++/// function when these callsites have same SourceLocation due to inlining. 
++struct CallSiteLocation { ++ llvm::CallBase *CB; ++ llvm::Function *Caller; ++ llvm::Function *Callee; ++ SourceLocation SrcLoc; ++}; ++ ++class AutoTuningEngine { ++public: ++ AutoTuningEngine() { Enabled = false; } ++ ~AutoTuningEngine() {} ++ ++ /// Initialize the Container for auto-tuning. ++ void initContainer(Container *Container, const std::string &PassName, ++ const StringRef FuncName = "", bool AddOpportunity = true, ++ unsigned int Invocation = 0); ++ ++ /// Initialize auto-tuning. This method should only be called in the main ++ /// function. ++ /// \return Error::success() on success or the related Error otherwise. ++ llvm::Error init(const std::string &ModuleID); ++ ++ /// Finalize auto-tuning. This method should only be called in the main ++ /// function. ++ /// \return Error::success() on success or the related Error otherwise. ++ llvm::Error finalize(); ++ ++ /// Return the number of tuning configuration used for this compilation. ++ llvm::Expected getConfigNumber(); ++ ++ void enable() { Enabled = true; } ++ void disable() { Enabled = false; } ++ bool isEnabled() const { return Enabled; } ++ bool isMLEnabled() const { return MLEnabled; } ++ bool isDumpEnabled() const { return DumpEnabled; } ++ bool isGenerateOutput() const { return GenerateOutput; } ++ bool isParseInput() const { return ParseInput; } ++ bool isTuningAllowedForType(CodeRegionType CRType) const { ++ return (CodeRegionFilterTypes.count(CRType) > 0); ++ } ++ bool isThinLTOTuning() const; ++ ++ /// Convert a pass-name to CodeRegionType. ++ CodeRegionType convertPassToType(std::string Pass); ++ ++ /// First sets BaselineConfig value for the CR then ++ /// add a tuning opportunity into the TuningOpps list. 
++  void addOpportunity(const CodeRegion &OppCR,
++                      std::map BaselineConfig = {});
++  /// Return true if at least one tuning opportunity has been recorded in
++  /// TuningOpps.
++  /// NOTE(review): this used to return TuningOpps.empty(), i.e. the opposite
++  /// of what the name promises; audit any caller that compensated for that.
++  bool hasOpportunities() const { return !TuningOpps.empty(); }
++
++  bool shouldRunOptPass(std::string FileName, std::string Pass);
++
++  /// Insert all of the callsites of a function in CallSiteLocs vector.
++  void insertCallSiteLoc(CallSiteLocation Loc);
++
++  /// Update CallSiteLocs vector with new callsites (if any) which become
++  /// available due to inlining.
++  void updateCallSiteLocs(llvm::CallBase *CB, llvm::CallBase *Ptr,
++                          llvm::Function *F, unsigned int Line);
++
++  /// Clean up the CallSiteLocs vector by keeping the callsite if there are
++  /// multiple calls to same callee. This cleaning is performed before
++  /// inlining any callsite.
++  void cleanCallSiteLoc();
++
++  /// Clear the CallSiteLocs vector.
++  void clearCallSiteLocs();
++
++  /// Return the SourceLocation::SourceLine (if available).
++  std::optional getCallSiteLoc(llvm::CallBase *CB);
++
++  template
++  bool lookUpGlobalParams(const std::string &ParamsName, T &Value) const;
++  /// A map storing llvm parameters.
++  std::unordered_map LLVMParams;
++  /// A map storing program parameters.
++  std::unordered_map ProgramParams;
++
++private:
++  std::string ModuleID;
++  /// This boolean indicates if the auto-tuning mode is enabled.
++  /// It will be set to true if any of the following command line options
++  /// (auto-tuning-input, auto-tuning-result and auto-tuning-opp) is specified.
++  bool Enabled;
++  /// This boolean indicates if the ML guidance feature is enabled in
++  /// Autotuner. It will be set to true if -fautotune-rank is specified.
++  bool MLEnabled;
++  /// This boolean indicates if the IR dumping is enabled or not. IR dumping
++  /// is enabled for ML guidance feature. It can also be enabled with command
++  /// line compiler flag 'enable-autotuning-dump'.
++  bool DumpEnabled = false;
++  /// This boolean indicates if the compiler is parsing/using the
++  /// 'config.yaml' file generated by AutoTuner and uses its configuration
++  /// values instead of determining them with compiler heuristics.
++  bool ParseInput;
++  /// This boolean indicates if the compiler is creating/generating opportunity
++  /// file(s) which will be consumed by AutoTuner to create the search space.
++  bool GenerateOutput;
++  /// A map of filename and set of optimization passes; an optimization pass
++  /// will be added to this set if a CodeRegion belongs to the optimization
++  /// pass.
++  std::unordered_map> OppPassList;
++
++  /// Vector to store all of the duplicate calls in a function and the calls
++  /// which become available due to inlining.
++  SmallVector CallSiteLocs;
++
++  /// A set to store the code region types that will be tuned in current
++  /// autotuning flow. This will be populated with code region types based on
++  /// 'auto-tuning-type-filter' for -fautotune-generate and the types will be
++  /// extracted from config.yaml in case of -fautotune.
++  /// This set is used to apply type-based filtering prior to creating/
++  /// initializing a code region.
++  std::unordered_set CodeRegionFilterTypes;
++
++  /// A statically initialized map used to convert 'pass-name' to
++  /// 'CodeRegionType'.
++  std::unordered_map PTTMap;
++
++  /// A map of CodeRegion and ParameterManager to keep track of all the
++  /// parameters of code regions loaded from input config file.
++  LookUpTable ParamTable;
++  /// A list of CodeRegions as tuning opportunities.
++  CodeRegions TuningOpps;
++  /// A ParameterManager for global parameters.
++  ParameterManager GlobalParams;
++
++  /// Apply filters for CodeRegions.
++  void applyOppFilters(CodeRegions &CRs);
++
++  /// Apply function name filter for CodeRegions.
++  bool applyFunctionFilter(std::string FuncName);
++
++  friend class Container;
++  friend class CodeRegion;
++  friend class AutoTuningRemarkManager;
++};
++
++extern class AutoTuningEngine Engine; // AutoTuning Engine
++
++} // end namespace autotuning
++
++#endif /* LLVM_AUTOTUNER_AUTOTUNING_H_ */
++#endif
+diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h
+new file mode 100644
+index 000000000000..153a2c6246ad
+--- /dev/null
++++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h
+@@ -0,0 +1,43 @@
++#if defined(ENABLE_AUTOTUNER)
++//===- llvm/AutoTuner/AutoTuningRemarkManager.h - Remark Manager ----------===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++// This file declares the main interface for inputting and outputting
++// remarks for AutoTuning.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_AUTOTUNINGREMARKMANAGER_H
++#define LLVM_AUTOTUNINGREMARKMANAGER_H
++
++#include "llvm/AutoTuner/AutoTuning.h"
++#include "llvm/Remarks/RemarkStreamer.h"
++#include "llvm/Support/Error.h"
++#include
++#include
++#include
++
++namespace autotuning {
++class AutoTuningRemarkManager {
++public:
++  /// Read a list of parameters from input file.
++  /// \return Error::success() on success or the related Error otherwise.
++  static llvm::Error read(autotuning::AutoTuningEngine &E,
++                          const std::string &InputName,
++                          const std::string &RemarksFormat);
++
++  /// Dump a list of CodeRegions as tuning opportunities into a file.
++  /// \return Error::success() on success or the related Error otherwise.
++  static llvm::Error dump(const autotuning::AutoTuningEngine &E,
++                          const std::string &DirPath,
++                          const std::string &RemarksFormat,
++                          const std::string &RemarksPasses);
++};
++} // namespace autotuning
++#endif // LLVM_AUTOTUNINGREMARKMANAGER_H
++#endif
+diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h
+new file mode 100644
+index 000000000000..0096139b12e9
+--- /dev/null
++++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h
+@@ -0,0 +1,47 @@
++#if defined(ENABLE_AUTOTUNER)
++// ===------------ llvm/AutoTuner/AutoTuningRemarkStreamer.h --------------===//
++//
++//                     The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved.
++//
++// ===---------------------------------------------------------------------===//
++//
++// This file declares the streamer that converts AutoTuner CodeRegions into
++// serializable remarks::Remark objects and emits them.
++//
++// ===---------------------------------------------------------------------===//
++
++#ifndef LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H
++#define LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H
++
++#include "llvm/AutoTuner/AutoTuning.h"
++#include "llvm/Remarks/Remark.h"
++#include "llvm/Remarks/RemarkStreamer.h"
++#include "llvm/Support/Error.h"
++#include "llvm/Support/ToolOutputFile.h"
++#include
++#include
++
++namespace llvm {
++/// Streamer for AutoTuner remarks which has logic for dealing with CodeRegions.
++class AutoTuningRemarkStreamer {
++  /// The underlying remark streamer; held by reference, not owned.
++  remarks::RemarkStreamer &RS;
++  /// Convert a CodeRegion into a remarks::Remark object.
++  remarks::Remark toRemark(const autotuning::CodeRegion &CR);
++
++public:
++  AutoTuningRemarkStreamer(remarks::RemarkStreamer &RS) : RS(RS) {}
++  /// Emit a CodeRegion through the streamer.
++  void emit(const autotuning::CodeRegion &CR);
++  /// Set a pass filter based on a regex \p Filter.
++  /// \returns an error if the regex is invalid.
++  Error setFilter(StringRef Filter);
++};
++} // end namespace llvm
++
++#endif // LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H
++#endif
+diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+index 52388692c196..95ac9acf4e5e 100644
+--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
++++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+@@ -27,6 +27,9 @@
+ #include
+ #include
+ #include
++#if defined(ENABLE_AUTOTUNER)
++#include "llvm/AutoTuner/AutoTuning.h"
++#endif
+ 
+ namespace llvm {
+ 
+@@ -91,9 +94,19 @@ public:
+   void deleteNode(MachineInstr *MI);
+ };
+ 
++#if defined(ENABLE_AUTOTUNER)
++// With the AutoTuner enabled, MachineBasicBlock additionally derives from
++// autotuning::Container.
++class MachineBasicBlock
++    : public ilist_node_with_parent,
++      public autotuning::Container {
++#else
+ class MachineBasicBlock
+     : public ilist_node_with_parent {
++#endif
+ public:
++#if defined(ENABLE_AUTOTUNER)
++  // Overrides of the autotuning::Container interface.
++  void initCodeRegion() override;
++  uint64_t computeStructuralHash() override;
++#endif
+   /// Pair of physical register and lane mask.
+   /// This is not simply a std::pair typedef because the members should be named
+   /// clearly as they both have an integer type.
+diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
+index 93cf0d27e9a7..c0db48ae1789 100644
+--- a/llvm/include/llvm/IR/Function.h
++++ b/llvm/include/llvm/IR/Function.h
+@@ -37,6 +37,9 @@
+ #include
+ #include
+ #include
++#if defined(ENABLE_AUTOTUNER)
++#include "llvm/AutoTuner/AutoTuning.h"
++#endif
+ 
+ namespace llvm {
+ 
+@@ -56,6 +59,24 @@ class User;
+ class BranchProbabilityInfo;
+ class BlockFrequencyInfo;
+ 
++#if defined(ENABLE_AUTOTUNER)
++/// autotuning::Container companion of a Function. It keeps a back-pointer to
++/// the Function it belongs to plus a hotness tag, and can only be constructed
++/// by Function itself (private constructor, Function is a friend).
++class AutoTuningEnabledFunction : public autotuning::Container {
++public:
++  AutoTuningEnabledFunction() = delete;
++  void initCodeRegion() override;
++  /// Hotness tag accessors; the tag is autotuning::Unknown until set.
++  void setHot() { this->Hotness = autotuning::Hot; }
++  void setCold() { this->Hotness = autotuning::Cold; }
++  autotuning::HotnessType getHotness() const { return this->Hotness; }
++  uint64_t computeStructuralHash() override;
++
++private:
++  AutoTuningEnabledFunction(Function *F) { Func = F; };
++  /// The Function this object was created for.
++  Function *Func;
++  autotuning::HotnessType Hotness = autotuning::Unknown;
++  friend class Function;
++};
++#endif
++
+ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
+                                           public ilist_node {
+ public:
+@@ -68,6 +89,13 @@ public:
+   using arg_iterator = Argument *;
+   using const_arg_iterator = const Argument *;
+ 
++#if defined(ENABLE_AUTOTUNER)
++  // There is a one-to-one correspondence between ATEFunction and the current
++  // Function object to avoid messing up the LLVM User and owned Use classes'
++  // memory layout.
++  AutoTuningEnabledFunction ATEFunction = AutoTuningEnabledFunction(this);
++#endif
++
+ private:
+   // Important things that make up a function!
+   BasicBlockListType BasicBlocks;         ///< The basic blocks
+@@ -128,6 +156,11 @@ public:
+   void operator=(const Function&) = delete;
+   ~Function();
+ 
++#if defined(ENABLE_AUTOTUNER)
++  // Return the auto-tuning enabled version of this Function object.
++  AutoTuningEnabledFunction &getATEFunction() { return ATEFunction; }
++#endif
++
+   // This is here to help easily convert from FunctionT * (Function * or
+   // MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling
+   // FunctionT->getFunction().
+@@ -840,7 +873,11 @@ public:
+   /// AssemblyAnnotationWriter.
+   void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr,
+              bool ShouldPreserveUseListOrder = false,
++#if defined(ENABLE_AUTOTUNER)
++             bool IsForDebug = false, bool PrintCompleteIR = false) const;
++#else
+              bool IsForDebug = false) const;
++#endif
+ 
+   /// viewCFG - This function is meant for use from the debugger.  You can just
+   /// say 'call F->viewCFG()' and a ghostview window should pop up from the
+diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
+index 6095b0a1be69..dcc9bbee30fa 100644
+--- a/llvm/include/llvm/IR/InstrTypes.h
++++ b/llvm/include/llvm/IR/InstrTypes.h
+@@ -1169,6 +1169,23 @@ public:
+ using OperandBundleDef = OperandBundleDefT;
+ using ConstOperandBundleDef = OperandBundleDefT;
+ 
++#if defined(ENABLE_AUTOTUNER)
++//===----------------------------------------------------------------------===//
++//                        AutoTuningEnabledCallSite Class
++//===----------------------------------------------------------------------===//
++class CallBase;
++/// autotuning::Container companion of a CallBase; it stores a back-pointer to
++/// the call it was created for.
++class AutoTuningEnabledCallSite : public autotuning::Container {
++public:
++  AutoTuningEnabledCallSite() = delete;
++  void initCodeRegion() override;
++  uint64_t computeStructuralHash() override;
++  // NOTE(review): the parameter is named like its own type, so the name
++  // 'CallBase' refers to the pointer inside this constructor.
++  AutoTuningEnabledCallSite(CallBase *CallBase) { CB = CallBase; }
++
++private:
++  CallBase *CB;
++};
++#endif
++
+ //===----------------------------------------------------------------------===//
+ //                              CallBase Class
+ //===----------------------------------------------------------------------===//
+@@ -1229,6 +1246,13 @@ protected:
+   unsigned getNumSubclassExtraOperandsDynamic() const;
+ 
+ public:
++#if defined(ENABLE_AUTOTUNER)
++  // There is one-to-one
correspondence between ATECallSite and CallBase class
++  // to enable auto-tuning. The companion object is heap-allocated by this
++  // default member initializer.
++  std::unique_ptr ATECallSite =
++      std::make_unique(this);
++#endif
++
+   using Instruction::getContext;
+ 
+   /// Create a clone of \p CB with a different set of operand bundles and
+diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
+index 8d60384e1a32..9d638af6eeef 100644
+--- a/llvm/include/llvm/IR/Instructions.h
++++ b/llvm/include/llvm/IR/Instructions.h
+@@ -3287,6 +3287,23 @@ struct OperandTraits : public VariadicOperandTraits {
+ 
+ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
+ 
++#if defined(ENABLE_AUTOTUNER)
++//===----------------------------------------------------------------------===//
++//                       AutoTuningEnabledSwitchInst Class
++//===----------------------------------------------------------------------===//
++class SwitchInst;
++
++/// autotuning::Container companion of a SwitchInst; it stores a back-pointer
++/// to the switch it was created for.
++class AutoTuningEnabledSwitchInst : public autotuning::Container {
++public:
++  AutoTuningEnabledSwitchInst() = delete;
++  void initCodeRegion() override;
++  uint64_t computeStructuralHash() override;
++  // NOTE(review): the parameter is named like its own type, so the name
++  // 'SwitchInst' refers to the pointer inside this constructor.
++  AutoTuningEnabledSwitchInst(SwitchInst *SwitchInst) { SI = SwitchInst; }
++
++private:
++  SwitchInst *SI;
++};
++#endif
+ //===----------------------------------------------------------------------===//
+ //                               SwitchInst Class
+ //===----------------------------------------------------------------------===//
+@@ -3332,6 +3349,13 @@ protected:
+ public:
+   void operator delete(void *Ptr) { User::operator delete(Ptr); }
+ 
++#if defined(ENABLE_AUTOTUNER)
++  // There is one-to-one correspondence between ATESwitchInst and
++  // SwitchInst class to enable AutoTuner.
++  std::unique_ptr ATESwitchInst =
++      std::make_unique(this);
++#endif
++
+   // -2
+   static const unsigned DefaultPseudoIndex = static_cast(~0L-1);
+ 
+diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
+index 670a40b28eab..904a450a1888 100644
+--- a/llvm/include/llvm/IR/Module.h
++++ b/llvm/include/llvm/IR/Module.h
+@@ -38,6 +38,9 @@
+ #include
+ #include
+ #include
++#if defined(ENABLE_AUTOTUNER)
++#include "llvm/AutoTuner/AutoTuning.h"
++#endif
+ 
+ namespace llvm {
+ 
+diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
+index 1bdeb85afa3c..c0bcc8153eb8 100644
+--- a/llvm/include/llvm/IR/StructuralHash.h
++++ b/llvm/include/llvm/IR/StructuralHash.h
+@@ -15,6 +15,9 @@
+ #define LLVM_IR_STRUCTURALHASH_H
+ 
+ #include
++#if defined(ENABLE_AUTOTUNER)
++#include
++#endif
+ 
+ namespace llvm {
+ 
+@@ -24,6 +27,17 @@ class Module;
+ uint64_t StructuralHash(const Function &F);
+ uint64_t StructuralHash(const Module &M);
+ 
++#if defined(ENABLE_AUTOTUNER)
++class MachineBasicBlock;
++class BasicBlock;
++class CallBase;
++class SwitchInst;
++
++// Additional StructuralHash overloads used by the AutoTuner.
++// NOTE(review): the vector overload takes its argument by const value, so
++// every call copies the vector; switching to a const reference would need
++// the out-of-line definition to change with it.
++uint64_t StructuralHash(const std::vector BBs);
++uint64_t StructuralHash(const MachineBasicBlock &MBB);
++uint64_t StructuralHash(const CallBase &CB);
++uint64_t StructuralHash(const SwitchInst &SI);
++#endif
+ } // end namespace llvm
+ 
+ #endif
+diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
+index c6fee47b464b..80bec2d82e24 100644
+--- a/llvm/include/llvm/InitializePasses.h
++++ b/llvm/include/llvm/InitializePasses.h
+@@ -340,6 +340,11 @@ void initializeWasmEHPreparePass(PassRegistry&);
+ void initializeWinEHPreparePass(PassRegistry&);
+ void initializeWriteBitcodePassPass(PassRegistry&);
+ void initializeXRayInstrumentationPass(PassRegistry&);
++#if defined(ENABLE_AUTOTUNER)
++void initializeAutotuningDumpLegacyPass(PassRegistry &);
++void initializeAutoTuningCompileFunctionLegacyPass(PassRegistry &);
++void
initializeAutoTuningCompileModuleLegacyPass(PassRegistry &);
++#endif
+ 
+ } // end namespace llvm
+ 
+diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
+index 7420ea64e954..3a8ecb1399f1 100644
+--- a/llvm/include/llvm/LinkAllPasses.h
++++ b/llvm/include/llvm/LinkAllPasses.h
+@@ -54,6 +54,9 @@
+ #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+ #include "llvm/Transforms/Vectorize.h"
+ #include
++#if defined(ENABLE_AUTOTUNER)
++#include "llvm/Transforms/Scalar/AutoTuningCompile.h"
++#endif
+ 
+ namespace {
+   struct ForcePassLinking {
+@@ -93,6 +96,11 @@ namespace {
+       (void) llvm::createInstSimplifyLegacyPass();
+       (void) llvm::createInstructionCombiningPass();
+       (void) llvm::createJMCInstrumenterPass();
++#if defined(ENABLE_AUTOTUNER)
++      // AutoTuner legacy passes.
++      (void) llvm::createAutotuningDumpPass();
++      (void) llvm::createAutoTuningCompileFunctionLegacyPass();
++      (void) llvm::createAutoTuningCompileModuleLegacyPass();
++#endif
+       (void) llvm::createKCFIPass();
+       (void) llvm::createLCSSAPass();
+       (void) llvm::createLICMPass();
+diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h
+index a66f7ed73f2f..3bcc0c710498 100644
+--- a/llvm/include/llvm/Remarks/Remark.h
++++ b/llvm/include/llvm/Remarks/Remark.h
+@@ -20,6 +20,10 @@
+ #include "llvm/Support/raw_ostream.h"
+ #include
+ #include
++#if defined(ENABLE_AUTOTUNER)
++#include
++#include
++#endif
+ 
+ namespace llvm {
+ namespace remarks {
+@@ -47,6 +51,9 @@ struct Argument {
+   StringRef Key;
+   // FIXME: We might want to be able to store other types than strings here.
+   StringRef Val;
++#if defined(ENABLE_AUTOTUNER)
++  // AutoTuner only: optional vector-valued counterpart of Val.
++  std::optional> VectorVal;
++#endif
+   // If set, the debug location corresponding to the value.
+   std::optional Loc;
+ 
+@@ -65,6 +72,9 @@ enum class Type {
+   Analysis,
+   AnalysisFPCommute,
+   AnalysisAliasing,
++#if defined(ENABLE_AUTOTUNER)
++  AutoTuning,
++#endif
+   Failure,
+   First = Unknown,
+   Last = Failure
+@@ -105,6 +115,28 @@ struct Remark {
+   /// Mangled name of the function that triggers the emssion of this remark.
+   StringRef FunctionName;
+ 
++#if defined(ENABLE_AUTOTUNER)
++  /// Type of the code region that the remark is associated with.
++  std::optional CodeRegionType;
++
++  /// Configuration value for generating the same baseline binary associated
++  /// with this remark.
++  std::optional> BaselineConfig;
++
++  /// Hash of the code region that the remark is associated with.
++  std::optional CodeRegionHash;
++
++  /// Config values passed to AutoTuner for dynamic setting of the search
++  /// space for code regions.
++  std::optional>>
++      AutoTunerOptions;
++
++  /// Invocation/registration of the optimization pass in the compilation
++  /// pipeline. It is used to differentiate between different invocations of
++  /// the same optimization pass.
++  std::optional Invocation;
++#endif
++
+   /// The location in the source file of the remark.
+   std::optional Loc;
+ 
+diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
+index d2079fead668..c59dba2749f0 100644
+--- a/llvm/include/llvm/Support/CommandLine.h
++++ b/llvm/include/llvm/Support/CommandLine.h
+@@ -40,6 +40,9 @@
+ #include
+ #include
+ 
++#if defined(ENABLE_AUTOTUNER)
++#include
++#endif
+ namespace llvm {
+ 
+ namespace vfs {
+@@ -72,6 +75,20 @@ bool ParseCommandLineOptions(int argc, const char *const *argv,
+                              const char *EnvVar = nullptr,
+                              bool LongOptionsUseDoubleDash = false);
+ 
++#if defined(ENABLE_AUTOTUNER)
++// It will parse AutoTuner options (LLVMParams & ProgramParams) and add them as
++// command line flags for the compilation process. These options are suggested
++// by AutoTuner during tuning flow.
This function will always be called after
++// AutoTuner initialization.
++// Returns true on success. Otherwise, this will print the error message to
++// stderr and exit.
++// NOTE(review): both parameter maps are taken by value, so each call copies
++// them.
++bool ParseAutoTunerOptions(
++    std::unordered_map LLVMParams,
++    std::unordered_map ProgramParams,
++    StringRef Overview = "", raw_ostream *Errs = nullptr,
++    const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false);
++#endif
++
+ // Function pointer type for printing version information.
+ using VersionPrinterTy = std::function;
+ 
+diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
+index aaba710cfde6..e69beeade947 100644
+--- a/llvm/include/llvm/Transforms/Scalar.h
++++ b/llvm/include/llvm/Transforms/Scalar.h
+@@ -16,6 +16,10 @@
+ 
+ #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
+ #include
++#if defined(ENABLE_AUTOTUNER)
++#include "llvm/Pass.h"
++#include
++#endif
+ 
+ namespace llvm {
+ 
+@@ -299,6 +303,19 @@ Pass *createLoopSimplifyCFGPass();
+ //
+ FunctionPass *createInstSimplifyLegacyPass();
+ 
++#if defined(ENABLE_AUTOTUNER)
++//===--------------------------------------------------------------------===//
++//
++// createAutoTuningCompile{Function,Module}LegacyPass - These passes write IR
++// files with -fautotune-generate for autotuning flow. They also enable/disable
++// the execution of optimization passes in subsequent compilations (with
++// -fautotune) based on autotuning methodology and available opportunities.
++//
++FunctionPass *
++createAutoTuningCompileFunctionLegacyPass(std::string Pass = "unknown");
++ModulePass *
++createAutoTuningCompileModuleLegacyPass(std::string Pass = "unknown");
++#endif
+ 
+ //===----------------------------------------------------------------------===//
+ //
+diff --git a/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h
+new file mode 100644
+index 000000000000..2cbb48f336ef
+--- /dev/null
++++ b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h
+@@ -0,0 +1,170 @@
++#if defined(ENABLE_AUTOTUNER)
++//===---------------- AutoTuningCompile.h - Auto-Tuning -------------------===//
++//
++//                     The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// This file declares the interface for AutoTuning Incremental Compilation.
++/// Incremental compilation requires two passes for the legacy pass manager:
++/// 1) a Module Pass and 2) a Function Pass. It requires an additional Loop
++/// Pass for the new pass manager.
++/// AutoTuningOptPassGate class is also defined here which is used to enable/
++/// disable the execution of optimization passes for the compilation pipeline.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_
++#define LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_
++
++#include "llvm/Analysis/LoopAnalysisManager.h"
++#include "llvm/Analysis/LoopInfo.h"
++#include "llvm/Analysis/LoopPass.h"
++#include "llvm/IR/OptBisect.h"
++#include "llvm/IR/PassManager.h"
++#include "llvm/Pass.h"
++#include "llvm/Transforms/Scalar/LoopPassManager.h"
++
++namespace llvm {
++
++class Pass;
++
++// Skips or runs optimization passes.
++class AutoTuningOptPassGate : public OptPassGate {
++public:
++  explicit AutoTuningOptPassGate(bool Skip = false) : Skip(Skip) {}
++
++  bool shouldRunPass(const StringRef PassName,
++                     StringRef IRDescription) override;
++  // This gate always reports itself as enabled.
++  bool isEnabled() const override { return true; }
++  bool checkPass(const StringRef PassName, const StringRef TargetDesc);
++  // Accessors for the Skip flag.
++  void setSkip(bool Skip) { this->Skip = Skip; }
++  bool getSkip() const { return Skip; }
++
++private:
++  // NOTE(review): presumably consulted by shouldRunPass()/checkPass(), which
++  // are defined out of line -- confirm there.
++  bool Skip;
++};
++
++// Returns a static AutoTuningOptPassGate object which will be used to register
++// CallBack for OptBisect instrumentation.
++// It will also be used by AutoTuningCompile passes to enable/disable
++// optimization passes.
++AutoTuningOptPassGate &getAutoTuningOptPassGate();
++
++// Module-level implementation of AutoTuning incremental compilation.
++class AutoTuningCompileModule {
++public:
++  explicit AutoTuningCompileModule(std::string Pass = "unknown");
++  bool run(Module &M);
++  // Write IR files for each module to be re-used in subsequent compilations
++  // for autotuning cycles. It only works with -fautotune-generate.
++  void writeIRFiles(Module &M) const;
++  // Enable/Disable execution of optimization passes in subsequent compilations
++  // based on autotuning methodology and available opportunities.
It Only works
++  // with -fautotune
++  bool modifyCompilationPipeline(Module &M) const;
++
++  // Accessors for the SkipCompilation flag, which is static and therefore
++  // shared by every AutoTuningCompileModule instance.
++  static void setSkipCompilation(bool Option) { SkipCompilation = Option; }
++  static bool getSkipCompilation() { return SkipCompilation; }
++
++private:
++  static bool SkipCompilation;
++  std::string Pass = "";
++};
++
++// Legacy pass manager wrapper (ModulePass); it preserves all analyses.
++class AutoTuningCompileModuleLegacy : public ModulePass {
++public:
++  static char ID;
++  explicit AutoTuningCompileModuleLegacy(std::string Pass = "unknown");
++  bool runOnModule(Module &M) override;
++  StringRef getPassName() const override;
++  void getAnalysisUsage(AnalysisUsage &AU) const override {
++    AU.setPreservesAll();
++  }
++
++private:
++  std::string Pass = "";
++};
++
++// New pass manager module pass.
++class AutoTuningCompileModulePass
++    : public PassInfoMixin {
++public:
++  explicit AutoTuningCompileModulePass(std::string Pass = "unknown")
++      : Pass(Pass){};
++  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
++
++private:
++  std::string Pass = "";
++};
++
++// Function-level implementation of AutoTuning incremental compilation.
++class AutoTuningCompileFunction {
++public:
++  explicit AutoTuningCompileFunction(std::string Pass = "unknown");
++  bool run(Function &F);
++  // Write IR files for each module to be re-used in subsequent compilations
++  // for autotuning cycles. It only works with -fautotune-generate.
++  void writeIRFiles(Module &M);
++  // Enable/Disable execution of optimization passes in subsequent compilations
++  // based on autotuning methodology and available opportunities. It only works
++  // with -fautotune.
++  bool modifyCompilationPipeline(Function &F);
++
++private:
++  // A module may have multiple functions; decision to enable/disable
++  // execution of an optimization pass will be made for the first function and
++  // will be used for all of the functions in the module.
++  // 'SkipDecision' will be set once the decision is made for a specific 'Pass'.
++  bool SkipDecision = false;
++
++  // A module may have multiple functions; IR file will be written once for the
++  // entire module for a specific 'Pass'.
++  bool IsModuleWritten = false;
++  std::string Pass = "";
++};
++
++// Legacy pass manager wrapper (FunctionPass); it preserves all analyses.
++class AutoTuningCompileFunctionLegacy : public FunctionPass {
++public:
++  static char ID;
++  explicit AutoTuningCompileFunctionLegacy(std::string Pass = "unknown");
++  bool runOnFunction(Function &F) override;
++  StringRef getPassName() const override;
++  void getAnalysisUsage(AnalysisUsage &AU) const override {
++    AU.setPreservesAll();
++  }
++
++private:
++  std::string Pass = "";
++};
++
++// New pass manager function pass.
++class AutoTuningCompileFunctionPass
++    : public PassInfoMixin {
++public:
++  explicit AutoTuningCompileFunctionPass(std::string Pass = "unknown")
++      : Pass(Pass){};
++  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
++
++private:
++  std::string Pass = "";
++};
++
++// New pass manager loop pass.
++class AutoTuningCompileLoopPass
++    : public PassInfoMixin {
++public:
++  explicit AutoTuningCompileLoopPass(std::string Pass = "unknown")
++      : Pass(Pass){};
++  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
++                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
++
++private:
++  std::string Pass = "";
++};
++
++} // end namespace llvm
++
++#endif /* LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ */
++#endif
+diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+index 4f3010965b59..e1cccf417898 100644
+--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
++++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+@@ -108,7 +108,11 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
+                         unsigned TripMultiple, unsigned LoopSize,
+                         TargetTransformInfo::UnrollingPreferences &UP,
+                         TargetTransformInfo::PeelingPreferences &PP,
++#if defined(ENABLE_AUTOTUNER)
++                        bool &UseUpperBound, unsigned int Invocation = 0);
++#else
+                         bool &UseUpperBound);
++#endif
+ 
+ void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
+                              ScalarEvolution *SE, DominatorTree *DT,
+diff --git a/llvm/lib/Analysis/AutotuningDump.cpp b/llvm/lib/Analysis/AutotuningDump.cpp
+new file mode 100644
+index 000000000000..81b2bbead70e
+--- /dev/null
++++ b/llvm/lib/Analysis/AutotuningDump.cpp
+@@ -0,0 +1,265 @@
++#if defined(ENABLE_AUTOTUNER)
++// ===-- AutotuningDump.cpp - Auto-Tuning---------------------------------===//
++//                     The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++// ===--------------------------------------------------------------------===//
++//
++// This file contains the pass that collects the IR of tuned regions and stores
++// it in predetermined locations, to be used later by autotuning ML guidance.
++//
++// ===--------------------------------------------------------------------===//
++#include "llvm/Analysis/AutotuningDump.h"
++#include "llvm/Analysis/Passes.h"
++#include "llvm/AutoTuner/AutoTuning.h"
++#include "llvm/IR/LegacyPassManager.h"
++#include "llvm/InitializePasses.h"
++#include "llvm/Pass.h"
++#include "llvm/Support/CommandLine.h"
++#include "llvm/Support/Path.h"
++#include "llvm/Support/Process.h"
++#include "llvm/Support/raw_ostream.h"
++#include
++
++using namespace llvm;
++
++#define DEBUG_TYPE "autotuning-dump"
++
++// Granularity of the IR dump; selected by -autotuning-dump-mode.
++enum AutotuningDumpOpt { whole_modules, functions, loops };
++
++// Command line option choosing the dump granularity; defaults to one file
++// per module.
++cl::opt AutotuningDumpMode(
++    "autotuning-dump-mode", cl::desc("Choose autotuning dump mode:"),
++    cl::init(whole_modules),
++    cl::values(clEnumVal(whole_modules, "dump each module in its own file"),
++               clEnumVal(functions, "dump each function in its own file"),
++               clEnumVal(loops, "dump each loop in its own file")));
++
++AutotuningDump::AutotuningDump(bool IncrementalCompilation) {
++  // Check if the environment variable AUTOTUNE_DATADIR is set.
++  IsIncrementalCompilation = IncrementalCompilation;
++  AutoTuneDirPath = "autotune_datadir";
++  if (std::optional MaybePath =
++          llvm::sys::Process::GetEnv("AUTOTUNE_DATADIR"))
++    AutoTuneDirPath = *MaybePath;
++}
++
++// Return the configuration number reported by the AutoTuning engine. A
++// missing or invalid number is a fatal error.
++int AutotuningDump::getConfigNumber() {
++  auto ConfigNumOrErr = autotuning::Engine.getConfigNumber();
++  // report_fatal_error() does not return, so no fallback value is needed.
++  if (!ConfigNumOrErr)
++    report_fatal_error("Invalid/missing Autotuner configuration ID");
++  return *ConfigNumOrErr;
++}
++
++// Print the loop \p L to \p os.
++void AutotuningDump::dumpToStream(llvm::raw_ostream &os, const Loop &L) const {
++  L.print(os);
++}
++
++// Print the function \p F to \p os, requesting the complete IR.
++void AutotuningDump::dumpToStream(llvm::raw_ostream &os,
++                                  const Function &F) const {
++  F.print(os, /*AAW*/ nullptr, /*ShouldPreserveUseListOrder*/ false,
++          /*IsForDebug*/ false, /*PrintCompleteIR*/ true);
++}
++
++// Create the output file. \p File contains AbsolutePath/FileName.
++// Failing to open the file is a fatal error: every caller dereferences and
++// writes to the returned stream unconditionally, so an unopened stream must
++// never be handed back.
++std::unique_ptr AutotuningDump::createFile(const Twine &File) {
++  std::error_code EC;
++  auto Stream = std::make_unique((File).str(), EC,
++                                 sys::fs::CD_CreateAlways,
++                                 sys::fs::FA_Write, sys::fs::OF_None);
++  if (EC)
++    report_fatal_error(Twine("could not create file: ") + File + ": " +
++                       EC.message());
++  return Stream;
++}
++
++std::string AutotuningDump::getDirectoryName(const std::string File) const {
++  std::string DirectoryName = AutoTuneDirPath;
++  if (!autotuning::Engine.isMLEnabled())
++    DirectoryName += "/IR_files";
++
++  DirectoryName = DirectoryName + "/" + File + "/";
++
++  // Create directory if not already present.
++  if (std::error_code EC = sys::fs::create_directories(DirectoryName))
++    errs() << "could not create directory: " << DirectoryName << ": "
++           << EC.message() << "\n";
++
++  return DirectoryName;
++}
++
++// Return the name of the IR file for \p FilePath: "<config number>.ll" when
++// ML guidance is enabled, otherwise \p FilePath with every '/' replaced by
++// '_' and ".ll" appended.
++std::string AutotuningDump::getFileName(std::string FilePath) {
++  if (autotuning::Engine.isMLEnabled())
++    return std::to_string(this->getConfigNumber()) + ".ll";
++  std::replace(FilePath.begin(), FilePath.end(), '/', '_');
++  return FilePath + ".ll";
++}
++
++// Write the IR of \p M to a single file. For incremental compilation the
++// file sits next to the source file, with the extension replaced by ".ll";
++// otherwise it goes into the directory returned by getDirectoryName().
++void AutotuningDump::dumpModule(Module &M) {
++  std::unique_ptr fptr;
++  LLVM_DEBUG(dbgs() << "AutotuningDump: Dump module IR files.\n");
++  if (IsIncrementalCompilation) {
++    std::string Filename = M.getSourceFileName();
++    llvm::SmallString<128> FilenameVec = StringRef(Filename);
++    llvm::sys::fs::make_absolute(FilenameVec);
++    // Swap (or add) the extension on the file name component only: a '.' in
++    // a directory name must not be mistaken for the extension, and a source
++    // file without an extension must not be overwritten by its own IR.
++    llvm::sys::path::replace_extension(FilenameVec, "ll");
++    fptr = createFile(FilenameVec);
++  } else {
++    std::string File = llvm::sys::path::filename(M.getName()).str();
++    std::string DirectoryName = getDirectoryName(File);
++    std::string FileName = getFileName(M.getName().str());
++    fptr = createFile(DirectoryName + FileName);
++  }
++
++  M.print(*fptr, nullptr, true, false);
++}
++
++// Write one "<function name>.ll" file for every defined function of \p M
++// whose code region requires an IR dump.
++void AutotuningDump::dumpFunctions(Module &M) {
++  std::string FilePath = M.getName().str();
++  std::replace(FilePath.begin(), FilePath.end(), '/', '_');
++  std::string DirectoryName = getDirectoryName(FilePath);
++  for (Function &F : M.getFunctionList()) { // go through all functions
++    if (F.isDeclaration() || F.empty())
++      continue;
++
++    AutoTuningEnabledFunction *AutotuneFunc = &F.getATEFunction();
++    assert(AutotuneFunc);
++    autotuning::Engine.initContainer(AutotuneFunc, "autotuning-dump",
++                                     F.getName(), false);
++    std::string FuncName = F.getName().str();
++    // check the whole function
++    if (AutotuneFunc->requiresIRDump(true)) {
++      auto fptr = createFile(DirectoryName + Twine(FuncName) + ".ll");
++ this->dumpToStream(*fptr, F); ++ } ++ } ++} ++ ++void AutotuningDump::dumpLoops(Module &M, ++ function_ref GetLI) { ++ for (Function &F : M) { ++ // Nothing to do for declarations. ++ if (F.isDeclaration() || F.empty()) ++ continue; ++ ++ LoopInfo &LI = GetLI(F); ++ for (auto &L : LI.getLoopsInPreorder()) { ++ Function *Func = nullptr; ++ StringRef FuncName = ""; ++ if (!L->isInvalid()) ++ Func = L->getHeader()->getParent(); ++ if (Func) ++ FuncName = Func->getName(); ++ ++ autotuning::Engine.initContainer(L, "autotuning-dump", FuncName, false); ++ if (L->requiresIRDump()) { ++ std::string FuncName = L->getCodeRegion().getFuncName(); ++ unsigned SourceLine = L->getCodeRegion().getSourceLoc().SourceLine; ++ std::string DirectoryName = AutoTuneDirPath + "/" + ++ llvm::sys::path::filename(FuncName).str() + ++ "_loop_" + std::to_string(SourceLine); ++ std::string FileName = std::to_string(this->getConfigNumber()) + ".ll"; ++ auto fptr = createFile(DirectoryName + "/" + FileName); ++ this->dumpToStream(*fptr, *L); ++ } ++ } ++ } ++} ++ ++bool AutotuningDump::run(Module &M, ++ function_ref GetLI) { ++ // Change to absolute path. ++ SmallString<256> OutputPath = StringRef(AutoTuneDirPath); ++ sys::fs::make_absolute(OutputPath); ++ ++ // Creating new output directory, if it does not exists. 
++ if (std::error_code EC = sys::fs::create_directories(OutputPath)) { ++ llvm::errs() << (make_error( ++ "could not create directory: " + Twine(OutputPath) + ": " + ++ EC.message(), ++ EC)); ++ return false; ++ } ++ ++ if (IsIncrementalCompilation) { ++ LLVM_DEBUG( ++ dbgs() ++ << "AutotuningDump: IR files writing for incremental compilation.\n"); ++ dumpModule(M); ++ return false; ++ } ++ ++ switch (AutotuningDumpMode) { ++ case whole_modules: ++ dumpModule(M); ++ break; ++ case functions: ++ dumpFunctions(M); ++ break; ++ case loops: ++ dumpLoops(M, GetLI); ++ } ++ ++ return false; ++} ++ ++AutotuningDumpLegacy::AutotuningDumpLegacy(bool IncrementalCompilation) ++ : ModulePass(AutotuningDumpLegacy::ID) { ++ IsIncrementalCompilation = IncrementalCompilation; ++ initializeAutotuningDumpLegacyPass(*PassRegistry::getPassRegistry()); ++} ++ ++bool AutotuningDumpLegacy::runOnModule(Module &M) { ++ if (!autotuning::Engine.isDumpEnabled()) ++ return false; ++ ++ auto GetLI = [this](Function &F) -> LoopInfo & { ++ return getAnalysis(F).getLoopInfo(); ++ }; ++ ++ AutotuningDump Impl(IsIncrementalCompilation); ++ return Impl.run(M, GetLI); ++} ++ ++StringRef AutotuningDumpLegacy::getPassName() const { ++ return "Autotuning Dump"; ++} ++ ++void AutotuningDumpLegacy::getAnalysisUsage(AnalysisUsage &AU) const { ++ AU.setPreservesAll(); ++ AU.addRequired(); ++} ++ ++char AutotuningDumpLegacy::ID = 0; ++INITIALIZE_PASS_BEGIN(AutotuningDumpLegacy, "autotuning-dump", ++ "Dump IR for Autotuned Code Regions", false, false) ++INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) ++INITIALIZE_PASS_END(AutotuningDumpLegacy, "autotuning-dump", ++ "Dump IR for Autotuned Code Regions", false, false) ++ ++ModulePass *llvm::createAutotuningDumpPass() { ++ return new AutotuningDumpLegacy(); ++} ++ ++AnalysisKey AutotuningDumpAnalysis::Key; ++ ++AutotuningDumpAnalysis::Result ++AutotuningDumpAnalysis::run(Module &M, ModuleAnalysisManager &AM) { ++ if (!autotuning::Engine.isDumpEnabled()) ++ return 
false; ++ ++ auto &FAM = AM.getResult(M).getManager(); ++ auto GetLI = [&FAM](Function &F) -> LoopInfo & { ++ return FAM.getResult(F); ++ }; ++ ++ AutotuningDump Impl(IsIncrementalCompilation); ++ Impl.run(M, GetLI); ++ return false; ++} ++#endif +diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt +index 4a1797c42789..9c6a70f0221f 100644 +--- a/llvm/lib/Analysis/CMakeLists.txt ++++ b/llvm/lib/Analysis/CMakeLists.txt +@@ -30,6 +30,7 @@ add_llvm_component_library(LLVMAnalysis + Analysis.cpp + AssumeBundleQueries.cpp + AssumptionCache.cpp ++ AutotuningDump.cpp + BasicAliasAnalysis.cpp + BlockFrequencyInfo.cpp + BlockFrequencyInfoImpl.cpp +@@ -153,6 +154,7 @@ add_llvm_component_library(LLVMAnalysis + ${MLLinkDeps} + + LINK_COMPONENTS ++ AutoTuner + BinaryFormat + Core + Object +diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp +index e2480d51d372..f6b3c14a0345 100644 +--- a/llvm/lib/Analysis/InlineAdvisor.cpp ++++ b/llvm/lib/Analysis/InlineAdvisor.cpp +@@ -383,15 +383,27 @@ llvm::shouldInline(CallBase &CB, + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); + ++#if defined(ENABLE_AUTOTUNER) ++ // Get the code Region to add BaselineConfig values for inline ++ const autotuning::CodeRegion &CR = CB.ATECallSite.get()->getCodeRegion(); ++ static const std::string ForceInlineParamStr = "ForceInline"; ++#endif ++ + if (IC.isAlways()) { + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); ++#if defined(ENABLE_AUTOTUNER) ++ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}}); ++#endif + return IC; + } + + if (!IC) { + LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); ++#if defined(ENABLE_AUTOTUNER) ++ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}}); ++#endif + if (IC.isNever()) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) +@@ 
-417,6 +429,9 @@ llvm::shouldInline(CallBase &CB, + LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB + << " Cost = " << IC.getCost() + << ", outer Cost = " << TotalSecondaryCost << '\n'); ++#if defined(ENABLE_AUTOTUNER) ++ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}}); ++#endif + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", + Call) +@@ -430,6 +445,9 @@ llvm::shouldInline(CallBase &CB, + + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB + << '\n'); ++#if defined(ENABLE_AUTOTUNER) ++ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}}); ++#endif + return IC; + } + +diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp +index a2f46edcf5ef..9f8f57865de2 100644 +--- a/llvm/lib/Analysis/InlineCost.cpp ++++ b/llvm/lib/Analysis/InlineCost.cpp +@@ -162,6 +162,14 @@ static cl::opt DisableGEPConstOperand( + "disable-gep-const-evaluation", cl::Hidden, cl::init(false), + cl::desc("Disables evaluation of GetElementPtr with constant operands")); + ++#if defined(ENABLE_AUTOTUNER) ++static cl::opt ++ EnableLocalCallSiteTuning("auto-tuning-enable-local-callsite-tuning", ++ cl::init(false), cl::Hidden, ++ cl::desc("Enable AutoTuning for local callsites " ++ "as well.")); ++#endif ++ + namespace llvm { + std::optional getStringFnAttrAsInt(const Attribute &Attr) { + if (Attr.isValid()) { +@@ -2990,6 +2998,27 @@ InlineCost llvm::getInlineCost( + return llvm::InlineCost::getNever(UserDecision->getFailureReason()); + } + ++#if defined(ENABLE_AUTOTUNER) ++ if (autotuning::Engine.isEnabled() && Call.getCaller() && ++ (!Callee->hasLocalLinkage() || EnableLocalCallSiteTuning)) { ++ bool ForceInline = false; ++ bool Found = false; ++ ++ autotuning::Engine.initContainer(Call.ATECallSite.get(), "inline", ++ Call.getCaller()->getName(), ++ /* addOpportunity */ false); ++ ++ Found = Call.ATECallSite->lookUpParams("ForceInline", ForceInline); ++ ++ if (Found) { ++ 
if (ForceInline) ++ return llvm::InlineCost::getAlways("Force inlined by auto-tuning"); ++ else ++ return llvm::InlineCost::getNever("Force non-inlined by auto-tuning"); ++ } ++ } ++#endif ++ + LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() + << "... (caller:" << Call.getCaller()->getName() + << ")\n"); +diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp +index 60a72079e864..36aca73ee675 100644 +--- a/llvm/lib/Analysis/LoopInfo.cpp ++++ b/llvm/lib/Analysis/LoopInfo.cpp +@@ -37,6 +37,10 @@ + #include "llvm/Support/CommandLine.h" + #include "llvm/Support/GenericLoopInfoImpl.h" + #include "llvm/Support/raw_ostream.h" ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#include "llvm/IR/StructuralHash.h" ++#endif + using namespace llvm; + + // Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops. +@@ -663,6 +667,54 @@ Loop::LocRange Loop::getLocRange() const { + return LocRange(); + } + ++#if defined(ENABLE_AUTOTUNER) ++uint64_t Loop::computeStructuralHash() { ++ std::vector BBs = getBlocks(); ++ return StructuralHash(BBs); ++} ++ ++void Loop::initCodeRegion() { ++ std::string LoopName; ++ // use the header's name as the loop name ++ if (BasicBlock *Header = getHeader()) { ++ if (Header->hasName()) { ++ LoopName = Header->getName().str(); ++ } ++ // if the header doesn't have a name, ++ // use the label of this header from AsmWriter ++ else { ++ std::string Str; ++ llvm::raw_string_ostream RSO(Str); ++ Header->printAsOperand(RSO); ++ LoopName = RSO.str(); ++ } ++ } else { ++ LoopName = ""; ++ } ++ ++ Function *F = this->getHeader()->getParent(); ++ StringRef FuncName = F->getName(); ++ ++ // init the CodeRegion ++ autotuning::CodeRegion CR = autotuning::CodeRegion( ++ LoopName, FuncName.data(), autotuning::CodeRegionType::Loop, ++ this->getStartLoc()); ++ // Compute the number of non-debug IR instructions in this loop. 
++ unsigned TotalNumInstrs = 0; ++ for (const BasicBlock *BB : this->getBlocks()) { ++ unsigned NumInstrs = std::distance(BB->instructionsWithoutDebug().begin(), ++ BB->instructionsWithoutDebug().end()); ++ TotalNumInstrs += NumInstrs; ++ } ++ CR.setSize(TotalNumInstrs); ++ // Compute hotness. ++ autotuning::HotnessType Hotness = F->ATEFunction.getHotness(); ++ CR.setHotness(Hotness); ++ ++ this->setCodeRegion(CR); ++} ++#endif ++ + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); } + +diff --git a/llvm/lib/AutoTuner/AutoTuning.cpp b/llvm/lib/AutoTuner/AutoTuning.cpp +new file mode 100644 +index 000000000000..1f09f06d84a2 +--- /dev/null ++++ b/llvm/lib/AutoTuner/AutoTuning.cpp +@@ -0,0 +1,705 @@ ++#if defined(ENABLE_AUTOTUNER) ++//===-- AutoTuning.cpp - Auto-Tuning --------------------------------------===// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines Auto Tuning related functions, models and interfaces. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/AutoTuner/AutoTuning.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/AutoTuner/AutoTuningRemarkManager.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Error.h" ++#include "llvm/Support/Process.h" ++ ++// Enable debug messages for AutoTuning. ++#define DEBUG_TYPE "autotuning" ++ ++using namespace llvm; ++ ++// defined in 'lib/Remarks/YAMLRemarkParser.cpp'. ++extern cl::opt OmitAutotuningMetadata; ++ ++// -auto-tuning-input - Command line option to specify the input file. 
++static cl::opt InputFile("auto-tuning-input", cl::Hidden,
++                         cl::desc("Specify the input file"));
++
++// -auto-tuning-opp - Command line option to specify the output directory of
++// tuning opportunities.
++static cl::opt OutputOppDir(
++    "auto-tuning-opp", cl::Hidden,
++    cl::desc("Specify the output directory of tuning opportunities"));
++
++static cl::opt
++    RemarksPasses("auto-tuning-pass-filter", cl::Hidden,
++                  cl::desc("Only dump auto-tuning remarks from passes whose "
++                           "names match the given regular expression"),
++                  cl::value_desc("regex"));
++
++static cl::opt
++    ProjectDir("autotuning-project-dir", cl::Hidden, cl::init(""),
++               cl::desc("Specify project base dir to make code region name "
++                        "relative to base dir. This operation will only be "
++                        "applied for coarse-grain code regions."));
++
++// -auto-tuning-config-id - Command line option to specify the config number
++// being used for compilation. Required only for ML guidance feature.
++static cl::opt CFGNumber(
++    "auto-tuning-config-id", cl::Hidden,
++    cl::desc(
++        "Specify the auto-tuning configuration ID used in this compilation."));
++
++static cl::opt OutputFormat(
++    "auto-tuning-remark-format", cl::Hidden,
++    cl::desc("The format used for auto-tuning remarks (default: YAML)"),
++    cl::value_desc("format"), cl::init("yaml"));
++
++// AutoTuner incremental compilation options.
++cl::opt AutoTuningCompileMode(
++    "auto-tuning-compile-mode", cl::Hidden, cl::init(Inactive),
++    cl::desc("AutoTuner: Choose incremental compilation mode."),
++    cl::values(clEnumVal(Inactive,
++                         "AutoTuner: Disable incremental compilation."),
++               clEnumVal(CoarseGrain, "AutoTuner: Enable incremental "
++                                      "compilation for coarse grain tuning."),
++               clEnumVal(FineGrain, "AutoTuner: Enable incremental compilation "
++                                    "for fine grain tuning."),
++               clEnumVal(Basic, "AutoTuner: Enable incremental compilation for "
++                                "any kind of code region.")));
++
++static cl::opt
++    EnableAutoTuningDump("enable-autotuning-dump", cl::Hidden, cl::init(false),
++                         cl::desc("Enable AutoTuningDump Pass"));
++
++static cl::opt
++    ThinLTOTuning("autotuning-thin-lto", cl::Hidden, cl::init(false),
++                  cl::desc("AutoTuner enabled in ThinLTO mode."));
++
++namespace autotuning {
++
++static cl::list AutotuningOutputFilter(
++    "auto-tuning-type-filter", cl::Hidden, cl::CommaSeparated,
++    cl::desc(
++        "Select types of code regions to dump auto-tuning opportunities for:"),
++    cl::values(clEnumVal(LLVMParam, "LLVMParam code regions only"),
++               clEnumVal(ProgramParam, "ProgramParam code regions only"),
++               clEnumVal(CallSite, "CallSite code regions only"),
++               clEnumVal(Function, "Function code regions only"),
++               clEnumVal(Loop, "Loop code regions only"),
++               clEnumVal(MachineBasicBlock,
++                         "Machine basic block code regions only"),
++               clEnumVal(Switch, "Switch code regions only"),
++               clEnumVal(Other, "All other types of code regions")));
++
++static cl::list AutotuningFunctionFilter(
++    "auto-tuning-function-filter", cl::Hidden, cl::CommaSeparated,
++    cl::desc("Apply code region filtering based on function names"));
++
++static const cl::opt ExcludeColdCodeRegion(
++    "auto-tuning-exclude-cold", cl::Hidden, cl::init(true),
++    cl::desc("Use profile data to prune cold code regions from auto-tuning"));
++
++static const cl::opt CodeRegionMatchingWithHash(
++    "auto-tuning-code-region-matching-hash", cl::Hidden, cl::init(true),
++    cl::desc("Use IR hashing to match the Code Regions"));
++
++static const cl::opt HotCodeRegionOnly(
++    "auto-tuning-hot-only", cl::Hidden, cl::init(false),
++    cl::desc(
++        "Use profile data to include hot code regions only from auto-tuning"));
++
++static const cl::opt
++    SizeThreshold("auto-tuning-size-threshold", cl::Hidden, cl::init(0),
++                  cl::desc("Prune small code regions from auto-tuning with a "
++                           "size smaller than the threshold"));
++
++static inline std::string generateName(const std::string &Name) {
++  if (Name.empty())
++    return "unnamed";
++  else
++    return Name;
++}
++
++//===----------------------------------------------------------------------===//
++// CodeRegion implementation
++CodeRegion::CodeRegion(const CodeRegionType Type) : Type(Type) {}
++
++CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
++                       const CodeRegionType &Type, const DebugLoc &DL,
++                       const DynamicOptions DO) {
++  this->Name = generateName(Name);
++  this->FuncName = generateName(FuncName);
++  this->Type = Type;
++  this->StringType = getTypeAsString(Type);
++  if (DL) {
++    StringRef File = DL->getFilename();
++    unsigned Line = DL->getLine();
++    unsigned Col = DL->getColumn();
++    this->Location = SourceLocation{File.str(), Line, Col};
++  }
++  this->AutoTunerOptions = DO;
++}
++
++CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
++                       const CodeRegionType &Type,
++                       const SourceLocation &Location,
++                       const DynamicOptions DO) {
++  this->Name = generateName(Name);
++  this->FuncName = generateName(FuncName);
++  this->Type = Type;
++  this->StringType = getTypeAsString(Type);
++  this->Location = Location;
++  this->AutoTunerOptions = DO;
++}
++
++CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
++                       const std::string &PassName, const CodeRegionType &Type,
++                       const SourceLocation &Location,
++                       const unsigned int Invocation)
++    : CodeRegion(Name, FuncName, Type, Location) {
++  this->PassName = generateName(PassName);
++  this->Invocation = Invocation;
++}
++
++bool CodeRegion::operator==(const CodeRegion &CodeRegion) const {
++  bool IsEqual = false;
++  if (OmitAutotuningMetadata)
++    IsEqual = (this->getHash() == CodeRegion.getHash()) &&
++              (this->Type == CodeRegion.getType()) &&
++              (this->PassName == CodeRegion.getPassName());
++  else {
++    IsEqual = (this->Type == CodeRegion.getType()) &&
++              (this->Name == CodeRegion.getName()) &&
++              (this->PassName == CodeRegion.getPassName()) &&
++              (this->FuncName == CodeRegion.getFuncName()) &&
++              (this->Location == CodeRegion.getSourceLoc());
++    if (CodeRegionMatchingWithHash)
++      IsEqual = IsEqual && (this->getHash() == CodeRegion.getHash());
++  }
++
++  if (autotuning::Engine.ParseInput)
++    IsEqual = IsEqual && this->getInvocation() == CodeRegion.getInvocation();
++
++  if (autotuning::Engine.GenerateOutput)
++    IsEqual =
++        IsEqual && this->getBaselineConfig() == CodeRegion.getBaselineConfig();
++
++  return IsEqual;
++}
++
++std::string CodeRegion::getTypeAsString(CodeRegionType CRType) {
++  switch (CRType) {
++  case autotuning::CodeRegionType::MachineBasicBlock:
++    return "machine_basic_block";
++  case autotuning::CodeRegionType::Loop:
++    return "loop";
++  case autotuning::CodeRegionType::Function:
++    return "function";
++  case autotuning::CodeRegionType::CallSite:
++    return "callsite";
++  case autotuning::CodeRegionType::LLVMParam:
++    return "llvm-param";
++  case autotuning::CodeRegionType::ProgramParam:
++    return "program-param";
++  case autotuning::CodeRegionType::Switch:
++    return "switch";
++  default:
++    return "other";
++  }
++}
++
++std::string CodeRegion::getHotnessAsString(HotnessType Hotness) {
++  switch (Hotness) {
++  case autotuning::HotnessType::Cold:
++    return "cold";
++  case autotuning::HotnessType::Hot:
++    return "hot";
++  default:
++    return "unknown";
++  }
++}
++
++void CodeRegion::setPassName(const std::string &NewPassName) {
++  this->PassName = generateName(NewPassName);
++}
++
++/* static */
++autotuning::CodeRegion CodeRegion::getInvalidInstance() {
++  static autotuning::CodeRegion Invalid =
++      CodeRegion(autotuning::CodeRegionType::Invalid);
++  return Invalid;
++}
++
++/* static */
++autotuning::CodeRegion CodeRegion::getEmptyInstance() {
++  static autotuning::CodeRegion Empty =
++      CodeRegion(autotuning::CodeRegionType::Empty);
++  return Empty;
++}
++
++//===----------------------------------------------------------------------===//
++// Container implementation
++//
++
++const CodeRegion &Container::getCodeRegion() const { return CR; }
++
++void Container::setCodeRegion(const CodeRegion &NewCR) { this->CR = NewCR; }
++
++template
++bool Container::lookUpParams(const std::string &ParamsName, T &Value) const {
++  bool Found = false;
++  auto ConfigMapIterator = Engine.ParamTable.find(CR);
++  if (ConfigMapIterator != Engine.ParamTable.end()) {
++    const ParameterManager &InputParams = ConfigMapIterator->second;
++    Found = InputParams.findByName(ParamsName, Value);
++    if (Found) {
++      LLVM_DEBUG(dbgs() << ParamsName << " is set for the CodeRegion: \n"
++                        << " Name: " << CR.getName() << "\n"
++                        << " FuncName: " << CR.getFuncName() << "\n"
++                        << " PassName: " << CR.getPassName() << "\n"
++                        << " Type: " << CR.getTypeAsString() << "\n"
++                        << " Hash: " << CR.getHash() << "\n"
++                        << "\n");
++    }
++  }
++  return Found;
++}
++
++bool Container::requiresIRDump(bool IsFunctionIR) const {
++  auto findBaselineRegion = [&]() -> bool {
++    for (auto &entry : Engine.TuningOpps)
++      if (!IsFunctionIR) {
++        if (CR.getSourceLoc() == entry.getSourceLoc())
++          return true;
++      } else {
++        if (CR.getFileName() == entry.getFileName() &&
++            CR.getFuncName() == entry.getFuncName())
++          return true;
++      }
++    return false;
++  };
++  auto findNonBaselineRegion = [&]() {
++    for (auto &entry : Engine.ParamTable)
++      if (!IsFunctionIR) {
++        if (CR.getSourceLoc() == entry.first.getSourceLoc())
++          return true;
++      } else {
++        if (CR.getFileName() == entry.first.getFileName() &&
++            CR.getFuncName() == entry.first.getFuncName())
++          return true;
++      }
++    return false;
++  };
++
++  if (CFGNumber == -1)
++    return findBaselineRegion();
++  else
++    return findNonBaselineRegion();
++}
++
++template bool Container::lookUpParams(const std::string &ParamsName,
++                                      int &Value) const;
++template bool Container::lookUpParams(const std::string &ParamsName,
++                                      bool &Value) const;
++template bool
++Container::lookUpParams(const std::string &ParamsName,
++                        std::string &Value) const;
++template bool Container::lookUpParams>(
++    const std::string &ParamsName, std::vector &Value) const;
++
++static unsigned int count(SmallVector CallSiteLocs,
++                          CallSiteLocation Loc) {
++  unsigned int Count = 0;
++  for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) {
++    if (Loc.Caller == CallSiteLocs[Idx].Caller &&
++        Loc.Callee == CallSiteLocs[Idx].Callee)
++      Count++;
++  }
++  return Count;
++}
++
++bool AutoTuningEngine::isThinLTOTuning() const { return ThinLTOTuning; }
++
++CodeRegionType AutoTuningEngine::convertPassToType(std::string PassName) {
++  auto Search = PTTMap.find(PassName);
++  if (Search == PTTMap.end())
++    llvm_unreachable(
++        "AutoTuningEngine: Invalid/unsupported optimization pass provided.\n");
++  return Search->second;
++}
++
++void AutoTuningEngine::insertCallSiteLoc(CallSiteLocation Loc) {
++  CallSiteLocs.emplace_back(Loc);
++}
++
++// If a function has multiple calls to the same callee, also insert into the
++// CallSiteLocs vector the calls that become available when such calls are
++// inlined. The new entry uses "Original Call Line No + New Call Line No" as
++// its source line instead of the "DebugLoc Line No".
++void AutoTuningEngine::updateCallSiteLocs(llvm::CallBase *OldCB, ++ llvm::CallBase *NewCB, ++ llvm::Function *Callee, ++ unsigned int Line) { ++ for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) { ++ if (OldCB == CallSiteLocs[Idx].CB) { ++ CallSiteLocation Loc = CallSiteLocs[Idx]; ++ Loc.CB = NewCB; ++ Loc.Callee = Callee; ++ Loc.SrcLoc.SourceLine = Loc.SrcLoc.SourceLine + Line; ++ CallSiteLocs.emplace_back(Loc); ++ break; ++ } ++ } ++} ++ ++void AutoTuningEngine::cleanCallSiteLoc() { ++ unsigned int Size = CallSiteLocs.size(); ++ unsigned int Idx = 0; ++ for (unsigned int I = 0; I < Size; ++I) { ++ CallSiteLocation Loc = CallSiteLocs[Idx]; ++ unsigned int Count = count(CallSiteLocs, Loc); ++ if (Count == 1) { ++ CallSiteLocs.erase(CallSiteLocs.begin() + Idx); ++ continue; ++ } ++ Idx++; ++ } ++} ++ ++void AutoTuningEngine::clearCallSiteLocs() { CallSiteLocs.clear(); } ++ ++std::optional ++AutoTuningEngine::getCallSiteLoc(llvm::CallBase *CB) { ++ for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) { ++ if (CB == CallSiteLocs[Idx].CB) ++ return CallSiteLocs[Idx].SrcLoc.SourceLine; ++ } ++ return std::nullopt; ++} ++ ++void AutoTuningEngine::addOpportunity( ++ const CodeRegion &OppCR, ++ std::map BaselineConfig) { ++ if (!OppCR.Initialized) ++ return; ++ ++ OppCR.setBaselineConfig(BaselineConfig); ++ if (!TuningOpps.contains(OppCR)) ++ TuningOpps.insert(OppCR); ++ else if (OppCR.getHotness() != Unknown) { ++ // If OppCR already exists in TuningOpps with unknown hotness, ++ // then update it if the current hotness is hot/cold. 
++ auto OppI = find(TuningOpps, OppCR); ++ if (OppI->getHotness() == Unknown) ++ OppI->setHotness(OppCR.getHotness()); ++ } ++} ++ ++void AutoTuningEngine::applyOppFilters(CodeRegions &CRs) { ++ CodeRegions NewCRs; ++ for (CodeRegion CR : CRs) { ++ if (AutotuningOutputFilter.getNumOccurrences() > 0) { ++ bool IsMatched = false; ++ for (auto CRType : AutotuningOutputFilter) { ++ if (CRType == CR.getType()) { ++ IsMatched = true; ++ break; ++ } ++ } ++ // Filter out the CodeRegion if its type fails to match any types ++ // specified from the command line. ++ if (!IsMatched) ++ continue; ++ } ++ if (SizeThreshold.getNumOccurrences() > 0 && CR.getSize() < SizeThreshold) ++ continue; ++ if (ExcludeColdCodeRegion && CR.isCold()) { ++ LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function " ++ << CR.getFuncName() << "\n"); ++ continue; ++ } ++ if (HotCodeRegionOnly && !CR.isHot()) { ++ LLVM_DEBUG(dbgs() << "Skip CodeRegion with " << CR.getHotnessAsString() ++ << " function " << CR.getFuncName() << "\n"); ++ continue; ++ } ++ NewCRs.insert(CR); ++ LLVM_DEBUG(dbgs() << "CodeRegion added as an tuning opportunity: \n" ++ << " Name: " << CR.getName() << "\n" ++ << " FuncName: " << CR.getFuncName() << "\n" ++ << " PassName: " << CR.getPassName() << "\n" ++ << " Type: " << CR.getTypeAsString() << "\n" ++ << " Size: " << CR.getSize() << "\n" ++ << " Hotness: " << CR.getHotnessAsString() << "\n" ++ << " Hash: " << CR.getHash() << "\n" ++ << " Location: " << CR.getSourceLoc().SourceFilePath ++ << "; " << CR.getSourceLoc().SourceLine << "; " ++ << CR.getSourceLoc().SourceColumn << "\n\n"); ++ } ++ if (AutotuningOutputFilter.getNumOccurrences() == 0 || ++ std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), ++ Other) != AutotuningOutputFilter.end()) { ++ // Add an empty CodeRegion with ModuleID as an tuning opportunity. ++ // It could be used to represent a module level code region. 
++ autotuning::CodeRegion GlobalCR = ++ CodeRegion(ModuleID, "none", "all", Other); ++ GlobalCR.setHash(llvm::hash_combine(ModuleID, Other)); ++ NewCRs.insert(GlobalCR); ++ LLVM_DEBUG(dbgs() << "Module added as an tuning opportunity: \n" ++ << " Name: " << GlobalCR.getName() << "\n" ++ << " Hash: " << GlobalCR.getHash() << "\n" ++ << "\n"); ++ } ++ ++ // Include LLVMParam as an tuning opportunity only if it is specified with ++ // -auto-tuning-type-filter. ++ if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), ++ LLVMParam) != AutotuningOutputFilter.end()) ++ NewCRs.insert(CodeRegion(ModuleID, "none", "none", LLVMParam)); ++ ++ if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), ++ ProgramParam) != AutotuningOutputFilter.end()) ++ NewCRs.insert(CodeRegion(ModuleID, "none", "none", ProgramParam)); ++ ++ CRs = NewCRs; ++} ++ ++bool AutoTuningEngine::applyFunctionFilter(std::string FuncName) { ++ if (AutotuningFunctionFilter.getNumOccurrences() == 0) ++ return true; ++ ++ for (std::string FunctionFilter : AutotuningFunctionFilter) ++ if (FuncName == FunctionFilter) ++ return true; ++ ++ return false; ++} ++ ++void AutoTuningEngine::initContainer(Container *Container, ++ const std::string &PassName, ++ const StringRef FuncName, ++ bool AddOpportunity, ++ unsigned int Invocation) { ++ if (Enabled) { ++ if (!isTuningAllowedForType(convertPassToType(PassName)) && ++ !(isGenerateOutput() && ++ AutotuningOutputFilter.getNumOccurrences() == 0)) ++ return; ++ ++ if (!applyFunctionFilter(FuncName.str())) ++ return; ++ ++ // The attributes of a Container could potentially change overtime even with ++ // the same pass if the associated pass is invoked multiple times at ++ // different places in the pipeline. Therefore, we need to initCodeRegion ++ // every time when this function is called to ensure the CodeRegion with the ++ // latest information will be added as tuning opportunities. 
++ Container->initCodeRegion(); ++ if (Container->CR.getType() == autotuning::CodeRegionType::Invalid) ++ return; ++ ++ uint64_t hash = Container->computeStructuralHash(); ++ CodeRegion &OppCR = Container->CR; ++ if (GenerateOutput) { ++ if (OppCR.getSize() < SizeThreshold) ++ return; ++ if (ExcludeColdCodeRegion && OppCR.isCold()) { ++ LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function " ++ << OppCR.getFuncName() << "\n"); ++ return; ++ } ++ if (HotCodeRegionOnly && !OppCR.isHot()) { ++ LLVM_DEBUG(dbgs() << "Skip CodeRegion with " ++ << OppCR.getHotnessAsString() << " function " ++ << OppCR.getFuncName() << "\n"); ++ return; ++ } ++ } ++ OppCR.setPassName(PassName); ++ OppCR.setHash(hash); ++ OppCR.setInvocation(Invocation); ++ OppCR.Initialized = true; ++ if (AddOpportunity) ++ addOpportunity(OppCR); ++ } ++} ++ ++bool AutoTuningEngine::shouldRunOptPass(std::string Filename, ++ std::string Pass) { ++ return OppPassList.count(Filename) ? OppPassList[Filename].count(Pass) ++ : false; ++} ++ ++Error AutoTuningEngine::init(const std::string &Module) { ++ ParseInput = false; ++ if (std::optional MaybePath = ++ llvm::sys::Process::GetEnv("AUTOTUNE_INPUT")) { ++ InputFile = *MaybePath; ++ ParseInput = true; ++ } else if (InputFile.getNumOccurrences() > 0) { ++ ParseInput = true; ++ } ++ ++ GenerateOutput = false; ++ if (OutputOppDir.getNumOccurrences() > 0) ++ GenerateOutput = true; ++ ++ // Invocation of any of the following command line options ++ // (auto-tuning-input and auto-tuning-opp) or env variable ++ // AUTOTUNE_ALL_INPUT can enable auto-tuning mode. ++ if (ParseInput || GenerateOutput) { ++ Enabled = true; ++ // Generate absolute path and remove the base directory (if available). ++ // A relative path will be used as (coarse-grain) code region name. 
++ llvm::SmallString<128> ModuleVec = StringRef(Module); ++ llvm::sys::fs::make_absolute(ModuleVec); ++ if (ProjectDir.size() && ModuleVec.startswith(ProjectDir)) ++ ModuleID = ModuleVec.substr(ProjectDir.size()).str(); ++ else ++ ModuleID = std::string(ModuleVec); ++ } ++ ++ // Initialization of map to be used for pass-name to CodeRegionType ++ // conversion. ++ PTTMap = {{"loop-unroll", Loop}, ++ {"loop-vectorize", Loop}, ++ {"inline", CallSite}, ++ {"machine-scheduler", MachineBasicBlock}, ++ {"switch-lowering", Switch}, ++ {"autotuning-dump", Function}}; ++ ++ if (ParseInput) { ++ // Currently we only support yaml format for input. ++ if (Error E = AutoTuningRemarkManager::read(*this, InputFile, "yaml")) { ++ errs() << "Error parsing auto-tuning input.\n"; ++ return E; ++ } else { ++ LLVM_DEBUG(dbgs() << "AutoTuningEngine is initialized.\n" ++ << " Size of ParamTable: " << this->ParamTable.size() ++ << "\n"); ++ if (LLVMParams.size()) ++ LLVM_DEBUG(dbgs() << "AutoTuner: LLVMParams applied."); ++ if (ProgramParams.size()) ++ LLVM_DEBUG(dbgs() << "AutoTuner: ProgramParams applied.\n"); ++ } ++ } ++ ++ for (auto CRType : AutotuningOutputFilter) ++ CodeRegionFilterTypes.insert(CRType); ++ ++ if (GenerateOutput) { ++ switch (AutoTuningCompileMode) { ++ case CoarseGrain: { ++ bool Valid = false; ++ if (AutotuningOutputFilter.getNumOccurrences() > 0) { ++ Valid = true; ++ for (auto CRType : AutotuningOutputFilter) ++ if (CRType != LLVMParam) { ++ Valid = false; ++ break; ++ } ++ } ++ if (!Valid) { ++ AutoTuningCompileMode = Inactive; ++ errs() << "AutoTunerCompile: Code region type filtering does not match" ++ " with incremental compilation option.\n" ++ "Disabling incremental compilation.\n"; ++ } ++ break; ++ } ++ case FineGrain: { ++ bool Valid = false; ++ if (AutotuningOutputFilter.getNumOccurrences() > 0) { ++ Valid = true; ++ for (auto CRType : AutotuningOutputFilter) { ++ if (CRType != Loop && CRType != CallSite && CRType != Function) { ++ Valid = false; ++ 
break; ++ } ++ } ++ } ++ if (!Valid) { ++ AutoTuningCompileMode = Inactive; ++ errs() << "AutoTunerCompile: Code region type filtering does not match" ++ "with incremental compilation option.\n" ++ "Disabling incremental compilation.\n"; ++ } ++ break; ++ } ++ case Basic: ++ case Inactive: ++ break; ++ default: ++ llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental " ++ "Compilation mode.\n"); ++ } ++ } ++ ++ MLEnabled = (CFGNumber.getNumOccurrences() > 0); ++ if (EnableAutoTuningDump || MLEnabled) ++ DumpEnabled = true; ++ return Error::success(); ++} ++ ++llvm::Expected AutoTuningEngine::getConfigNumber() { ++ if (!isMLEnabled()) { ++ std::string errorMsg = ++ "No Autotuner configuration specified; ML guidance is unavailable."; ++ return createStringError(inconvertibleErrorCode(), errorMsg); ++ } else ++ return CFGNumber; ++} ++ ++Error AutoTuningEngine::finalize() { ++ if (OutputOppDir.getNumOccurrences() > 0) { ++ // Apply filters. ++ applyOppFilters(TuningOpps); ++ if (!TuningOpps.empty()) { ++ if (Error E = AutoTuningRemarkManager::dump( ++ *this, OutputOppDir, OutputFormat, RemarksPasses)) { ++ errs() << "Error generating auto-tuning opportunities.\n"; ++ return E; ++ } ++ } ++ ++ // Clear these two global lists when ending the auto-tuning ++ // in case of redundant information ++ TuningOpps.clear(); ++ } ++ return Error::success(); ++} ++ ++template ++bool AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, ++ T &Value) const { ++ bool Found = GlobalParams.findByName(ParamsName, Value); ++ if (Found) { ++ LLVM_DEBUG(dbgs() << "Global Variable " << ParamsName << " is set.\n"); ++ } ++ return Found; ++} ++ ++template bool ++AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, ++ int &Value) const; ++template bool ++AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, ++ bool &Value) const; ++template bool ++AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, ++ std::string &Value) 
const; ++template bool AutoTuningEngine::lookUpGlobalParams>( ++ const std::string &ParamsName, std::vector &Value) const; ++ ++class AutoTuningEngine Engine; ++ ++} // namespace autotuning ++ ++#endif +diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp +new file mode 100644 +index 000000000000..3e0506e534c4 +--- /dev/null ++++ b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp +@@ -0,0 +1,299 @@ ++#if defined(ENABLE_AUTOTUNER) ++//===- llvm/AutoTuner/AutoTuningRemarkManager.cpp - Remark Manager --------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the implementation for inputting and outputting remarks ++// for AutoTuning. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/AutoTuner/AutoTuningRemarkManager.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/AutoTuner/AutoTuning.h" ++#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h" ++#include "llvm/IR/DebugInfoMetadata.h" ++#include "llvm/IR/LLVMRemarkStreamer.h" ++#include "llvm/Remarks/Remark.h" ++#include "llvm/Remarks/RemarkFormat.h" ++#include "llvm/Remarks/RemarkParser.h" ++#include "llvm/Remarks/RemarkSerializer.h" ++#include "llvm/Remarks/RemarkStreamer.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/FileSystem.h" ++#include "llvm/Support/MemoryBuffer.h" ++#include "llvm/Support/Path.h" ++#include "llvm/Support/ToolOutputFile.h" ++ ++// Enable debug messages for AutoTuner. ++#define DEBUG_TYPE "autotuning" ++ ++using namespace llvm; ++using namespace autotuning; ++ ++// Helper functions. ++namespace { ++// Convert string into CodeRegionType.
++Expected StringToCodeRegionType(const std::string &CRType) { ++ if (CRType == "machine_basic_block") ++ return autotuning::CodeRegionType::MachineBasicBlock; ++ else if (CRType == "loop") ++ return autotuning::CodeRegionType::Loop; ++ else if (CRType == "function") ++ return autotuning::CodeRegionType::Function; ++ else if (CRType == "callsite") ++ return autotuning::CodeRegionType::CallSite; ++ else if (CRType == "llvm-param") ++ return autotuning::CodeRegionType::LLVMParam; ++ else if (CRType == "program-param") ++ return autotuning::CodeRegionType::ProgramParam; ++ else if (CRType == "switch") ++ return autotuning::CodeRegionType::Switch; ++ else if (CRType == "other") ++ return autotuning::CodeRegionType::Other; ++ else ++ return make_error("Unsupported CodeRegionType:" + CRType, ++ inconvertibleErrorCode()); ++} ++ ++// Remark -> autotuning::ParameterManager ++ParameterManager RemarkToParameterManager(const remarks::Remark &Remark) { ++ // Create Parameters from a remark. ++ ParameterManager ParamManager; ++ for (const remarks::Argument &Arg : Remark.Args) { ++ int Value = 0; ++ if (!Arg.Val.getAsInteger(10, Value)) ++ // If no errors ++ ParamManager.add(Arg.Key.str(), Value); ++ else if (Arg.Val == "true") ++ ParamManager.add(Arg.Key.str(), true); ++ else if (Arg.Val == "false") ++ ParamManager.add(Arg.Key.str(), false); ++ // If there is a value of vector type ++ else if (Arg.VectorVal) { ++ std::vector Strings; ++ for (const StringRef &Val : *Arg.VectorVal) { ++ Strings.push_back(Val.str()); ++ } ++ ParamManager.add(Arg.Key.str(), Strings); ++ } else ++ // Add as String Value ++ ParamManager.add(Arg.Key.str(), Arg.Val); ++ } ++ ++ return ParamManager; ++} ++ ++// Remark -> std::unordered_map ++std::unordered_map ++RemarkToStringMap(const remarks::Remark &Remark) { ++ std::unordered_map LLVMParams; ++ for (const remarks::Argument &Arg : Remark.Args) { ++ // Add as String Value ++ LLVMParams[Arg.Key.str()] = Arg.Val.str(); ++ } ++ return LLVMParams; ++} ++ 
++// Remark -> autotuning::SourceLocation ++SourceLocation RemarkToSourceLocation(const remarks::Remark &Remark) { ++ SourceLocation Location; ++ if (Remark.Loc) { ++ StringRef File = Remark.Loc->SourceFilePath; ++ unsigned Line = Remark.Loc->SourceLine; ++ unsigned Column = Remark.Loc->SourceColumn; ++ Location = {File.str(), Line, Column}; ++ } ++ return Location; ++} ++ ++// Remark -> autotuning::CodeRegion ++CodeRegion RemarkToCodeRegion(const remarks::Remark &Remark, ++ Expected &Type) { ++ // Create a SourceLocation from a remark. ++ SourceLocation Location = RemarkToSourceLocation(Remark); ++ // Create a CodeRegion from a remark. ++ CodeRegion CR = CodeRegion(Remark.RemarkName.str(), Remark.FunctionName.str(), ++ Remark.PassName.str(), Type.get(), Location); ++ if (Remark.CodeRegionHash) ++ CR.setHash(Remark.CodeRegionHash.value_or(0)); ++ if (Remark.Invocation) ++ CR.setInvocation(Remark.Invocation.value_or(0)); ++ ++ return CR; ++} ++ ++Expected> emitAutoTuningRemarks( ++ const StringRef RemarksFilename, const StringRef RemarksFormat, ++ const StringRef RemarksPasses, const CodeRegions &CRList) { ++ if (RemarksFilename.empty()) ++ return nullptr; ++ // Parse remark format. Options are yaml, yaml-strtab and bitstream. ++ Expected Format = remarks::parseFormat(RemarksFormat); ++ if (Error E = Format.takeError()) ++ return make_error(std::move(E)); ++ ++ std::error_code EC; ++ auto Flags = ++ *Format == remarks::Format::YAML ? sys::fs::OF_Text : sys::fs::OF_None; ++ auto RemarksFile = ++ std::make_unique(RemarksFilename, EC, Flags); ++ if (EC) ++ return make_error(errorCodeToError(EC)); ++ // Create a remark serializer to emit code regions. ++ Expected> RemarkSerializer = ++ remarks::createRemarkSerializer( ++ *Format, remarks::SerializerMode::Separate, RemarksFile->os()); ++ ++ if (Error E = RemarkSerializer.takeError()) ++ return make_error(std::move(E)); ++ // Create remark streamer based on the serializer. 
++ remarks::RemarkStreamer RStreamer = ++ remarks::RemarkStreamer(std::move(*RemarkSerializer), RemarksFilename); ++ AutoTuningRemarkStreamer Streamer(RStreamer); ++ ++ if (!RemarksPasses.empty()) ++ if (Error E = Streamer.setFilter(RemarksPasses)) ++ return make_error(std::move(E)); ++ // Emit CodeRegions in Remark format. ++ for (const CodeRegion &CR : CRList) { ++ Streamer.emit(CR); ++ } ++ return std::move(RemarksFile); ++} ++} // namespace ++ ++llvm::Error AutoTuningRemarkManager::read(AutoTuningEngine &E, ++ const std::string &InputFileName, ++ const std::string &RemarksFormat) { ++ ErrorOr> Buf = ++ MemoryBuffer::getFile(InputFileName.c_str()); ++ if (std::error_code EC = Buf.getError()) ++ return make_error( ++ "Can't open file " + InputFileName + ": " + EC.message(), EC); ++ // Parse remark format. Options are yaml, yaml-strtab and bitstream. ++ Expected Format = remarks::parseFormat(RemarksFormat); ++ if (!Format) ++ return Format.takeError(); ++ ++ Expected> MaybeParser = ++ remarks::createRemarkParserFromMeta(*Format, (*Buf)->getBuffer()); ++ if (!MaybeParser) { ++ return MaybeParser.takeError(); ++ } ++ remarks::RemarkParser &Parser = **MaybeParser; ++ ++ while (true) { ++ Expected> MaybeRemark = Parser.next(); ++ if (!MaybeRemark) { ++ Error E = MaybeRemark.takeError(); ++ if (E.isA()) { ++ // EOF. ++ consumeError(std::move(E)); ++ break; ++ } ++ return E; ++ } ++ const remarks::Remark &Remark = **MaybeRemark; ++ ++ if (Remark.RemarkType != remarks::Type::AutoTuning) ++ continue; ++ ++ if (!Remark.CodeRegionType) ++ return make_error("CodeRegionType field is missing.", ++ inconvertibleErrorCode()); ++ Expected Type = ++ StringToCodeRegionType((*Remark.CodeRegionType).str()); ++ if (!Type) ++ return Type.takeError(); ++ CodeRegionType CRType = Type.get(); ++ // If CodeRegionType is Other, this remark corresponds to global ++ // parameters, and no need to create a CodeRegion object. 
Check if the ++ // Remark of global parameters is for the current Module. ++ if (CRType == autotuning::Other && Remark.RemarkName == Engine.ModuleID) { ++ Engine.GlobalParams = RemarkToParameterManager(Remark); ++ continue; ++ } ++ if (CRType == autotuning::LLVMParam && ++ Remark.RemarkName == Engine.ModuleID) { ++ Engine.LLVMParams = RemarkToStringMap(Remark); ++ continue; ++ } ++ if (CRType == autotuning::ProgramParam && ++ Remark.RemarkName == Engine.ModuleID) { ++ Engine.ProgramParams = RemarkToStringMap(Remark); ++ continue; ++ } ++ if (Engine.isThinLTOTuning() && ++ (CRType == autotuning::CallSite || CRType == autotuning::Loop || ++ CRType == autotuning::MachineBasicBlock || ++ CRType == autotuning::Function)) { ++ LLVM_DEBUG(dbgs() << "AutoTuner does not support tuning of " ++ << CodeRegion::getTypeAsString(CRType) ++ << " for thinLTO durning link-time optimization. " ++ "Ignoring current code region.\n"); ++ continue; ++ } ++ // Create a CodeRegion from a remark. ++ CodeRegion CR = RemarkToCodeRegion(Remark, Type); ++ ParameterManager ParamManager = RemarkToParameterManager(Remark); ++ // Add the CodeRegion-ParameterManager entry into LookUpTable. ++ Engine.ParamTable[CR] = ParamManager; ++ ++ std::string Filename = CR.getSourceLoc().SourceFilePath; ++ size_t Pos = Filename.rfind("."); ++ if (Pos != std::string::npos) ++ Filename.erase(Pos, Filename.size()); ++ Engine.OppPassList[Filename].insert(CR.getPassName()); ++ Engine.CodeRegionFilterTypes.insert(CR.getType()); ++ } ++ return Error::success(); ++} ++ ++Error AutoTuningRemarkManager::dump(const autotuning::AutoTuningEngine &E, ++ const std::string &DirName, ++ const std::string &RemarksFormat, ++ const std::string &RemarksPasses) { ++ // Change to absolute path. ++ SmallString<256> OutputPath = StringRef(DirName); ++ sys::fs::make_absolute(OutputPath); ++ ++ // Make sure the new output directory exists, creating it if necessary.
++ if (std::error_code EC = sys::fs::create_directories(OutputPath)) { ++ return make_error("could not create directory: " + ++ Twine(OutputPath) + ": " + EC.message(), ++ EC); ++ } ++ if (!Engine.TuningOpps.empty()) { ++ StringRef ModelFileName = sys::path::filename(Engine.ModuleID); ++ sys::path::append(OutputPath, ModelFileName + "." + RemarksFormat); ++ ++ int i = 1; // Output file suffix starts from 1. ++ // Probe existing output files (suffixes .1 ... .i) until an unused file ++ // name is found. ++ while (sys::fs::exists(OutputPath)) { ++ sys::path::remove_filename(OutputPath); ++ sys::path::append(OutputPath, ++ ModelFileName + "." + RemarksFormat + "." + Twine(i)); ++ i += 1; ++ } ++ Expected> RemarksFileOrErr = ++ emitAutoTuningRemarks(OutputPath, RemarksFormat, RemarksPasses, ++ Engine.TuningOpps); ++ if (Error E = RemarksFileOrErr.takeError()) { ++ return E; ++ } ++ ++ std::unique_ptr RemarksFile = std::move(*RemarksFileOrErr); ++ if (RemarksFile) ++ RemarksFile->keep(); ++ } ++ return Error::success(); ++} ++ ++#endif +diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp +new file mode 100644 +index 000000000000..0516c055a139 +--- /dev/null ++++ b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp +@@ -0,0 +1,55 @@ ++#if defined(ENABLE_AUTOTUNER) ++// ===---------- llvm/AutoTuner/AutoTuningRemarkStreamer.cpp --------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. ++// ++// ===---------------------------------------------------------------------===// ++// ++// This file contains the implementation of the conversion between AutoTuner ++// CodeRegions and serializable remarks::Remark objects.
++// ++// ===---------------------------------------------------------------------===// ++ ++#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h" ++ ++using namespace llvm; ++ ++// autotuning::CodeRegion -> Remark ++remarks::Remark ++AutoTuningRemarkStreamer::toRemark(const autotuning::CodeRegion &CR) { ++ remarks::Remark R; // The result. ++ R.RemarkType = remarks::Type::AutoTuning; ++ R.PassName = CR.getPassName(); ++ R.RemarkName = CR.getName(); ++ R.FunctionName = CR.getFuncName(); ++ const autotuning::SourceLocation &Location = CR.getSourceLoc(); ++ if (Location) ++ R.Loc = remarks::RemarkLocation{Location.SourceFilePath, ++ Location.SourceLine, Location.SourceColumn}; ++ R.CodeRegionType = CR.getTypeAsString(); ++ R.CodeRegionHash = CR.getHash(); ++ R.AutoTunerOptions = CR.getAutoTunerOptions(); ++ R.Invocation = CR.getInvocation(); ++ R.BaselineConfig = CR.getBaselineConfig(); ++ return R; ++} ++ ++void AutoTuningRemarkStreamer::emit(const autotuning::CodeRegion &CR) { ++ if (!RS.matchesFilter(CR.getPassName())) ++ return; ++ ++ // First, convert the code region to a remark. ++ remarks::Remark R = toRemark(CR); ++ // Then, emit the remark through the serializer. 
++ RS.getSerializer().emit(R); ++} ++ ++Error AutoTuningRemarkStreamer::setFilter(StringRef Filter) { ++ return RS.setFilter(Filter); ++} ++#endif +diff --git a/llvm/lib/AutoTuner/CMakeLists.txt b/llvm/lib/AutoTuner/CMakeLists.txt +new file mode 100644 +index 000000000000..c618474fe5ae +--- /dev/null ++++ b/llvm/lib/AutoTuner/CMakeLists.txt +@@ -0,0 +1,11 @@ ++add_llvm_component_library(LLVMAutoTuner ++ AutoTuning.cpp ++ AutoTuningRemarkManager.cpp ++ AutoTuningRemarkStreamer.cpp ++ ++ ADDITIONAL_HEADER_DIRS ++ ${LLVM_MAIN_INCLUDE_DIR}/llvm/AutoTuner ++ ++ LINK_COMPONENTS ++ Remarks ++) +\ No newline at end of file +diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt +index 283baa6090eb..966137c0f71f 100644 +--- a/llvm/lib/CMakeLists.txt ++++ b/llvm/lib/CMakeLists.txt +@@ -28,6 +28,7 @@ add_subdirectory(Object) + add_subdirectory(ObjectYAML) + add_subdirectory(Option) + add_subdirectory(Remarks) ++add_subdirectory(AutoTuner) + add_subdirectory(Debuginfod) + add_subdirectory(DebugInfo) + add_subdirectory(DWP) +diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt +index 106571b9061b..9029dc7bb3d9 100644 +--- a/llvm/lib/CodeGen/CMakeLists.txt ++++ b/llvm/lib/CodeGen/CMakeLists.txt +@@ -273,6 +273,7 @@ add_llvm_component_library(LLVMCodeGen + + LINK_COMPONENTS + Analysis ++ AutoTuner + BitReader + BitWriter + CodeGenTypes +diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp +index 5a005ba7b414..9dcb3833ab91 100644 +--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp ++++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp +@@ -29,6 +29,24 @@ using namespace llvm; + + #define DEBUG_TYPE "calcspillweights" + ++#if defined(ENABLE_AUTOTUNER) ++static cl::opt LoopWeight( ++ "reg-spill-loop-weight", cl::Hidden, ++ cl::desc( ++ "Tunable extra weight to what looks like a loop induction variable"), ++ cl::init(3)); ++ ++static cl::opt RemaWeight( ++ "reg-spill-rematerialize-weight", cl::Hidden, ++ cl::desc("Tunable reduced 
weight giving re-materialize oppotunities"), ++ cl::init(0.5f)); ++ ++static cl::opt ++ HintWeight("reg-spill-hint-weight", cl::Hidden, ++ cl::desc("Tunable weakly boost weight of hinted registers"), ++ cl::init(1.01f)); ++#endif ++ + void VirtRegAuxInfo::calculateSpillWeightsAndHints() { + LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" + << "********** Function: " << MF.getName() << '\n'); +@@ -252,7 +270,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, + + // Give extra weight to what looks like a loop induction variable update. + if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB)) ++#if defined(ENABLE_AUTOTUNER) ++ Weight *= LoopWeight; ++#else + Weight *= 3; ++#endif + + TotalWeight += Weight; + } +@@ -288,7 +310,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, + } + + // Weakly boost the spill weight of hinted registers. ++#if defined(ENABLE_AUTOTUNER) ++ TotalWeight *= HintWeight; ++#else + TotalWeight *= 1.01F; ++#endif + } + + // If the live interval was already unspillable, leave it that way. +@@ -315,7 +341,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. 
+ if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo())) ++#if defined(ENABLE_AUTOTUNER) ++ TotalWeight *= RemaWeight; ++#else + TotalWeight *= 0.5F; ++#endif + + if (IsLocalSplitArtifact) + return normalize(TotalWeight, Start->distance(*End), NumInstr); +diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp +index 231544494c32..327cd40f86a4 100644 +--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp ++++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp +@@ -37,6 +37,9 @@ + #include "llvm/Support/raw_ostream.h" + #include "llvm/Target/TargetMachine.h" + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/IR/StructuralHash.h" ++#endif + #include + using namespace llvm; + +@@ -1703,6 +1706,39 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { + return LiveIns.begin(); + } + ++#if defined(ENABLE_AUTOTUNER) ++uint64_t MachineBasicBlock::computeStructuralHash() { ++ return StructuralHash(*this); ++} ++ ++void MachineBasicBlock::initCodeRegion() { ++ std::string BasicBlockName = ++ ("%bb." + Twine(this->getNumber()) + ":" + this->getName()).str(); ++ MachineFunction *MF = this->getParent(); ++ StringRef FuncName = MF->getName(); ++ ++ autotuning::CodeRegion CR; ++ if (!this->empty()) { ++ const DebugLoc &StartLoc = this->front().getDebugLoc(); ++ CR = autotuning::CodeRegion(BasicBlockName, FuncName.data(), ++ autotuning::CodeRegionType::MachineBasicBlock, ++ StartLoc); ++ } else { ++ CR = autotuning::CodeRegion(BasicBlockName, FuncName.data(), ++ autotuning::CodeRegionType::MachineBasicBlock); ++ } ++ // Compute the number of non-debug IR instructions in this MBB. ++ unsigned NumInstrs = std::distance(this->getFirstNonDebugInstr(), ++ this->getLastNonDebugInstr()); ++ CR.setSize(NumInstrs); ++ // Compute hotness. 
++ autotuning::HotnessType Hotness = MF->getFunction().ATEFunction.getHotness(); ++ CR.setHotness(Hotness); ++ ++ this->setCodeRegion(CR); ++} ++#endif ++ + MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const { + const MachineFunction &MF = *getParent(); + assert(MF.getProperties().hasProperty( +diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp +index ba5432459d12..caccc9e5fad4 100644 +--- a/llvm/lib/CodeGen/MachineScheduler.cpp ++++ b/llvm/lib/CodeGen/MachineScheduler.cpp +@@ -569,6 +569,12 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, + for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); + MBB != MBBEnd; ++MBB) { + ++#if defined(ENABLE_AUTOTUNER) ++ // before visiting this MBB ++ // if AutoTuning is enabled, initialize this MBB for auto-tuning ++ autotuning::Engine.initContainer(&*MBB, DEBUG_TYPE); ++#endif ++ + Scheduler.startBlock(&*MBB); + + #ifndef NDEBUG +@@ -3244,6 +3250,44 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + RegionPolicy.ShouldTrackLaneMasks = false; + } + ++#if defined(ENABLE_AUTOTUNER) ++ // AUTO-TUNING - Look up the MBB-level scheduling direction if AutoTuning is ++ // enabled ++ if (autotuning::Engine.isEnabled()) { ++ MachineBasicBlock &MBB = *Begin->getParent(); ++ ++ bool NewForceBottomUp = false; ++ // Look up from the tuning input, and overwrite values ++ bool IsForceBottomUpSet = ++ MBB.lookUpParams("ForceBottomUp", NewForceBottomUp); ++ ++ bool NewForceForceTopDown = false; ++ bool IsForceTopDownSet = ++ MBB.lookUpParams("ForceTopDown", NewForceForceTopDown); ++ ++ assert((!NewForceBottomUp || !NewForceForceTopDown) && ++ "BottomUp and TopDown cannot both set to true"); ++ ++ if (IsForceBottomUpSet) { ++ RegionPolicy.OnlyBottomUp = NewForceBottomUp; ++ if (RegionPolicy.OnlyBottomUp) { ++ RegionPolicy.OnlyTopDown = false; ++ } ++ } ++ ++ if (IsForceTopDownSet) { ++ RegionPolicy.OnlyTopDown =
NewForceForceTopDown; ++ if (RegionPolicy.OnlyTopDown) { ++ RegionPolicy.OnlyBottomUp = false; ++ } ++ } ++ ++ if (IsForceBottomUpSet || IsForceTopDownSet) { ++ return; ++ } ++ } ++#endif ++ + // Check -misched-topdown/bottomup can force or unforce scheduling direction. + // e.g. -misched-bottomup=false allows scheduling in both directions. + assert((!ForceTopDown || !ForceBottomUp) && +diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +index 36a02d5beb4b..d4ac95d534ed 100644 +--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp ++++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +@@ -16,6 +16,9 @@ + #include "llvm/CodeGen/MachineJumpTableInfo.h" + #include "llvm/CodeGen/TargetLowering.h" + #include "llvm/Target/TargetMachine.h" ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#endif + + using namespace llvm; + using namespace SwitchCG; +@@ -61,7 +64,23 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, + if (!TLI->areJTsAllowed(SI->getParent()->getParent())) + return; + ++#if defined(ENABLE_AUTOTUNER) ++ unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); ++ // Overwrite MinJumpTableEntries when it is set by Autotuner ++ if (autotuning::Engine.isEnabled()) { ++ autotuning::Engine.initContainer(SI->ATESwitchInst.get(), ++ "switch-lowering"); ++ ++ int NewValue = 0; // the int value is set by lookUpParams() ++ bool Changed = ++ SI->ATESwitchInst->lookUpParams("MinJumpTableEntries", NewValue); ++ if (Changed) ++ MinJumpTableEntries = NewValue; ++ } ++#else + const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); ++#endif ++ + const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; + + // Bail if not enough cases. 
+diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp +index df753b91ff90..af77e6c2dc4d 100644 +--- a/llvm/lib/IR/AsmWriter.cpp ++++ b/llvm/lib/IR/AsmWriter.cpp +@@ -2602,11 +2602,21 @@ public: + void writeAllAttributeGroups(); + + void printTypeIdentities(); ++#if defined(ENABLE_AUTOTUNER) ++ void printGlobal(const GlobalVariable *GV, bool PrintDeclarationOnly = false); ++ void printAlias(const GlobalAlias *GA); ++ void printIFunc(const GlobalIFunc *GI); ++ void printComdat(const Comdat *C); ++ void printRequisiteDeclarations(const Function *F); ++ void printFunction(const Function *F, bool PrintCompleteIR = false, ++ bool PrintDeclarationOnly = false); ++#else + void printGlobal(const GlobalVariable *GV); + void printAlias(const GlobalAlias *GA); + void printIFunc(const GlobalIFunc *GI); + void printComdat(const Comdat *C); + void printFunction(const Function *F); ++#endif + void printArgument(const Argument *FA, AttributeSet Attrs); + void printBasicBlock(const BasicBlock *BB); + void printInstructionLine(const Instruction &I); +@@ -3593,15 +3603,26 @@ static void maybePrintComdat(formatted_raw_ostream &Out, + Out << ')'; + } + ++#if defined(ENABLE_AUTOTUNER) ++void AssemblyWriter::printGlobal(const GlobalVariable *GV, ++ bool PrintDeclarationOnly) { ++ if (GV->isMaterializable() && !PrintDeclarationOnly) ++#else + void AssemblyWriter::printGlobal(const GlobalVariable *GV) { + if (GV->isMaterializable()) ++#endif + Out << "; Materializable\n"; + + AsmWriterContext WriterCtx(&TypePrinter, &Machine, GV->getParent()); + WriteAsOperandInternal(Out, GV, WriterCtx); + Out << " = "; + ++#if defined(ENABLE_AUTOTUNER) ++ if ((!GV->hasInitializer() || PrintDeclarationOnly) && ++ GV->hasExternalLinkage()) ++#else + if (!GV->hasInitializer() && GV->hasExternalLinkage()) ++#endif + Out << "external "; + + Out << getLinkageNameWithSpace(GV->getLinkage()); +@@ -3619,7 +3640,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { + Out << 
(GV->isConstant() ? "constant " : "global "); + TypePrinter.print(GV->getValueType(), Out); + ++#if defined(ENABLE_AUTOTUNER) ++ if (GV->hasInitializer() && !PrintDeclarationOnly) { ++#else + if (GV->hasInitializer()) { ++#endif + Out << ' '; + writeOperand(GV->getInitializer(), false); + } +@@ -3769,12 +3794,102 @@ void AssemblyWriter::printTypeIdentities() { + } + } + ++#if defined(ENABLE_AUTOTUNER) ++/// printRequisiteDeclarations - Print the declarations of type identities, ++/// global variables, functions, and function attribute groups of a function. ++void AssemblyWriter::printRequisiteDeclarations(const Function *F) { ++ // walk through instructions and collect global variables & functions ++ SmallPtrSet GVs; ++ SmallPtrSet Functions; ++ for (const BasicBlock &BB : *F) { ++ for (const Instruction &I : BB) { ++ // Check for function ++ if (const auto *CI = dyn_cast(&I)) { ++ Function *func = CI->getCalledFunction(); ++ if (func) ++ Functions.insert(func); ++ } ++ // Check for global variables ++ for (const Use &U : I.operands()) { ++ if (GlobalVariable *gv = dyn_cast(U)) ++ GVs.insert(gv); ++ if (GEPOperator *gepo = dyn_cast(&U)) { ++ if (GlobalVariable *gv = ++ dyn_cast(gepo->getPointerOperand())) ++ GVs.insert(gv); ++ for (auto it = gepo->idx_begin(), et = gepo->idx_end(); it != et; ++ ++it) { ++ if (GlobalVariable *gv = dyn_cast(*it)) ++ GVs.insert(gv); ++ } ++ } ++ } ++ } ++ } ++ ++ // print type identities ++ printTypeIdentities(); ++ ++ // print global variables ++ if (!GVs.empty()) { ++ Out << '\n'; ++ for (auto GVit = GVs.begin(), et = GVs.end(); GVit != et; ++GVit) { ++ // Make backups of some properties. They may be modified for printing. 
++ GlobalValue::LinkageTypes SavedLinkage = (*GVit)->getLinkage(); ++ GlobalVariable::VisibilityTypes SavedVisibility = ++ (*GVit)->getVisibility(); ++ ++ // modify property if needed ++ if (!(*GVit)->hasAvailableExternallyLinkage() && ++ !((*GVit)->getName() == "llvm.global_ctors") && ++ (*GVit)->hasLocalLinkage()) { ++ (*GVit)->setLinkage(GlobalValue::ExternalLinkage); ++ (*GVit)->setVisibility(GlobalValue::HiddenVisibility); ++ } ++ ++ printGlobal(*GVit, true); ++ Out << '\n'; ++ ++ // restore backups ++ (*GVit)->setLinkage(SavedLinkage); ++ (*GVit)->setVisibility(SavedVisibility); ++ } ++ Out << '\n'; ++ } ++ ++ // print functions ++ for (auto FuncIt = Functions.begin(), et = Functions.end(); FuncIt != et; ++ ++FuncIt) { ++ Out << '\n'; ++ printFunction(*FuncIt, false, true); ++ } ++ ++ // Write attribute groups. ++ if (!Machine.as_empty()) { ++ Out << '\n'; ++ writeAllAttributeGroups(); ++ } ++ Out << '\n'; ++} ++ + /// printFunction - Print all aspects of a function. ++void AssemblyWriter::printFunction(const Function *F, bool PrintCompleteIR, ++ bool PrintDeclarationOnly) { ++ if (PrintCompleteIR && !PrintDeclarationOnly) { ++ printRequisiteDeclarations(F); ++ } ++ if (AnnotationWriter && !PrintDeclarationOnly) ++ AnnotationWriter->emitFunctionAnnot(F, Out); ++ ++ if (F->isMaterializable() && !PrintDeclarationOnly) ++ Out << "; Materializable\n"; ++#else + void AssemblyWriter::printFunction(const Function *F) { + if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); + + if (F->isMaterializable()) + Out << "; Materializable\n"; ++#endif + + const AttributeList &Attrs = F->getAttributes(); + if (Attrs.hasFnAttrs()) { +@@ -3792,6 +3907,18 @@ void AssemblyWriter::printFunction(const Function *F) { + Out << "; Function Attrs: " << AttrStr << '\n'; + } + ++#if defined(ENABLE_AUTOTUNER) ++ if (!PrintDeclarationOnly) ++ Machine.incorporateFunction(F); ++ ++ if (F->isDeclaration() || PrintDeclarationOnly) { ++ Out << "declare"; ++ if 
(!PrintDeclarationOnly) { ++ SmallVector, 4> MDs; ++ F->getAllMetadata(MDs); ++ printMetadataAttachments(MDs, " "); ++ } ++#else + Machine.incorporateFunction(F); + + if (F->isDeclaration()) { +@@ -3799,6 +3926,7 @@ void AssemblyWriter::printFunction(const Function *F) { + SmallVector, 4> MDs; + F->getAllMetadata(MDs); + printMetadataAttachments(MDs, " "); ++#endif + Out << ' '; + } else + Out << "define "; +@@ -3824,7 +3952,11 @@ void AssemblyWriter::printFunction(const Function *F) { + Out << '('; + + // Loop over the arguments, printing them... ++#if defined(ENABLE_AUTOTUNER) ++ if ((F->isDeclaration() && !IsForDebug) || PrintDeclarationOnly) { ++#else + if (F->isDeclaration() && !IsForDebug) { ++#endif + // We're only interested in the type here - don't print argument names. + for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) { + // Insert commas as we go... the first arg doesn't get a comma +@@ -3895,7 +4027,11 @@ void AssemblyWriter::printFunction(const Function *F) { + writeOperand(F->getPersonalityFn(), /*PrintType=*/true); + } + ++#if defined(ENABLE_AUTOTUNER) ++ if (F->isDeclaration() || PrintDeclarationOnly) { ++#else + if (F->isDeclaration()) { ++#endif + Out << '\n'; + } else { + SmallVector, 4> MDs; +@@ -3913,6 +4049,13 @@ void AssemblyWriter::printFunction(const Function *F) { + Out << "}\n"; + } + ++#if defined(ENABLE_AUTOTUNER) ++ // Output metadata ++ if (!Machine.mdn_empty() && PrintCompleteIR && !PrintDeclarationOnly) { ++ Out << '\n'; ++ writeAllMDNodes(); ++ } ++#endif + Machine.purgeFunction(); + } + +@@ -4591,13 +4734,21 @@ void AssemblyWriter::printUseLists(const Function *F) { + + void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder, ++#if defined(ENABLE_AUTOTUNER) ++ bool IsForDebug, bool PrintCompleteIR) const { ++#else + bool IsForDebug) const { ++#endif + SlotTracker SlotTable(this->getParent()); + formatted_raw_ostream OS(ROS); + AssemblyWriter W(OS, SlotTable, 
this->getParent(), AAW, + IsForDebug, + ShouldPreserveUseListOrder); ++#if defined(ENABLE_AUTOTUNER) ++ W.printFunction(this, PrintCompleteIR); ++#else + W.printFunction(this); ++#endif + } + + void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, +diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt +index 217fe703dd4e..d44d1eea9f3e 100644 +--- a/llvm/lib/IR/CMakeLists.txt ++++ b/llvm/lib/IR/CMakeLists.txt +@@ -78,6 +78,7 @@ add_llvm_component_library(LLVMCore + intrinsics_gen + + LINK_COMPONENTS ++ AutoTuner + BinaryFormat + Demangle + Remarks +diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp +index 435800d9e5f9..ec2620efac38 100644 +--- a/llvm/lib/IR/Function.cpp ++++ b/llvm/lib/IR/Function.cpp +@@ -70,6 +70,10 @@ + #include + #include + ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/IR/StructuralHash.h" ++#endif ++ + using namespace llvm; + using ProfileCount = Function::ProfileCount; + +@@ -1977,6 +1981,36 @@ std::optional Function::getSectionPrefix() const { + return std::nullopt; + } + ++#if defined(ENABLE_AUTOTUNER) ++uint64_t AutoTuningEnabledFunction::computeStructuralHash() { ++ return StructuralHash(*(this->Func)); ++} ++ ++void AutoTuningEnabledFunction::initCodeRegion() { ++ StringRef FuncName = Func->getName(); ++ StringRef EntryBBName; ++ autotuning::SourceLocation Loc; ++ ++ if (!Func->empty()) ++ EntryBBName = Func->front().getName(); ++ else ++ EntryBBName = StringRef("None"); ++ ++ DISubprogram *SubProgram = Func->getSubprogram(); ++ if (SubProgram) ++ // Set the column number to 0 because there is no information about ++ // column number for functions. 
++ Loc = {SubProgram->getFilename().str(), SubProgram->getLine(), 0}; ++ ++ autotuning::CodeRegion CR = ++ autotuning::CodeRegion(EntryBBName.data(), FuncName.data(), ++ autotuning::CodeRegionType::Function, Loc); ++ CR.setSize(Func->getInstructionCount()); ++ CR.setHotness(this->getHotness()); ++ this->setCodeRegion(CR); ++} ++#endif ++ + bool Function::nullPointerIsDefined() const { + return hasFnAttribute(Attribute::NullPointerIsValid); + } +diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp +index cb0ac0f8eae6..e614285df07a 100644 +--- a/llvm/lib/IR/Instructions.cpp ++++ b/llvm/lib/IR/Instructions.cpp +@@ -45,6 +45,9 @@ + #include + #include + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/IR/StructuralHash.h" ++#endif + + using namespace llvm; + +@@ -259,6 +262,89 @@ void LandingPadInst::addClause(Constant *Val) { + getOperandList()[OpNo] = Val; + } + ++#if defined(ENABLE_AUTOTUNER) ++uint64_t AutoTuningEnabledSwitchInst::computeStructuralHash() { ++ return StructuralHash(*(this->SI)); ++} ++ ++void AutoTuningEnabledSwitchInst::initCodeRegion() { ++ std::string SwitchName; ++ if (this->SI->hasName()) { ++ SwitchName = this->SI->getName().str(); ++ } else { ++ std::string Str; ++ llvm::raw_string_ostream RSO(Str); ++ this->SI->getCondition()->printAsOperand(RSO); ++ SwitchName = RSO.str(); ++ } ++ ++ autotuning::CodeRegion CR = autotuning::CodeRegion( ++ SwitchName, this->SI->getFunction()->getName().str(), ++ autotuning::CodeRegionType::Switch, this->SI->getDebugLoc()); ++ ++ unsigned TotalNumInsts = 0; ++ for (auto Case : SI->cases()) { ++ const BasicBlock *BB = Case.getCaseSuccessor(); ++ unsigned NumInsts = std::distance(BB->instructionsWithoutDebug().begin(), ++ BB->instructionsWithoutDebug().end()); ++ TotalNumInsts += NumInsts; ++ } ++ ++ CR.setSize(TotalNumInsts); ++ // Compute hotness. 
++ autotuning::HotnessType Hotness = ++ this->SI->getFunction()->ATEFunction.getHotness(); ++ CR.setHotness(Hotness); ++ ++ this->setCodeRegion(CR); ++} ++ ++uint64_t AutoTuningEnabledCallSite::computeStructuralHash() { ++ return StructuralHash(*(this->CB)); ++} ++ ++void AutoTuningEnabledCallSite::initCodeRegion() { ++ // Use Caller's name as FuncName and Callee's name as Name of a CodeRegion. ++ Function *Caller = this->CB->getCaller(); ++ Function *Callee = this->CB->getCalledFunction(); ++ if (Caller == nullptr || Callee == nullptr) { ++ this->setCodeRegion(autotuning::CodeRegion::getInvalidInstance()); ++ return; ++ } ++ ++ autotuning::SourceLocation SrcLoc; ++ if (this->CB->getDebugLoc()) { ++ unsigned int SourceLine = this->CB->getDebugLoc()->getLine(); ++ // Get modified source line number for current callsite if there is another ++ // call instruction (to same callee) which has same source line number ++ // happened due to inlining. ++ std::optional LineNum = autotuning::Engine.getCallSiteLoc(CB); ++ if (LineNum) ++ SourceLine = *LineNum; ++ SrcLoc = autotuning::SourceLocation{ ++ this->CB->getDebugLoc()->getFilename().str(), SourceLine, ++ this->CB->getDebugLoc()->getColumn()}; ++ } ++ ++ // We are using DebugLoc to distinguish between multiple calls to the same ++ // callee in a function. It may be possible that these multiple calls have ++ // same DebugLoc either 1) due to inlining of multiple calls (same callee) ++ // and callee having more calls, or 2) cloned calls added by previous ++ // optimizations. We are using 'callee name + its parent (basic block) name' ++ // to solve these problems. Additionally we are using modified line number ++ // for the issue # 1; this will handle the cases where the multiple calls are ++ // in the same basic block. 
++ autotuning::CodeRegion CR = autotuning::CodeRegion( ++ Callee->getName().str() + "-" + this->CB->getParent()->getName().str(), ++ Caller->getName().data(), autotuning::CodeRegionType::CallSite, SrcLoc, ++ autotuning::DynamicOptions{{"ForceInline", {0, 1}}}); ++ ++ CR.setSize(Callee->getInstructionCount()); ++ CR.setHotness(Caller->ATEFunction.getHotness()); ++ this->setCodeRegion(CR); ++} ++#endif ++ + //===----------------------------------------------------------------------===// + // CallBase Implementation + //===----------------------------------------------------------------------===// +diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp +index 6ea108d831a1..1583e1c82b3e 100644 +--- a/llvm/lib/IR/StructuralHash.cpp ++++ b/llvm/lib/IR/StructuralHash.cpp +@@ -10,9 +10,23 @@ + #include "llvm/IR/Function.h" + #include "llvm/IR/GlobalVariable.h" + #include "llvm/IR/Module.h" ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/IR/InstrTypes.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/Support/CommandLine.h" ++#endif + + using namespace llvm; + ++#if defined(ENABLE_AUTOTUNER) ++// AutoTuner Flag to use callsite Debug Location for hash calculation. ++static cl::opt HashCallSite( ++ "hash-prior-to-callsite", cl::init(true), cl::Hidden, ++ cl::desc("Use function IR prior to a call site to compute the hashcode for" ++ " the call site")); ++#endif ++ + namespace { + + // Basic hashing mechanism to detect structural change to the IR, used to verify +@@ -21,16 +35,81 @@ namespace { + + class StructuralHashImpl { + hash_code Hash; ++#if defined(ENABLE_AUTOTUNER) ++ const uint64_t BLOCK_HEADER_HASH = 45798; ++#endif + + template void hash(const T &V) { Hash = hash_combine(Hash, V); } + + public: + StructuralHashImpl() : Hash(4) {} + ++#if defined(ENABLE_AUTOTUNER) ++ void update(const MachineBasicBlock &MBB) { ++ // Update the structural hash when we encounter a new basic block. 
++ // Prevents CodeRegions with different structures, but many empty ++ // BasicBlocks to have the same structural hash. ++ if (const BasicBlock *Block = MBB.getBasicBlock()) { ++ hash(BLOCK_HEADER_HASH); // Block header ++ for (auto &Inst : *Block) ++ hash(Inst.getOpcode()); ++ } ++ } ++ ++ void update(const std::vector BBs) { ++ // Update the structural hash when we encounter a new basic block. ++ // Prevents CodeRegions with different structures, but many empty ++ // BasicBlocks to have the same structural hash. ++ for (BasicBlock *BB : BBs) { ++ if (BB == nullptr) ++ continue; ++ ++ hash(BLOCK_HEADER_HASH); // Block header ++ for (auto &Inst : *BB) ++ hash(Inst.getOpcode()); ++ } ++ } ++ ++ void update(const llvm::CallBase &CB) { ++ StringRef Name = ""; ++ if (HashCallSite) { ++ update(*CB.getCaller(), std::addressof(CB)); ++ } else { ++ const Function &F = *CB.getCaller(); ++ Name = F.getName(); ++ std::string FileName = Name.str(); ++ for (uint64_t Idx = 0; Idx < Name.size(); Idx = Idx + sizeof(uint64_t)) { ++ uint64_t Value = 0; ++ FileName.copy((char *)&Value, sizeof(uint64_t), Idx); ++ hash(Value); ++ } ++ } ++ ++ update(*CB.getCalledFunction()); ++ } ++ ++ void update(const SwitchInst &SI) { ++ hash(SI.getNumCases()); ++ for (auto Case : SI.cases()) { ++ hash(BLOCK_HEADER_HASH); ++ const BasicBlock *BB = Case.getCaseSuccessor(); ++ for (auto &Inst : *BB) ++ hash(Inst.getOpcode()); ++ } ++ } ++ ++ void update(const Function &F, const CallBase *TargetCB = nullptr) { ++ if (F.isDeclaration()) ++ return; ++ ++ const Instruction *I = ++ TargetCB ? (dyn_cast(TargetCB)) : nullptr; ++#else + void update(const Function &F) { + // Declarations don't affect analyses. 
+ if (F.isDeclaration()) + return; ++#endif + + hash(12345); // Function header + +@@ -44,9 +123,18 @@ public: + VisitedBBs.insert(BBs[0]); + while (!BBs.empty()) { + const BasicBlock *BB = BBs.pop_back_val(); ++#if defined(ENABLE_AUTOTUNER) ++ hash(BLOCK_HEADER_HASH); // Block header ++ for (auto &Inst : *BB) { ++ hash(Inst.getOpcode()); ++ if (I && Inst.isIdenticalTo(I)) ++ return; ++ } ++#else + hash(45798); // Block header + for (auto &Inst : *BB) + hash(Inst.getOpcode()); ++#endif + + const Instruction *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { +@@ -79,6 +167,32 @@ public: + + } // namespace + ++#if defined(ENABLE_AUTOTUNER) ++uint64_t llvm::StructuralHash(const MachineBasicBlock &MBB) { ++ StructuralHashImpl H; ++ H.update(MBB); ++ return H.getHash(); ++} ++ ++uint64_t llvm::StructuralHash(const std::vector BBs) { ++ StructuralHashImpl H; ++ H.update(BBs); ++ return H.getHash(); ++} ++ ++uint64_t llvm::StructuralHash(const CallBase &CB) { ++ StructuralHashImpl H; ++ H.update(CB); ++ return H.getHash(); ++} ++ ++uint64_t llvm::StructuralHash(const SwitchInst &SI) { ++ StructuralHashImpl H; ++ H.update(SI); ++ return H.getHash(); ++} ++#endif ++ + uint64_t llvm::StructuralHash(const Function &F) { + StructuralHashImpl H; + H.update(F); +diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp +index d0cbbcc0e310..a3ccbc6d258f 100644 +--- a/llvm/lib/Passes/PassBuilder.cpp ++++ b/llvm/lib/Passes/PassBuilder.cpp +@@ -262,6 +262,11 @@ + #include "llvm/Transforms/Vectorize/VectorCombine.h" + #include + ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/Analysis/AutotuningDump.h" ++#include "llvm/Transforms/Scalar/AutoTuningCompile.h" ++#endif ++ + using namespace llvm; + + static const Regex DefaultAliasRegex( +diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp +index 660cb2e974d7..8009e011833c 100644 +--- a/llvm/lib/Passes/PassBuilderPipelines.cpp 
++++ b/llvm/lib/Passes/PassBuilderPipelines.cpp +@@ -133,6 +133,11 @@ + #include "llvm/Transforms/Vectorize/SLPVectorizer.h" + #include "llvm/Transforms/Vectorize/VectorCombine.h" + ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#include "llvm/Transforms/Scalar/AutoTuningCompile.h" ++#endif ++ + using namespace llvm; + + static cl::opt UseInlineAdvisor( +@@ -289,6 +294,10 @@ PipelineTuningOptions::PipelineTuningOptions() { + EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; + } + ++#if defined(ENABLE_AUTOTUNER) ++extern cl::opt AutoTuningCompileMode; ++#endif ++ + namespace llvm { + extern cl::opt MaxDevirtIterations; + extern cl::opt EnableKnowledgeRetention; +@@ -452,9 +461,17 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, + // attention to it. + if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) ++#if defined(ENABLE_AUTOTUNER) ++ { ++ if (AutoTuningCompileMode) ++ LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); ++#endif + LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), + /* OnlyWhenForced= */ !PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll)); ++#if defined(ENABLE_AUTOTUNER) ++ } ++#endif + + invokeLoopOptimizerEndEPCallbacks(LPM2, Level); + +@@ -631,9 +648,17 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, + // attention to it. 
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) ++#if defined(ENABLE_AUTOTUNER) ++ { ++ if (AutoTuningCompileMode) ++ LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); ++#endif + LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), + /* OnlyWhenForced= */ !PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll)); ++#if defined(ENABLE_AUTOTUNER) ++ } ++#endif + + invokeLoopOptimizerEndEPCallbacks(LPM2, Level); + +@@ -1110,6 +1135,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, + if (EnableSyntheticCounts && !PGOOpt) + MPM.addPass(SyntheticCountsPropagation()); + ++#if defined(ENABLE_AUTOTUNER) ++ if (AutoTuningCompileMode) ++ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionInline)); ++#endif ++ + if (EnableModuleInliner) + MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); + else +@@ -1131,6 +1161,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, + /// TODO: Should LTO cause any differences to this set of passes? + void PassBuilder::addVectorPasses(OptimizationLevel Level, + FunctionPassManager &FPM, bool IsFullLTO) { ++#if defined(ENABLE_AUTOTUNER) ++ if (AutoTuningCompileMode && !IsFullLTO) ++ FPM.addPass( ++ AutoTuningCompileFunctionPass(autotuning::CompileOptionVectorize)); ++#endif ++ + FPM.addPass(LoopVectorizePass( + LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); + +@@ -1444,6 +1480,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, + return buildO0DefaultPipeline(Level, LTOPreLink); + + ModulePassManager MPM; ++#if defined(ENABLE_AUTOTUNER) ++ if (AutoTuningCompileMode) ++ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionStart)); ++#endif + + // Convert @llvm.global.annotations to !annotation metadata. 
+ MPM.addPass(Annotation2MetadataPass()); +@@ -1475,6 +1515,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, + + if (LTOPreLink) + addRequiredLTOPreLinkPasses(MPM); ++ ++#if defined(ENABLE_AUTOTUNER) ++ if (AutoTuningCompileMode) ++ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionEnd)); ++#endif ++ + return MPM; + } + +diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def +index e10dc995c493..45a539f14b93 100644 +--- a/llvm/lib/Passes/PassRegistry.def ++++ b/llvm/lib/Passes/PassRegistry.def +@@ -29,6 +29,10 @@ MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) + MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis()) + MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis()) + ++#if defined(ENABLE_AUTOTUNER) ++MODULE_ANALYSIS("autotuning-dump", AutotuningDumpAnalysis()) ++#endif ++ + #ifndef MODULE_ALIAS_ANALYSIS + #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ + MODULE_ANALYSIS(NAME, CREATE_PASS) +@@ -127,6 +131,9 @@ MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass()) + MODULE_PASS("memprof-module", ModuleMemProfilerPass()) + MODULE_PASS("poison-checking", PoisonCheckingPass()) + MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) ++#if defined(ENABLE_AUTOTUNER) ++MODULE_PASS("autotuning-compile-module", AutoTuningCompileModulePass()) ++#endif + #undef MODULE_PASS + + #ifndef MODULE_PASS_WITH_PARAMS +@@ -430,6 +437,9 @@ FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) + FUNCTION_PASS("tsan", ThreadSanitizerPass()) + FUNCTION_PASS("memprof", MemProfilerPass()) + FUNCTION_PASS("declare-to-assign", llvm::AssignmentTrackingPass()) ++#if defined(ENABLE_AUTOTUNER) ++FUNCTION_PASS("autotuning-compile-function", AutoTuningCompileFunctionPass()) ++#endif + #undef FUNCTION_PASS + + #ifndef FUNCTION_PASS_WITH_PARAMS +@@ -614,6 +624,9 @@ LOOP_PASS("guard-widening", GuardWideningPass()) + LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) + 
LOOP_PASS("loop-reroll", LoopRerollPass()) + LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass()) ++#if defined(ENABLE_AUTOTUNER) ++LOOP_PASS("autotuning-compile-loop", AutoTuningCompileLoopPass()) ++#endif + #undef LOOP_PASS + + #ifndef LOOP_PASS_WITH_PARAMS +diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp +index 7eef511928ec..8653027ceed2 100644 +--- a/llvm/lib/Passes/StandardInstrumentations.cpp ++++ b/llvm/lib/Passes/StandardInstrumentations.cpp +@@ -41,6 +41,10 @@ + #include + #include + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#include "llvm/Transforms/Scalar/AutoTuningCompile.h" ++#endif + + using namespace llvm; + +@@ -107,6 +111,10 @@ static cl::opt PrintOnCrash( + cl::desc("Print the last form of the IR before crash (use -print-on-crash-path to dump to a file)"), + cl::Hidden); + ++#if defined(ENABLE_AUTOTUNER) ++extern cl::opt AutoTuningCompileMode; ++#endif ++ + static cl::opt OptBisectPrintIRPath( + "opt-bisect-print-ir-path", + cl::desc("Print IR to path when opt-bisect-limit is reached"), cl::Hidden); +@@ -874,6 +882,21 @@ bool OptPassGateInstrumentation::shouldRun(StringRef PassName, Any IR) { + + void OptPassGateInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { ++#if defined(ENABLE_AUTOTUNER) ++ // Using AutoTuner OptBisect to change the behavior of compilation pipeline. ++ // Flag 'opt-bisect-limit' will be preferred if both 'opt-bisect-limit' and ++ // incremental compilation flags are used. 
++ if (autotuning::Engine.isParseInput() && AutoTuningCompileMode) { ++ if (!getAutoTuningOptPassGate().isEnabled()) ++ return; ++ ++ PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) { ++ return isIgnored(PassID) || ++ getAutoTuningOptPassGate().checkPass(PassID, getIRName(IR)); ++ }); ++ return; ++ } ++#endif + OptPassGate &PassGate = Context.getOptPassGate(); + if (!PassGate.isEnabled()) + return; +diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp +index b2627196bce6..b1dfa9d0f2cf 100644 +--- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp ++++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp +@@ -277,6 +277,14 @@ void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark, + R.push_back(StrTab.add(Remark.RemarkName).first); + R.push_back(StrTab.add(Remark.PassName).first); + R.push_back(StrTab.add(Remark.FunctionName).first); ++#if defined(ENABLE_AUTOTUNER) ++ if (Remark.CodeRegionType) ++ R.push_back(StrTab.add(*Remark.CodeRegionType).first); ++ if (std::optional hash = Remark.CodeRegionHash) ++ R.push_back(*hash); ++ if (std::optional Invocation = Remark.Invocation) ++ R.push_back(*Invocation); ++#endif + Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R); + + if (const std::optional &Loc = Remark.Loc) { +diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp +index 9f4676ce37ab..d1faf4f1553a 100644 +--- a/llvm/lib/Remarks/RemarkStreamer.cpp ++++ b/llvm/lib/Remarks/RemarkStreamer.cpp +@@ -14,6 +14,10 @@ + #include "llvm/Support/CommandLine.h" + #include + ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/IR/DebugInfoMetadata.h" ++#endif ++ + using namespace llvm; + using namespace llvm::remarks; + +diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp +index f5123b0f64ce..baa393c6a619 100644 +--- a/llvm/lib/Remarks/YAMLRemarkParser.cpp ++++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp +@@ 
-17,10 +17,23 @@ + #include "llvm/Support/Endian.h" + #include "llvm/Support/Path.h" + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/Support/CommandLine.h" ++#endif + + using namespace llvm; + using namespace llvm::remarks; + ++#if defined(ENABLE_AUTOTUNER) ++// Creating code regions without meta data (e.g. debug Location, Function Name, ++// etc.). ++// This flag is added here instead of 'lib/AutoTuner/AutoTuning.cpp' to avoid ++// making LLVMRemarks dependent on LLVMCore. ++cl::opt OmitAutotuningMetadata( ++ "auto-tuning-omit-metadata", cl::Hidden, cl::init(false), ++ cl::desc("Include only code region hashes and types in opportunity files")); ++#endif ++ + char YAMLParseError::ID = 0; + + static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) { +@@ -235,6 +248,23 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { + TheRemark.FunctionName = *MaybeStr; + else + return MaybeStr.takeError(); ++#if defined(ENABLE_AUTOTUNER) ++ } else if (KeyName == "CodeRegionType") { ++ if (Expected MaybeStr = parseStr(RemarkField)) ++ TheRemark.CodeRegionType = *MaybeStr; ++ else ++ return MaybeStr.takeError(); ++ } else if (KeyName == "CodeRegionHash") { ++ if (Expected MaybeULL = parseUnsignedLL(RemarkField)) ++ TheRemark.CodeRegionHash = *MaybeULL; ++ else ++ return MaybeULL.takeError(); ++ } else if (KeyName == "Invocation") { ++ if (Expected MaybeULL = parseUnsignedLL(RemarkField)) ++ TheRemark.Invocation = *MaybeULL; ++ else ++ return MaybeULL.takeError(); ++#endif + } else if (KeyName == "Hotness") { + if (Expected MaybeU = parseUnsigned(RemarkField)) + TheRemark.Hotness = *MaybeU; +@@ -261,11 +291,35 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { + } + } + ++#if defined(ENABLE_AUTOTUNER) ++ // Check if any of the mandatory fields are missing. ++ if (TheRemark.RemarkType == Type::AutoTuning) { ++ // We expect type, and pass to be present at least. 
++ if (!TheRemark.CodeRegionType || TheRemark.PassName.empty()) ++ return error("CodeRegionHash, CodeRegionType, or Pass missing.", ++ *RemarkEntry.getRoot()); ++ ++ // Sanity check for the correct command line option. ++ if (!OmitAutotuningMetadata && TheRemark.RemarkName.empty()) ++ return error("Remark Name expected; enable -autotuning-omit-metadata.", ++ *RemarkEntry.getRoot()); ++ ++ if (!OmitAutotuningMetadata && TheRemark.FunctionName.empty()) ++ return error( ++ "Remark Function Name expected; enable -autotuning-omit-metadata.", ++ *RemarkEntry.getRoot()); ++ } else if (TheRemark.RemarkType == Type::Unknown || ++ TheRemark.PassName.empty() || TheRemark.RemarkName.empty() || ++ TheRemark.FunctionName.empty()) ++ return error("Type, Pass, Name or Function missing.", ++ *RemarkEntry.getRoot()); ++#else + // Check if any of the mandatory fields are missing. + if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() || + TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty()) + return error("Type, Pass, Name or Function missing.", + *RemarkEntry.getRoot()); ++#endif + + return std::move(Result); + } +@@ -277,6 +331,9 @@ Expected YAMLRemarkParser::parseType(yaml::MappingNode &Node) { + .Case("!Analysis", remarks::Type::Analysis) + .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute) + .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing) ++#if defined(ENABLE_AUTOTUNER) ++ .Case("!AutoTuning", remarks::Type::AutoTuning) ++#endif + .Case("!Failure", remarks::Type::Failure) + .Default(remarks::Type::Unknown); + if (Type == remarks::Type::Unknown) +@@ -313,6 +370,31 @@ Expected YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) { + return Result; + } + ++#if defined(ENABLE_AUTOTUNER) ++Expected> ++YAMLRemarkParser::parseStrVector(yaml::KeyValueNode &Node) { ++ std::vector Result; ++ auto *SequenceNode = dyn_cast(Node.getValue()); ++ if (!SequenceNode) ++ return error("expected a value of sequence type.", Node); ++ ++ for 
(yaml::Node &Element : *SequenceNode) { ++ auto *ScalarNode = dyn_cast(&Element); ++ if (!ScalarNode) ++ return error("expected a value of scalar type.", Element); ++ else { ++ StringRef Str = ScalarNode->getRawValue(); ++ if (Str.front() == '\'') ++ Str = Str.drop_front(); ++ if (Str.back() == '\'') ++ Str = Str.drop_back(); ++ Result.push_back(Str); ++ } ++ } ++ return Result; ++} ++#endif ++ + Expected YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { + SmallVector Tmp; + auto *Value = dyn_cast(Node.getValue()); +@@ -324,6 +406,19 @@ Expected YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { + return UnsignedValue; + } + ++#if defined(ENABLE_AUTOTUNER) ++Expected YAMLRemarkParser::parseUnsignedLL(yaml::KeyValueNode &Node) { ++ SmallVector Tmp; ++ if (auto *Value = dyn_cast(Node.getValue())) { ++ uint64_t UnsignedValue = 0; ++ if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue)) ++ return error("expected a value of integer type.", *Value); ++ return UnsignedValue; ++ } ++ return error("expected a value of scalar type.", Node); ++} ++#endif ++ + Expected + YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) { + auto *DebugLoc = dyn_cast(Node.getValue()); +@@ -374,6 +469,9 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { + + std::optional KeyStr; + std::optional ValueStr; ++#if defined(ENABLE_AUTOTUNER) ++ std::optional> ValueStrVector; ++#endif + std::optional Loc; + + for (yaml::KeyValueNode &ArgEntry : *ArgMap) { +@@ -400,11 +498,27 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { + if (ValueStr) + return error("only one string entry is allowed per argument.", ArgEntry); + ++#if defined(ENABLE_AUTOTUNER) ++ // Try to parse the value to a string vector. ++ if (Expected> MaybeStrVector = ++ parseStrVector(ArgEntry)) { ++ ValueStrVector = *MaybeStrVector; ++ ValueStr = ""; ++ } else { ++ consumeError(MaybeStrVector.takeError()); ++ // Try to parse the value. 
++ if (Expected MaybeStr = parseStr(ArgEntry)) ++ ValueStr = *MaybeStr; ++ else ++ return MaybeStr.takeError(); ++ } ++#else + // Try to parse the value. + if (Expected MaybeStr = parseStr(ArgEntry)) + ValueStr = *MaybeStr; + else + return MaybeStr.takeError(); ++#endif + + // Keep the key from the string. + KeyStr = KeyName; +@@ -412,10 +526,18 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { + + if (!KeyStr) + return error("argument key is missing.", *ArgMap); ++#if defined(ENABLE_AUTOTUNER) ++ if (!ValueStr && !ValueStrVector) ++#else + if (!ValueStr) ++#endif + return error("argument value is missing.", *ArgMap); + ++#if defined(ENABLE_AUTOTUNER) ++ return Argument{*KeyStr, *ValueStr, ValueStrVector, Loc}; ++#else + return Argument{*KeyStr, *ValueStr, Loc}; ++#endif + } + + Expected> YAMLRemarkParser::next() { +diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h +index 8ef72e16be74..141f10dd3900 100644 +--- a/llvm/lib/Remarks/YAMLRemarkParser.h ++++ b/llvm/lib/Remarks/YAMLRemarkParser.h +@@ -91,6 +91,12 @@ protected: + Expected parseDebugLoc(yaml::KeyValueNode &Node); + /// Parse an argument. + Expected parseArg(yaml::Node &Node); ++#if defined(ENABLE_AUTOTUNER) ++ /// parse a vector of strings. ++ Expected> parseStrVector(yaml::KeyValueNode &Node); ++ /// Parse one value to an unsigned long long. ++ Expected parseUnsignedLL(yaml::KeyValueNode &Node); ++#endif + }; + + /// YAML with a string table to Remark parser. 
+diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +index 68285c3dde1b..1bc0f23f9221 100644 +--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp ++++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +@@ -15,10 +15,45 @@ + #include "llvm/Remarks/Remark.h" + #include "llvm/Support/FileSystem.h" + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/Support/CommandLine.h" ++#endif + + using namespace llvm; + using namespace llvm::remarks; + ++#if defined(ENABLE_AUTOTUNER) ++extern cl::opt OmitAutotuningMetadata; ++ ++// Use the same keys whether we use a string table or not (respectively, T is an ++// unsigned or a StringRef). ++template ++static void mapRemarkHeader( ++ yaml::IO &io, T PassName, T RemarkName, std::optional RL, ++ T FunctionName, std::optional CodeRegionType, ++ std::optional CodeRegionHash, ++ std::optional Invocation, ++ std::optional> BaselineConfig, ++ std::optional>> ++ AutoTunerOptions, ++ std::optional Hotness, ArrayRef Args) { ++ io.mapRequired("Pass", PassName); ++ if (!OmitAutotuningMetadata) { ++ io.mapRequired("Name", RemarkName); ++ io.mapOptional("DebugLoc", RL); ++ io.mapRequired("Function", FunctionName); ++ } ++ io.mapOptional("CodeRegionType", CodeRegionType); ++ io.mapOptional("CodeRegionHash", CodeRegionHash); ++ io.mapOptional("DynamicConfigs", AutoTunerOptions); ++ io.mapOptional("BaselineConfig", BaselineConfig); ++ io.mapOptional("Invocation", Invocation); ++ if (!OmitAutotuningMetadata) { ++ io.mapOptional("Hotness", Hotness); ++ io.mapOptional("Args", Args); ++ } ++} ++#else + // Use the same keys whether we use a string table or not (respectively, T is an + // unsigned or a StringRef). 
+ template +@@ -33,6 +68,7 @@ static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName, + io.mapOptional("Hotness", Hotness); + io.mapOptional("Args", Args); + } ++#endif + + namespace llvm { + namespace yaml { +@@ -53,6 +89,10 @@ template <> struct MappingTraits { + else if (io.mapTag("!AnalysisAliasing", + (Remark->RemarkType == Type::AnalysisAliasing))) + ; ++#if defined(ENABLE_AUTOTUNER) ++ else if (io.mapTag("!AutoTuning", (Remark->RemarkType == Type::AutoTuning))) ++ ; ++#endif + else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure))) + ; + else +@@ -66,14 +106,58 @@ template <> struct MappingTraits { + unsigned NameID = StrTab.add(Remark->RemarkName).first; + unsigned FunctionID = StrTab.add(Remark->FunctionName).first; + mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, ++#if defined(ENABLE_AUTOTUNER) ++ Remark->CodeRegionType, Remark->CodeRegionHash, ++ Remark->Invocation, Remark->BaselineConfig, ++ Remark->AutoTunerOptions, Remark->Hotness, Remark->Args); ++ ++#else + Remark->Hotness, Remark->Args); ++#endif + } else { + mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc, ++#if defined(ENABLE_AUTOTUNER) ++ Remark->FunctionName, Remark->CodeRegionType, ++ Remark->CodeRegionHash, Remark->Invocation, ++ Remark->BaselineConfig, Remark->AutoTunerOptions, ++ Remark->Hotness, Remark->Args); ++#else + Remark->FunctionName, Remark->Hotness, Remark->Args); ++#endif + } + } + }; + ++#if defined(ENABLE_AUTOTUNER) ++// YAML I/O to support dumping 'Values: { key: [...], ... }' in opportunity ++// files. 
++template <> ++struct MappingTraits>> { ++ static void mapping(IO &io, ++ std::map> &OM) { ++ assert(io.outputting() && "input not yet implemented"); ++ ++ // Print as an abbreviated dictionary ++ llvm::yaml::StdMapStringCustomMappingTraitsImpl< ++ std::vector>::output(io, OM); ++ } ++ // This sets the beginFlowMapping and endFlowMapping ++ static const bool flow = true; ++}; ++ ++template <> struct MappingTraits> { ++ static void mapping(IO &io, std::map &OM) { ++ assert(io.outputting() && "input not yet implemented"); ++ ++ // Print as an abbreviated dictionary ++ llvm::yaml::StdMapStringCustomMappingTraitsImpl::output(io, ++ OM); ++ } ++ // This sets the beginFlowMapping and endFlowMapping ++ static const bool flow = true; ++}; ++#endif ++ + template <> struct MappingTraits { + static void mapping(IO &io, RemarkLocation &RL) { + assert(io.outputting() && "input not yet implemented"); +diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp +index d3efb8b67be5..b66415c0e9a9 100644 +--- a/llvm/lib/Support/CommandLine.cpp ++++ b/llvm/lib/Support/CommandLine.cpp +@@ -127,6 +127,9 @@ static inline bool isPrefixedOrGrouping(const Option *O) { + O->getFormattingFlag() == cl::AlwaysPrefix; + } + ++#if defined(ENABLE_AUTOTUNER) ++#include ++#endif + + namespace { + +@@ -1470,6 +1473,44 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv, + Errs, LongOptionsUseDoubleDash); + } + ++#if defined(ENABLE_AUTOTUNER) ++bool cl::ParseAutoTunerOptions( ++ std::unordered_map LLVMParams, ++ std::unordered_map ProgramParams, ++ StringRef Overview, raw_ostream *Errs, const char *EnvVar, ++ bool LongOptionsUseDoubleDash) { ++ SmallVector NewArgv; ++ BumpPtrAllocator A; ++ StringSaver Saver(A); ++ // GlobalParser requires arguments similar to C style command line options ++ // (int argc, char * argv[]) where argv[0] refers to the program name. ++ // We are using a fake program name here which is consistent with LLVM. 
++ NewArgv.push_back("AutoTuner (LLVM option parsing)"); ++ ++ for (const auto &I : LLVMParams) { ++ std::string NewOption = I.first + "=" + I.second; ++ NewArgv.push_back(Saver.save(NewOption).data()); ++ } ++ ++ for (const auto &I : ProgramParams) { ++ std::string NewOption = I.first + "=" + I.second; ++ NewArgv.push_back(Saver.save(NewOption).data()); ++ } ++ ++ // Parse options from environment variable. ++ if (EnvVar) { ++ if (std::optional EnvValue = ++ sys::Process::GetEnv(StringRef(EnvVar))) ++ TokenizeGNUCommandLine(*EnvValue, Saver, NewArgv); ++ } ++ ++ int NewArgc = static_cast(NewArgv.size()); ++ // Parse all options. ++ return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview, ++ Errs, LongOptionsUseDoubleDash); ++} ++#endif ++ + /// Reset all options at least once, so that we can parse different options. + void CommandLineParser::ResetAllOptionOccurrences() { + // Reset all option values to look like they have never been seen before. +diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt +index 034f1587ae8d..3507d357a4c6 100644 +--- a/llvm/lib/Transforms/IPO/CMakeLists.txt ++++ b/llvm/lib/Transforms/IPO/CMakeLists.txt +@@ -57,6 +57,7 @@ add_llvm_component_library(LLVMipo + LINK_COMPONENTS + AggressiveInstCombine + Analysis ++ AutoTuner + BitReader + BitWriter + Core +diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp +index 3e00aebce372..802667819c44 100644 +--- a/llvm/lib/Transforms/IPO/Inliner.cpp ++++ b/llvm/lib/Transforms/IPO/Inliner.cpp +@@ -64,6 +64,9 @@ + #include + #include + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#endif + + using namespace llvm; + +@@ -298,6 +301,27 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, + // be deleted as a batch after inlining. 
+ SmallVector DeadFunctionsInComdats; + ++#if defined(ENABLE_AUTOTUNER) ++ bool IsAutoTunerEnabled = ++ autotuning::Engine.isEnabled() && ++ autotuning::Engine.isTuningAllowedForType(autotuning::CallSite); ++ if (IsAutoTunerEnabled) { ++ SmallVector, 16> CallsCopy = Calls; ++ for (int I = 0; I < (int)CallsCopy.size(); ++I) { ++ CallBase &CB = *CallsCopy[I].first; ++ DebugLoc DLoc = CB.getDebugLoc(); ++ if (!CB.getCaller() || !CB.getCalledFunction() || !DLoc) ++ continue; ++ autotuning::CallSiteLocation Loc = autotuning::CallSiteLocation{ ++ &CB, CB.getCaller(), CB.getCalledFunction(), ++ autotuning::SourceLocation{DLoc->getFilename().str(), DLoc->getLine(), ++ DLoc->getColumn()}}; ++ autotuning::Engine.insertCallSiteLoc(Loc); ++ } ++ autotuning::Engine.cleanCallSiteLoc(); ++ } ++#endif ++ + // Loop forward over all of the calls. Note that we cannot cache the size as + // inlining can introduce new calls that need to be processed. + for (int I = 0; I < (int)Calls.size(); ++I) { +@@ -412,6 +436,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, + if (NewCallee) { + if (!NewCallee->isDeclaration()) { + Calls.push_back({ICB, NewHistoryID}); ++#if defined(ENABLE_AUTOTUNER) ++ if (IsAutoTunerEnabled) ++ if (ICB->getDebugLoc()) ++ autotuning::Engine.updateCallSiteLocs( ++ CB, ICB, ICB->getCalledFunction(), ++ ICB->getDebugLoc()->getLine()); ++#endif + // Continually inlining through an SCC can result in huge compile + // times and bloated code since we arbitrarily stop at some point + // when the inliner decides it's not profitable to inline anymore. +@@ -527,6 +558,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, + FAM.invalidate(F, PreservedAnalyses::none()); + } + ++#if defined(ENABLE_AUTOTUNER) ++ if (IsAutoTunerEnabled) ++ autotuning::Engine.clearCallSiteLocs(); ++#endif ++ + // We must ensure that we only delete functions with comdats if every function + // in the comdat is going to be deleted. 
+ if (!DeadFunctionsInComdats.empty()) { +diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp +index a53baecd4776..9590cf625c64 100644 +--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp ++++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp +@@ -1212,6 +1212,20 @@ bool SampleProfileLoader::inlineHotFunctions( + } + } + } ++#if defined(ENABLE_AUTOTUNER) ++ if (autotuning::Engine.isEnabled()) { ++ // If a callsite is hot/cold, mark its corresponding callee as ++ // hot/cold respectively so that auto-tuning engine will be able to ++ // selectively dump code regions as tuning opportunities. ++ if (const CallInst *CI = dyn_cast(&I)) ++ if (Function *Callee = CI->getCalledFunction()) { ++ if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) ++ Callee->getATEFunction().setHot(); ++ else ++ Callee->getATEFunction().setCold(); ++ } ++ } ++#endif + } + if (Hot || ExternalInlineAdvisor) { + CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); +diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +index 424f1d433606..955353944b14 100644 +--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt ++++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +@@ -30,6 +30,7 @@ add_llvm_component_library(LLVMInstrumentation + + LINK_COMPONENTS + Analysis ++ AutoTuner + Core + Demangle + MC +diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +index 3c8f25d73c62..b9459b59e704 100644 +--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp ++++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +@@ -2132,6 +2132,10 @@ static bool annotateAllFunctions( + F->addFnAttr(Attribute::InlineHint); + LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() + << "\n"); ++#if defined(ENABLE_AUTOTUNER) ++ if (autotuning::Engine.isEnabled()) ++ 
F->getATEFunction().setHot(); ++#endif + } + for (auto &F : ColdFunctions) { + // Only set when there is no Attribute::Hot set by the user. For Hot +@@ -2148,6 +2152,10 @@ static bool annotateAllFunctions( + F->addFnAttr(Attribute::Cold); + LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() + << "\n"); ++#if defined(ENABLE_AUTOTUNER) ++ if (autotuning::Engine.isEnabled()) ++ F->getATEFunction().setCold(); ++#endif + } + return true; + } +diff --git a/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp +new file mode 100644 +index 000000000000..c33cb7cfc256 +--- /dev/null ++++ b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp +@@ -0,0 +1,334 @@ ++#if defined(ENABLE_AUTOTUNER) ++//===--------------- AutoTuningCompile.cpp - Auto-Tuning ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. ++// ++//===----------------------------------------------------------------------===// ++// ++/// \file ++/// This pass implements incremental compilation for AutoTuner to reduce the ++/// compilation time for tuning process. ++/// This pass performs 2 operations. ++/// 1. Writing module level IR files which can be used in subsequent ++/// compilations for AutoTuner flow. So clang frontend don't have to process ++/// the source code from scratch. ++/// 2. Add/Remove attributes for modules and functions to enable/disable ++/// execution of optimization pass(es). It further reduces the compilation ++/// time by skipping optimization pass(es) (If feasible). 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/Transforms/Scalar/AutoTuningCompile.h" ++#include "llvm/Analysis/AutotuningDump.h" ++#include "llvm/AutoTuner/AutoTuning.h" ++#include "llvm/InitializePasses.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Transforms/Scalar.h" ++#include ++ ++// Enable debug messages for AutoTuning Compilation. ++#define DEBUG_TYPE "autotuning-compile" ++ ++using namespace llvm; ++ ++extern cl::opt AutoTuningCompileMode; ++ ++AutoTuningOptPassGate SkipPasses = AutoTuningOptPassGate(true); ++AutoTuningOptPassGate RunPasses = AutoTuningOptPassGate(false); ++bool AutoTuningCompileModule::SkipCompilation = false; ++ ++static void writeFiles(Module &M, std::string Pass) { ++ if (autotuning::Engine.isGenerateOutput()) { ++ switch (AutoTuningCompileMode) { ++ case Basic: ++ case CoarseGrain: ++ if (Pass == autotuning::CompileOptionStart) { ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: " ++ << Pass << ".\n"); ++ auto ATD = new AutotuningDumpLegacy(/* Incremental Compilation */ true); ++ ATD->runOnModule(M); ++ } ++ break; ++ case FineGrain: ++ if (autotuning::Engine.hasOpportunities()) { ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: " ++ << Pass << ".\n"); ++ auto ATD = new AutotuningDumpLegacy(/* Incremental Compilation */ true); ++ ATD->runOnModule(M); ++ } ++ break; ++ default: ++ llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental " ++ "Compilation mode.\n"); ++ } ++ } ++} ++ ++bool AutoTuningOptPassGate::shouldRunPass(const StringRef PassName, ++ StringRef IRDescription) { ++ LLVM_DEBUG(dbgs() << "Skip pass '" << PassName ++ << "': " << (Skip ? 
"True" : "False") << '\n'); ++ return !Skip; ++} ++ ++bool AutoTuningOptPassGate::checkPass(const StringRef PassName, ++ const StringRef TargetDesc) { ++ if (PassName.startswith("AutoTuningCompile")) { ++ LLVM_DEBUG(dbgs() << "Running '" << PassName << "'pass.\n"); ++ return true; ++ } ++ ++ LLVM_DEBUG(dbgs() << "Skip pass '" << PassName ++ << "': " << (Skip ? "True" : "False") << '\n'); ++ return !Skip; ++} ++ ++AutoTuningCompileModule::AutoTuningCompileModule(std::string Pass) { ++ this->Pass = Pass; ++} ++ ++void AutoTuningCompileModule::writeIRFiles(Module &M) const { ++ writeFiles(M, Pass); ++} ++ ++bool AutoTuningCompileModule::modifyCompilationPipeline(Module &M) const { ++ bool Changed = false; ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable " ++ "optimization of module/functions. Pass: " ++ << Pass << '\n'); ++ ++ StringRef Filename = M.getName(); ++ size_t Pos = Filename.rfind(".ll"); ++ if (Pos == StringRef::npos) { ++ errs() << "AutoTuningCompile: Source file is not IR (.ll) file. 
" ++ "Disabling incremental compilation.\n"; ++ AutoTuningCompileMode = Inactive; ++ return Changed; ++ } ++ Filename = Filename.substr(0, Pos); ++ ++ switch (AutoTuningCompileMode) { ++ case Basic: ++ case CoarseGrain: ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for " ++ "Basic/CoarseGrain incremental compilation mode.\n"); ++ break; ++ case FineGrain: { ++ if (Pass == autotuning::CompileOptionStart) { ++ M.getContext().setOptPassGate(SkipPasses); ++ getAutoTuningOptPassGate().setSkip(true); ++ setSkipCompilation(true); ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n"); ++ } else if (getSkipCompilation() && ++ (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) || ++ Pass == "end")) { ++ M.getContext().setOptPassGate(RunPasses); ++ getAutoTuningOptPassGate().setSkip(false); ++ setSkipCompilation(false); ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n"); ++ } else ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = " ++ << (getSkipCompilation() ? 
"True" : "False") ++ << " ) continued.\n"); ++ ++ Changed = true; ++ break; ++ } ++ default: ++ llvm_unreachable( ++ "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n"); ++ } ++ ++ return Changed; ++} ++ ++bool AutoTuningCompileModule::run(Module &M) { ++ bool Changed = false; ++ if (AutoTuningCompileMode == Inactive) ++ return Changed; ++ ++ if (!autotuning::Engine.isEnabled()) { ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n"); ++ return Changed; ++ } ++ ++ writeIRFiles(M); ++ ++ if (autotuning::Engine.isParseInput()) ++ Changed |= modifyCompilationPipeline(M); ++ ++ return Changed; ++} ++ ++AutoTuningCompileModuleLegacy::AutoTuningCompileModuleLegacy(std::string Pass) ++ : ModulePass(AutoTuningCompileModuleLegacy::ID) { ++ this->Pass = Pass; ++} ++ ++bool AutoTuningCompileModuleLegacy::runOnModule(Module &M) { ++ AutoTuningCompileModule Impl(Pass); ++ return Impl.run(M); ++} ++ ++char AutoTuningCompileModuleLegacy::ID = 0; ++ ++StringRef AutoTuningCompileModuleLegacy::getPassName() const { ++ return "AutoTuner Incremental Compilation"; ++} ++ ++INITIALIZE_PASS(AutoTuningCompileModuleLegacy, "autotuning-compile-module", ++ "AutoTuner Incremental Compilation", false, false) ++ ++// Public interface to the AutoTuningCompile pass ++ModulePass *llvm::createAutoTuningCompileModuleLegacyPass(std::string Pass) { ++ return new AutoTuningCompileModuleLegacy(Pass); ++} ++ ++PreservedAnalyses AutoTuningCompileModulePass::run(Module &M, ++ ModuleAnalysisManager &) { ++ AutoTuningCompileModule Impl(Pass); ++ Impl.run(M); ++ return PreservedAnalyses::all(); ++} ++ ++AutoTuningCompileFunction::AutoTuningCompileFunction(std::string Pass) { ++ this->Pass = Pass; ++} ++ ++void AutoTuningCompileFunction::writeIRFiles(Module &M) { ++ if (IsModuleWritten) ++ return; ++ IsModuleWritten = true; ++ writeFiles(M, Pass); ++} ++ ++bool AutoTuningCompileFunction::modifyCompilationPipeline(Function &F) { ++ bool Changed = false; ++ 
LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable " ++ "optimization of module/functions. Pass: " ++ << Pass << '\n'); ++ Module *M = F.getParent(); ++ StringRef Filename = M->getName(); ++ size_t Pos = Filename.rfind(".ll"); ++ if (Pos == StringRef::npos) { ++ errs() << "AutoTuningCompile: Source file is not IR (.ll) file. " ++ "Disabling incremental compilation.\n"; ++ AutoTuningCompileMode = Inactive; ++ return Changed; ++ } ++ Filename = Filename.substr(0, Pos); ++ ++ switch (AutoTuningCompileMode) { ++ case Basic: ++ case CoarseGrain: ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for " ++ "Basic/CoarseGrain incremental compilation mode.\n"); ++ break; ++ case FineGrain: { ++ if (!AutoTuningCompileModule::getSkipCompilation() && ++ Pass == autotuning::CompileOptionStart) { ++ if (!SkipDecision) { ++ M->getContext().setOptPassGate(SkipPasses); ++ getAutoTuningOptPassGate().setSkip(true); ++ SkipDecision = true; ++ } ++ AutoTuningCompileModule::setSkipCompilation(true); ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n"); ++ } else if (AutoTuningCompileModule::getSkipCompilation() && ++ Pass != autotuning::CompileOptionStart && ++ (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) || ++ Pass == autotuning::CompileOptionEnd)) { ++ M->getContext().setOptPassGate(RunPasses); ++ getAutoTuningOptPassGate().setSkip(false); ++ SkipDecision = false; ++ AutoTuningCompileModule::setSkipCompilation(false); ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n"); ++ } else ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = " ++ << (AutoTuningCompileModule::getSkipCompilation() ++ ? 
"True" ++ : "False") ++ << " ) continued.\n"); ++ ++ Changed = true; ++ break; ++ } ++ default: ++ llvm_unreachable( ++ "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n"); ++ } ++ ++ return Changed; ++} ++ ++bool AutoTuningCompileFunction::run(Function &F) { ++ bool Changed = false; ++ if (AutoTuningCompileMode == Inactive) ++ return Changed; ++ ++ if (!autotuning::Engine.isEnabled()) { ++ LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n"); ++ return Changed; ++ } ++ ++ writeIRFiles(*F.getParent()); ++ ++ if (autotuning::Engine.isParseInput()) ++ Changed |= modifyCompilationPipeline(F); ++ ++ return Changed; ++} ++ ++AutoTuningCompileFunctionLegacy::AutoTuningCompileFunctionLegacy( ++ std::string Pass) ++ : FunctionPass(AutoTuningCompileFunctionLegacy::ID) { ++ this->Pass = Pass; ++} ++ ++bool AutoTuningCompileFunctionLegacy::runOnFunction(Function &F) { ++ AutoTuningCompileFunction Impl(Pass); ++ return Impl.run(F); ++} ++ ++char AutoTuningCompileFunctionLegacy::ID = 0; ++ ++StringRef AutoTuningCompileFunctionLegacy::getPassName() const { ++ return "AutoTuner Incremental Compilation"; ++} ++ ++INITIALIZE_PASS(AutoTuningCompileFunctionLegacy, "autotuning-compile-function", ++ "AutoTuner Incremental Compilation", false, false) ++ ++// Public interface to the AutoTuningCompile pass ++FunctionPass * ++llvm::createAutoTuningCompileFunctionLegacyPass(std::string Pass) { ++ return new AutoTuningCompileFunctionLegacy(Pass); ++} ++ ++PreservedAnalyses ++AutoTuningCompileFunctionPass::run(Function &F, FunctionAnalysisManager &AM) { ++ AutoTuningCompileFunction Impl(Pass); ++ Impl.run(F); ++ return PreservedAnalyses::all(); ++} ++ ++PreservedAnalyses ++AutoTuningCompileLoopPass::run(Loop &L, LoopAnalysisManager &AM, ++ LoopStandardAnalysisResults &AR, LPMUpdater &U) { ++ AutoTuningCompileFunction Impl(Pass); ++ Function *F = L.getHeader()->getParent(); ++ Impl.run(*F); ++ return PreservedAnalyses::all(); ++} ++ ++AutoTuningOptPassGate 
&llvm::getAutoTuningOptPassGate() { ++ static AutoTuningOptPassGate AutoTuningGate; ++ return AutoTuningGate; ++} ++ ++#endif +diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt +index eb008c15903a..e5a82ea8f923 100644 +--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt ++++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt +@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts + ADCE.cpp + AlignmentFromAssumptions.cpp + AnnotationRemarks.cpp ++ AutoTuningCompile.cpp + BDCE.cpp + CallSiteSplitting.cpp + ConstantHoisting.cpp +@@ -92,6 +93,7 @@ add_llvm_component_library(LLVMScalarOpts + LINK_COMPONENTS + AggressiveInstCombine + Analysis ++ AutoTuner + Core + InstCombine + Support +diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +index 335b489d3cb2..feb8932eaae7 100644 +--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp ++++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +@@ -66,6 +66,9 @@ + #include + #include + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#endif + + using namespace llvm; + +@@ -173,6 +176,10 @@ static cl::opt + cl::desc("Default threshold (max size of unrolled " + "loop), used in all but O3 optimizations")); + ++#if defined(ENABLE_AUTOTUNER) ++static const std::string UnrollCountParamStr = "UnrollCount"; ++#endif ++ + /// A magic value for use with the Threshold parameter to indicate + /// that the loop unroll should be performed regardless of how much + /// code expansion would result. 
+@@ -893,7 +900,12 @@ bool llvm::computeUnrollCount( + OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, + bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, + TargetTransformInfo::UnrollingPreferences &UP, ++#if defined(ENABLE_AUTOTUNER) ++ TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound, ++ unsigned int Invocation) { ++#else + TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { ++#endif + + UnrollCostEstimator UCE(*L, LoopSize); + +@@ -942,6 +954,43 @@ bool llvm::computeUnrollCount( + } + } + ++#if defined(ENABLE_AUTOTUNER) ++ // Priority 2.5 is using Unroll Count set by AutoTuner (if enabled). ++ if (autotuning::Engine.isEnabled()) { ++ // Create a code region for current loop. This code region will be added to ++ // opportunity list once all the relevant information is gathered. ++ autotuning::Engine.initContainer(L, DEBUG_TYPE, ++ L->getHeader()->getParent()->getName(), ++ /* addOpportunity */ false, Invocation); ++ ++ int NewValue = 0; // the int value is set by lookUpParams() ++ bool UnrollCountChanged = L->lookUpParams("UnrollCount", NewValue); ++ ++ if (UnrollCountChanged) { ++ // Setting the UP.Count with the value suggested by AutoTuner. ++ // AutoTuner will use UnrollCount = 0, 1, X, Y, Z in case of dynamic ++ // configuration and UnrollCount = 0, 1, 2, 4, 8 otherwise to find ++ // optimal configuration. Compiler will unroll the loop with suggested ++ // UnrollCount except when UnrollCount = 1 where AutoTuner is suggesting ++ // to try loop peeling. ++ UP.Count = NewValue; ++ UP.AllowExpensiveTripCount = true; ++ UP.Force = true; ++ UP.Runtime = true; ++ if (!UP.AllowRemainder && UP.Count != 1) ++ UP.Count = 0; ++ ++ // Check for Loop Peeling ++ if (UP.Count == 1) { ++ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold); ++ UP.Runtime = (PP.PeelCount) ? false : UP.Runtime; ++ } ++ ++ return true; ++ } ++ } ++#endif ++ + // 3rd priority is exact full unrolling. 
This will eliminate all copies + // of some exit test. + UP.Count = 0; +@@ -1119,6 +1168,59 @@ bool llvm::computeUnrollCount( + return ExplicitUnroll; + } + ++#if defined(ENABLE_AUTOTUNER) ++// Given UnrollingPreferences count (UPCount) and TripCount for CodeRegion ++// CR, compute the dynamic Unroll values for tuning and add it to CR. ++static void ++computeAutoTunerDynamicUnrollOptions(unsigned UPCount, unsigned TripCount, ++ const autotuning::CodeRegion &CR) { ++ std::vector DynamicTuningOptions; ++ unsigned int PotentialTuningOptions[2]; ++ unsigned int Idx = 0; ++ int Count = -1; ++ unsigned int CurrentOption = 2; ++ unsigned int MaxTuningCount = 64; ++ DynamicTuningOptions.push_back(0); ++ // Add LoopPeeling as an additional option. ++ DynamicTuningOptions.push_back(1); ++ if (!UPCount) { ++ TripCount = (TripCount > MaxTuningCount) ? MaxTuningCount : TripCount; ++ unsigned int Limit = (TripCount == 0) ? 8 : TripCount; ++ DynamicTuningOptions.push_back(TripCount ? TripCount : 8); ++ while (CurrentOption < Limit) { ++ PotentialTuningOptions[Idx] = CurrentOption; ++ CurrentOption *= 2; ++ Idx = (Idx + 1) % 2; ++ ++Count; ++ } ++ } else { ++ while (CurrentOption < UPCount) { ++ PotentialTuningOptions[Idx] = CurrentOption; ++ CurrentOption *= 2; ++ Idx = (Idx + 1) % 2; ++ ++Count; ++ } ++ if (TripCount != UPCount) { ++ if (CurrentOption == UPCount) { ++ CurrentOption *= 2; ++ } ++ if (!TripCount || CurrentOption < TripCount) { ++ PotentialTuningOptions[Idx] = CurrentOption; ++ ++Count; ++ } ++ } ++ if (UPCount != 1) ++ DynamicTuningOptions.push_back(UPCount); ++ } ++ ++ Count = std::min(1, Count); ++ while (Count >= 0) ++ DynamicTuningOptions.push_back(PotentialTuningOptions[Count--]); ++ ++ CR.addAutoTunerOptions("UnrollCount", DynamicTuningOptions); ++} ++#endif ++ + static LoopUnrollResult + tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + const TargetTransformInfo &TTI, AssumptionCache &AC, +@@ -1132,7 +1234,12 @@ 
tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + std::optional ProvidedUpperBound, + std::optional ProvidedAllowPeeling, + std::optional ProvidedAllowProfileBasedPeeling, ++#if defined(ENABLE_AUTOTUNER) ++ std::optional ProvidedFullUnrollMaxCount, ++ unsigned int Invocation = 0) { ++#else + std::optional ProvidedFullUnrollMaxCount) { ++#endif + + LLVM_DEBUG(dbgs() << "Loop Unroll: F[" + << L->getHeader()->getParent()->getName() << "] Loop %" +@@ -1276,11 +1383,28 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + // computeUnrollCount() decides whether it is beneficial to use upper bound to + // fully unroll the loop. + bool UseUpperBound = false; ++ ++#if defined(ENABLE_AUTOTUNER) ++ bool IsCountSetExplicitly = computeUnrollCount( ++ L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, ++ MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound, Invocation); ++ const autotuning::CodeRegion CR = L->getCodeRegion(); ++ // computeAutoTunerDynamicUnrollOptions() adds the dynamic Unroll values to ++ // the CodeRegion. ++ computeAutoTunerDynamicUnrollOptions(UP.Count, TripCount, CR); ++ ++ if (!UP.Count) { ++ autotuning::Engine.addOpportunity( ++ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); ++ return LoopUnrollResult::Unmodified; ++ } ++#else + bool IsCountSetExplicitly = computeUnrollCount( + L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, + TripMultiple, LoopSize, UP, PP, UseUpperBound); + if (!UP.Count) + return LoopUnrollResult::Unmodified; ++#endif + + if (PP.PeelCount) { + assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step"); +@@ -1300,8 +1424,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + // we had, so we don't want to unroll or peel again. 
+ if (PP.PeelProfiledIterations) + L->setLoopAlreadyUnrolled(); ++#if defined(ENABLE_AUTOTUNER) ++ autotuning::Engine.addOpportunity( ++ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); ++ return LoopUnrollResult::PartiallyUnrolled; ++ } ++ autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); ++#else + return LoopUnrollResult::PartiallyUnrolled; + } ++#endif + return LoopUnrollResult::Unmodified; + } + +@@ -1329,8 +1461,18 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, + UP.UnrollRemainder, ForgetAllSCEV}, + LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); ++ ++#if defined(ENABLE_AUTOTUNER) ++ if (UnrollResult == LoopUnrollResult::Unmodified) { ++ autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); ++ return LoopUnrollResult::Unmodified; ++ } ++ autotuning::Engine.addOpportunity( ++ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); ++#else + if (UnrollResult == LoopUnrollResult::Unmodified) + return LoopUnrollResult::Unmodified; ++#endif + + if (RemainderLoop) { + std::optional RemainderLoopID = +@@ -1379,6 +1521,20 @@ public: + /// Otherwise, forgetAllLoops and rebuild when needed next. + bool ForgetAllSCEV; + ++#if defined(ENABLE_AUTOTUNER) ++private: ++ // 'InvocationCounter' keeps track of Invocation of Loop Unroll Pass and ++ // assign it to 'Invocation'. So each LoopUnroll Object knows when it is ++ // being invoked during optimization pipeline. It is used to identify the ++ // Invocation of a pass if it is invoked multiple times. AutoTuner will use ++ // this information to generate the Code Regions and apply the suggested ++ // configuration during the correct invocation of the Loop Unroll Pass. 
++ static unsigned int InvocationCounter; ++ unsigned int Invocation; ++ ++public: ++#endif ++ + std::optional ProvidedCount; + std::optional ProvidedThreshold; + std::optional ProvidedAllowPartial; +@@ -1405,6 +1561,9 @@ public: + ProvidedAllowPeeling(AllowPeeling), + ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), + ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { ++#if defined(ENABLE_AUTOTUNER) ++ Invocation = InvocationCounter++; ++#endif + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); + } + +@@ -1431,7 +1590,12 @@ public: + /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, + ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, + ProvidedUpperBound, ProvidedAllowPeeling, ++#if defined(ENABLE_AUTOTUNER) ++ ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount, ++ Invocation); ++#else + ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); ++#endif + + if (Result == LoopUnrollResult::FullyUnrolled) + LPM.markLoopAsDeleted(*L); +@@ -1449,6 +1613,9 @@ public: + getLoopAnalysisUsage(AU); + } + }; ++#if defined(ENABLE_AUTOTUNER) ++unsigned int LoopUnroll::InvocationCounter = 0; ++#endif + + } // end anonymous namespace + +@@ -1496,6 +1663,11 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, + + std::string LoopName = std::string(L.getName()); + ++#if defined(ENABLE_AUTOTUNER) ++ // LoopFullUnrollPass will be invoked first during optimization pipeline. 
++ unsigned int Invocation = 0; ++#endif ++ + bool Changed = + tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, + /*BFI*/ nullptr, /*PSI*/ nullptr, +@@ -1505,7 +1677,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, + /*Runtime*/ false, /*UpperBound*/ false, + /*AllowPeeling*/ true, + /*AllowProfileBasedPeeling*/ false, ++#if defined(ENABLE_AUTOTUNER) ++ /*FullUnrollMaxCount*/ std::nullopt, ++ /*Invocation*/ Invocation) != ++#else + /*FullUnrollMaxCount*/ std::nullopt) != ++#endif + LoopUnrollResult::Unmodified; + if (!Changed) + return PreservedAnalyses::all(); +@@ -1588,6 +1765,11 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, + + bool Changed = false; + ++#if defined(ENABLE_AUTOTUNER) ++ // LoopUnrollPass will be invoked second during optimization pipeline. ++ unsigned int Invocation = 1; ++#endif ++ + // The unroller requires loops to be in simplified form, and also needs LCSSA. + // Since simplification may add new inner loops, it has to run before the + // legality and profitability checks. This means running the loop unroller +@@ -1630,7 +1812,12 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, + /*Count*/ std::nullopt, + /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial, + UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling, ++#if defined(ENABLE_AUTOTUNER) ++ UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount, ++ Invocation); ++#else + UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); ++#endif + Changed |= Result != LoopUnrollResult::Unmodified; + + // The parent must not be damaged by unrolling! 
+diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp +index 37b032e4d7c7..4b140e8d600b 100644 +--- a/llvm/lib/Transforms/Scalar/Scalar.cpp ++++ b/llvm/lib/Transforms/Scalar/Scalar.cpp +@@ -64,4 +64,8 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { + initializeStraightLineStrengthReduceLegacyPassPass(Registry); + initializePlaceBackedgeSafepointsLegacyPassPass(Registry); + initializeLoopSimplifyCFGLegacyPassPass(Registry); ++#if defined(ENABLE_AUTOTUNER) ++ initializeAutoTuningCompileFunctionLegacyPass(Registry); ++ initializeAutoTuningCompileModuleLegacyPass(Registry); ++#endif + } +diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp +index 8b99f73b850b..b3c60686e252 100644 +--- a/llvm/lib/Transforms/Scalar/Sink.cpp ++++ b/llvm/lib/Transforms/Scalar/Sink.cpp +@@ -248,6 +248,11 @@ namespace { + } + + bool runOnFunction(Function &F) override { ++#if defined(ENABLE_AUTOTUNER) ++ if (skipFunction(F)) ++ return false; ++#endif ++ + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + auto &AA = getAnalysis().getAAResults(); +diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt +index a870071f3f64..8616e7b923c0 100644 +--- a/llvm/lib/Transforms/Utils/CMakeLists.txt ++++ b/llvm/lib/Transforms/Utils/CMakeLists.txt +@@ -93,6 +93,7 @@ add_llvm_component_library(LLVMTransformUtils + + LINK_COMPONENTS + Analysis ++ AutoTuner + Core + Support + TargetParser +diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp +index c36b0533580b..20a4edcb29db 100644 +--- a/llvm/lib/Transforms/Utils/LCSSA.cpp ++++ b/llvm/lib/Transforms/Utils/LCSSA.cpp +@@ -491,6 +491,11 @@ char &llvm::LCSSAID = LCSSAWrapperPass::ID; + + /// Transform \p F into loop-closed SSA form. 
+ bool LCSSAWrapperPass::runOnFunction(Function &F) { ++#if defined(ENABLE_AUTOTUNER) ++ if (skipFunction(F)) ++ return false; ++#endif ++ + LI = &getAnalysis().getLoopInfo(); + DT = &getAnalysis().getDomTree(); + auto *SEWP = getAnalysisIfAvailable(); +diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp +index 3e604fdf2e11..2e42e7f1397f 100644 +--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp ++++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp +@@ -69,6 +69,9 @@ + #include "llvm/Transforms/Utils/BasicBlockUtils.h" + #include "llvm/Transforms/Utils/Local.h" + #include "llvm/Transforms/Utils/LoopUtils.h" ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#endif + using namespace llvm; + + #define DEBUG_TYPE "loop-simplify" +@@ -793,6 +796,11 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + /// it in any convenient order) inserting preheaders... + /// + bool LoopSimplify::runOnFunction(Function &F) { ++#if defined(ENABLE_AUTOTUNER) ++ if (autotuning::Engine.isEnabled() && skipFunction(F)) ++ return false; ++#endif ++ + bool Changed = false; + LoopInfo *LI = &getAnalysis().getLoopInfo(); + DominatorTree *DT = &getAnalysis().getDomTree(); +diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp +index 511dd61308f9..2d2c3e50514b 100644 +--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp ++++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp +@@ -69,6 +69,9 @@ + #include + #include + #include ++#if defined(ENABLE_AUTOTUNER) ++#include "llvm/AutoTuner/AutoTuning.h" ++#endif + + namespace llvm { + class DataLayout; +diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt +index 998dfd956575..f2c5c04abb13 100644 +--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt ++++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt +@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMVectorize + + LINK_COMPONENTS + Analysis 
++ AutoTuner + Core + Support + TransformUtils +diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +index f923f0be6621..f13ce6853666 100644 +--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp ++++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +@@ -113,6 +113,18 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, + // Populate values with existing loop metadata. + getHintsFromMetadata(); + ++#if defined(ENABLE_AUTOTUNER) ++ if (autotuning::Engine.isEnabled()) { ++ int NewValue = 0; ++ bool VectorizationInterleaveChanged = ++ L->lookUpParams("VectorizationInterleave", NewValue); ++ ++ if (VectorizationInterleaveChanged) { ++ Interleave.Value = NewValue; ++ } ++ } ++#endif ++ + // force-vector-interleave overrides DisableInterleaving. + if (VectorizerParams::isInterleaveForced()) + Interleave.Value = VectorizerParams::VectorizationInterleave; +diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +index b603bbe55dc9..46fab860f5a3 100644 +--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp ++++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +@@ -10178,6 +10178,22 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) + VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced || + !EnableLoopVectorization) {} + ++#if defined(ENABLE_AUTOTUNER) ++// Given the iterleave count (IC) and CR, compute the dynamic values for ++// interleave count. Then add it to CR. 
++static void ++computeAutoTunerDynamicInterleaveOptions(unsigned IC, ++ const autotuning::CodeRegion &CR) { ++ ++ std::vector AutoTunerOptions{1, 2, 4}; ++ if (std::find(AutoTunerOptions.begin(), AutoTunerOptions.end(), IC) == ++ AutoTunerOptions.end()) ++ AutoTunerOptions[2] = IC; ++ ++ CR.addAutoTunerOptions("VectorizationInterleave", AutoTunerOptions); ++} ++#endif ++ + bool LoopVectorizePass::processLoop(Loop *L) { + assert((EnableVPlanNativePath || L->isInnermost()) && + "VPlan-native path is not enabled. Only process inner loops."); +@@ -10190,6 +10206,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { + << L->getHeader()->getParent()->getName() << "' from " + << DebugLocStr << "\n"); + ++#if defined(ENABLE_AUTOTUNER) ++ // Initialize the loop for auto-tuning but do not add it ++ // as an tuning opportunity yet. ++ autotuning::Engine.initContainer( ++ L, LV_NAME, L->getHeader()->getParent()->getName(), false); ++#endif + LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); + + LLVM_DEBUG( +@@ -10422,6 +10444,18 @@ bool LoopVectorizePass::processLoop(Loop *L) { + InterleaveLoop = false; + } + ++#if defined(ENABLE_AUTOTUNER) ++ if (!VectorizerParams::isInterleaveForced()) { ++ // Compute the dynamic values for VectorizationInterleave and add it to the ++ // CodeRegion. ++ computeAutoTunerDynamicInterleaveOptions(IC, L->getCodeRegion()); ++ ++ // Add the current loop as a tuning opportunity explicitly. ++ autotuning::Engine.addOpportunity( ++ L->getCodeRegion(), {{"VectorizationInterleave", std::to_string(IC)}}); ++ } ++#endif ++ + // Override IC if user provided an interleave count. + IC = UserIC > 0 ? 
UserIC : IC; + +diff --git a/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml +new file mode 100644 +index 000000000000..f483a269906a +--- /dev/null ++++ b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml +@@ -0,0 +1,8 @@ ++--- !AutoTuning ++Pass: loop-unroll ++Name: [name] ++Function: foo ++CodeRegionType: loop ++Args: ++ - UnrollCount: [number] ++... +diff --git a/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll +new file mode 100644 +index 000000000000..ceb9b4fb2ca6 +--- /dev/null ++++ b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll +@@ -0,0 +1,65 @@ ++; UNSUPPORTED: windows ++; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' \ ++; RUN: %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml ++; RUN: opt --disable-output %s -S -passes='require' \ ++; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 ++; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s ++; RUN: rm -rf %T/../autotune_datadir/* ++ ++; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml ++; RUN: opt %s -S -passes='require' -auto-tuning-config-id=1 ++; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s ++; RUN: rm -rf %T/../autotune_datadir/* ++ ++; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml ++; RUN: opt %s -S -passes='require' -enable-autotuning-dump ++; RUN: echo -n %T/../autotune_datadir/IR_files/ > %t.filename ++; RUN: echo -n "create-data-dir.ll/" >> %t.filename ++; RUN: echo -n %s | sed 's#/#_#g' >> %t.filename ++; RUN: echo -n ".ll" >> %t.filename ++; RUN: cat %t.filename | xargs cat | FileCheck %s ++; RUN: rm -rf %T/../autotune_datadir ++ ++; ModuleID = 'search.c' ++source_filename = "search.c" ++target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ++target triple = "aarch64-unknown-linux-gnu" ++ ++; Function Attrs: argmemonly 
nofree norecurse nosync nounwind readonly uwtable ++define dso_local i32 @search(ptr nocapture noundef readonly %Arr, i32 noundef %Value, i32 noundef %Size) { ++entry: ++ %cmp5 = icmp sgt i32 %Size, 0 ++ br i1 %cmp5, label %for.body.preheader, label %for.end ++ ++for.body.preheader: ; preds = %entry ++ %wide.trip.count = zext i32 %Size to i64 ++ br label %for.body ++ ++for.body: ; preds = %for.body.preheader, %for.inc ++ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] ++ %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv ++ %0 = load i32, ptr %arrayidx, align 4 ++ %cmp1 = icmp eq i32 %0, %Value ++ br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc ++ ++for.inc: ; preds = %for.body ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count ++ br i1 %exitcond.not, label %for.end, label %for.body ++ ++for.end.loopexit.split.loop.exit: ; preds = %for.body ++ %1 = trunc i64 %indvars.iv to i32 ++ br label %for.end ++ ++for.end: ; preds = %for.inc, %for.end.loopexit.split.loop.exit, %entry ++ %Idx.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.end.loopexit.split.loop.exit ], [ %Size, %for.inc ] ++ ret i32 %Idx.0.lcssa ++} ++ ++; Check that only loop body is inside the IR File. 
++; CHECK-LABEL: for.body: ; preds = ++; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] ++; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv ++; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 ++; CHECK-NEXT: %cmp1 = icmp eq i32 %0, %Value ++; CHECK-NEXT: br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc +diff --git a/llvm/test/AutoTuning/AutotuningDump/unroll.ll b/llvm/test/AutoTuning/AutotuningDump/unroll.ll +new file mode 100644 +index 000000000000..e8243da55fff +--- /dev/null ++++ b/llvm/test/AutoTuning/AutotuningDump/unroll.ll +@@ -0,0 +1,35 @@ ++; RUN: rm -rf %T.tmp/Output ++; RUN: mkdir -p %T.tmp/Output ++; RUN: rm %t.DEFAULT.yaml -rf ++; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml ++; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require' \ ++; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 ++; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require' \ ++; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=2 ++; RUN: cat %T.tmp/Output/unroll.ll/1.ll | FileCheck %s -check-prefix=DEFAULT ++; RUN: cat %T.tmp/Output/unroll.ll/2.ll | FileCheck %s -check-prefix=DEFAULT ++; UNSUPPORTED: windows ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++for.end: ; preds = %for.body ++ ret void ++} ++; Check that only loop body is inside the IR File. 
++; DEFAULT-LABEL: for.body: ; preds = %for.body, %entry ++; DEFAULT-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++; DEFAULT-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv ++; DEFAULT: %exitcond = icmp eq i64 %indvars.iv.next, 64 ++; DEFAULT: br i1 %exitcond, label %for.end, label %for.body ++ ++; RUN: rm -rf %T.tmp/Output +diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml +new file mode 100644 +index 000000000000..a5e669c17a71 +--- /dev/null ++++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml +@@ -0,0 +1,9 @@ ++!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 12835463591102937421, ++ CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, ++ Pass: loop-unroll} ++--- !AutoTuning {Args: [{VectorizationInterleave: 2}], ++ CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, ++ Invocation: 0, Name: for.body, Pass: loop-vectorize} ++--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, ++ CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, ++ Pass: loop-unroll} +diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml +new file mode 100644 +index 000000000000..738cf55ffe9a +--- /dev/null ++++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml +@@ -0,0 +1,9 @@ ++!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 12835463591102937421, ++ CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, ++ Pass: loop-unroll} ++--- !AutoTuning {Args: [{VectorizationInterleave: 2}], ++ CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, ++ Invocation: 0, Name: for.body, Pass: loop-vectorize} ++--- 
!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, ++ CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, ++ Pass: loop-unroll} +diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll +new file mode 100644 +index 000000000000..667a076b2d23 +--- /dev/null ++++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll +@@ -0,0 +1,117 @@ ++; ModuleID = 'test.c' ++source_filename = "test.c" ++target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ++target triple = "aarch64-unknown-linux-gnu" ++ ++@.str = private unnamed_addr constant [12 x i8] c"tmp <= 10.0\00", align 1 ++@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1 ++@__PRETTY_FUNCTION__.test = private unnamed_addr constant [12 x i8] c"void test()\00", align 1 ++ ++; Function Attrs: nounwind uwtable ++define dso_local void @test() #0 { ++entry: ++ %cs = alloca i32, align 4 ++ %flush = alloca ptr, align 8 ++ %i = alloca i32, align 4 ++ %tmp = alloca double, align 8 ++ call void @llvm.lifetime.start.p0(i64 4, ptr %cs) #5 ++ store i32 16431360, ptr %cs, align 4, !tbaa !6 ++ call void @llvm.lifetime.start.p0(i64 8, ptr %flush) #5 ++ %0 = load i32, ptr %cs, align 4, !tbaa !6 ++ %conv = sext i32 %0 to i64 ++ %call = call noalias ptr @calloc(i64 noundef %conv, i64 noundef 8) #6 ++ store ptr %call, ptr %flush, align 8, !tbaa !10 ++ call void @llvm.lifetime.start.p0(i64 4, ptr %i) #5 ++ call void @llvm.lifetime.start.p0(i64 8, ptr %tmp) #5 ++ store double 0.000000e+00, ptr %tmp, align 8, !tbaa !12 ++ store i32 0, ptr %i, align 4, !tbaa !6 ++ br label %for.cond ++ ++for.cond: ; preds = %for.inc, %entry ++ %1 = load i32, ptr %i, align 4, !tbaa !6 ++ %2 = load i32, ptr %cs, align 4, !tbaa !6 ++ %cmp = icmp slt i32 %1, %2 ++ br i1 %cmp, label %for.body, label %for.end ++ ++for.body: ; preds = %for.cond ++ %3 = load ptr, ptr %flush, align 8, !tbaa !10 ++ %4 = load i32, ptr %i, align 4, 
!tbaa !6 ++ %idxprom = sext i32 %4 to i64 ++ %arrayidx = getelementptr inbounds double, ptr %3, i64 %idxprom ++ %5 = load double, ptr %arrayidx, align 8, !tbaa !12 ++ %6 = load double, ptr %tmp, align 8, !tbaa !12 ++ %add = fadd double %6, %5 ++ store double %add, ptr %tmp, align 8, !tbaa !12 ++ br label %for.inc ++ ++for.inc: ; preds = %for.body ++ %7 = load i32, ptr %i, align 4, !tbaa !6 ++ %inc = add nsw i32 %7, 1 ++ store i32 %inc, ptr %i, align 4, !tbaa !6 ++ br label %for.cond, !llvm.loop !14 ++ ++for.end: ; preds = %for.cond ++ %8 = load double, ptr %tmp, align 8, !tbaa !12 ++ %cmp2 = fcmp ole double %8, 1.000000e+01 ++ br i1 %cmp2, label %if.then, label %if.else ++ ++if.then: ; preds = %for.end ++ br label %if.end ++ ++if.else: ; preds = %for.end ++ call void @__assert_fail(ptr noundef @.str, ptr noundef @.str.1, i32 noundef 11, ptr noundef @__PRETTY_FUNCTION__.test) #7 ++ unreachable ++ ++if.end: ; preds = %if.then ++ %9 = load ptr, ptr %flush, align 8, !tbaa !10 ++ call void @free(ptr noundef %9) #5 ++ call void @llvm.lifetime.end.p0(i64 8, ptr %tmp) #5 ++ call void @llvm.lifetime.end.p0(i64 4, ptr %i) #5 ++ call void @llvm.lifetime.end.p0(i64 8, ptr %flush) #5 ++ call void @llvm.lifetime.end.p0(i64 4, ptr %cs) #5 ++ ret void ++} ++ ++; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) ++declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 ++ ++; Function Attrs: nounwind allocsize(0,1) ++declare noalias ptr @calloc(i64 noundef, i64 noundef) #2 ++ ++; Function Attrs: noreturn nounwind ++declare void @__assert_fail(ptr noundef, ptr noundef, i32 noundef, ptr noundef) #3 ++ ++; Function Attrs: nounwind ++declare void @free(ptr noundef) #4 ++ ++; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) ++declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 ++ ++attributes #0 = { nounwind uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } ++attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ++attributes #2 = { nounwind allocsize(0,1) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } ++attributes #3 = { noreturn nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } ++attributes #4 = { nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } ++attributes #5 = { nounwind } ++attributes #6 = { nounwind allocsize(0,1) } ++attributes #7 = { noreturn nounwind } ++ ++!llvm.module.flags = !{!0, !1, !2, !3, !4} ++!llvm.ident = !{!5} ++ ++!0 = !{i32 1, !"wchar_size", i32 4} ++!1 = !{i32 8, !"PIC Level", i32 2} ++!2 = !{i32 7, !"PIE Level", i32 2} ++!3 = !{i32 7, !"uwtable", i32 2} ++!4 = !{i32 7, !"frame-pointer", i32 1} ++!5 = !{!"Huawei BiSheng Compiler clang version 18.0.0 (ssh://git@codehub-dg-y.huawei.com:2222/CompilerKernel/BiShengKernel/BiSheng.git 026024071a7fb66b26b65fb81da702cc5f0cf405)"} ++!6 = !{!7, !7, i64 0} ++!7 = !{!"int", !8, i64 0} ++!8 = !{!"omnipotent char", !9, i64 0} ++!9 = !{!"Simple C/C++ TBAA"} ++!10 = !{!11, !11, i64 0} ++!11 = !{!"any pointer", !8, i64 0} ++!12 = !{!13, !13, i64 0} ++!13 = !{!"double", !8, i64 0} ++!14 = distinct !{!14, !15} ++!15 = !{!"llvm.loop.mustprogress"} +diff --git a/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll +new file mode 100644 +index 000000000000..f905208a2f3b +--- /dev/null ++++ b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll 
+@@ -0,0 +1,11 @@ ++; The purpose is to test the baseline IR is the same as the 1st iteration of ++; autotuning process with --use-baseline-config enabled. ++; RUN: rm %t.baseline %t.firstIt -f ++; RUN: opt -O3 %S/Inputs/test.ll -o %t.baseline ++; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_baseline \ ++; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/baseline_config.yaml ++; RUN: cmp %t.firstIt_baseline %t.baseline ++ ++; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_random \ ++; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/random_config.yaml ++; RUN: not cmp %t.firstIt_random %t.baseline +diff --git a/llvm/test/AutoTuning/BaselineConfig/opp.ll b/llvm/test/AutoTuning/BaselineConfig/opp.ll +new file mode 100644 +index 000000000000..b2897316fc22 +--- /dev/null ++++ b/llvm/test/AutoTuning/BaselineConfig/opp.ll +@@ -0,0 +1,67 @@ ++; REQUIRES: asserts ++; RUN: rm %t.callsite_opp -rf ++; RUN: opt %s -O3 -debug-only=inline -disable-output -S 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=DEFAULT ++; RUN: opt %s -O3 -auto-tuning-opp=%t.callsite_opp -disable-output -S 2>&1 ++; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=AUTOTUNE ++ ++@a = global i32 4 ++ ++; Function Attrs: nounwind readnone uwtable ++define i32 @simpleFunction(i32 %a) #0 { ++entry: ++ call void @extern() ++ %a1 = load volatile i32, i32* @a ++ %x1 = add i32 %a1, %a1 ++ %a2 = load volatile i32, i32* @a ++ %x2 = add i32 %x1, %a2 ++ %a3 = load volatile i32, i32* @a ++ %x3 = add i32 %x2, %a3 ++ %a4 = load volatile i32, i32* @a ++ %x4 = add i32 %x3, %a4 ++ %a5 = load volatile i32, i32* @a ++ %x5 = add i32 %x4, %a5 ++ %a6 = load volatile i32, i32* @a ++ %x6 = add i32 %x5, %a6 ++ %a7 = load volatile i32, i32* @a ++ %x7 = add i32 %x6, %a6 ++ %a8 = load volatile i32, i32* @a ++ %x8 = add i32 %x7, %a8 ++ %a9 = load volatile i32, i32* @a ++ %x9 = add i32 %x8, %a9 ++ %a10 = load volatile i32, i32* @a ++ %x10 = add i32 %x9, %a10 ++ %a11 = load volatile i32, i32* @a ++ %x11 = add i32 
%x10, %a11 ++ %a12 = load volatile i32, i32* @a ++ %x12 = add i32 %x11, %a12 ++ %add = add i32 %x12, %a ++ ret i32 %add ++} ++ ++; Function Attrs: nounwind readnone uwtable ++define i32 @bar(i32 %a) #0 { ++entry: ++ %0 = tail call i32 @simpleFunction(i32 6) ++ ret i32 %0 ++} ++ ++declare void @extern() ++ ++attributes #0 = { nounwind readnone uwtable } ++attributes #1 = { nounwind cold readnone uwtable } ++ ++ ++; NOTE: Need to make sure the function inlining has the same behaviour as O3 and ++; 'BaselineConfig' ++; DEFAULT: Inlining calls in: bar ++; DEFAULT: Inlining (cost=115, threshold=375), Call: %0 = tail call i32 @simpleFunction(i32 6) ++ ++; AUTOTUNE: Pass: inline ++; AUTOTUNE-NEXT: Name: simpleFunction ++; AUTOTUNE-NEXT: Function: bar ++; AUTOTUNE-NEXT: CodeRegionType: callsite ++; AUTOTUNE-NEXT: CodeRegionHash: {{[0-9]+}} ++; AUTOTUNE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } ++; AUTOTUNE-NEXT: BaselineConfig: { ForceInline: '1' } ++; AUTOTUNE-NEXT: Invocation: 0 +diff --git a/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll +new file mode 100644 +index 000000000000..13acafae6fc4 +--- /dev/null ++++ b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll +@@ -0,0 +1,62 @@ ++; REQUIRES: asserts ++ ++; RUN: rm -rf %t.filter ++; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ ++; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop --disable-output ++; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=DEFAULT ++ ++; RUN: rm -rf %t.filter ++; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ ++; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ ++; RUN: -auto-tuning-function-filter=foo --disable-output ++; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_FOO ++ ++; RUN: rm -rf %t.filter ++; RUN: opt %s -S 
-passes='function(require,loop-unroll),cgscc(inline)' \ ++; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ ++; RUN: -auto-tuning-function-filter=bar --disable-output ++; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_BAR ++ ++; RUN: rm -rf %t.filter ++; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ ++; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ ++; RUN: -auto-tuning-function-filter=dummy -debug-only=autotuning | \ ++; RUN: FileCheck %s -check-prefix=FILTER_DUMMY ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret void ++} ++ ++define void @bar(i32* nocapture %a) { ++entry: ++ call void @foo(i32* %a) ++ ret void ++} ++ ++; DEFAULT: --- !AutoTuning ++; DEFAULT: --- !AutoTuning ++ ++; FILTER_FOO: --- !AutoTuning ++; FILTER_FOO: Function: foo ++; FILTER_FOO-NOT: --- !AutoTuning ++ ++; FILTER_BAR: --- !AutoTuning ++; FILTER_BAR: Function: bar ++; FILTER_BAR-NOT: --- !AutoTuning ++ ++; FILTER_DUMMY-NOT: --- !AutoTuning ++; FILTER_DUMMY-NOT: --- !AutoTuning +diff --git a/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml +new file mode 100644 +index 000000000000..9c203e58f0ab +--- /dev/null ++++ b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml +@@ -0,0 +1,3 @@ ++ ++ this is a xml file ++ +diff --git a/llvm/test/AutoTuning/Error/Inputs/template.yaml 
b/llvm/test/AutoTuning/Error/Inputs/template.yaml +new file mode 100644 +index 000000000000..1f02b52ffb38 +--- /dev/null ++++ b/llvm/test/AutoTuning/Error/Inputs/template.yaml +@@ -0,0 +1,10 @@ ++--- !AutoTuning ++Pass: pass ++Name: for.body ++Function: foo ++CodeRegionType: loop ++CodeRegionHash: 0 ++Args: ++ - UnrollCount: 2 ++ - PassOrder: [test, test2] ++... +diff --git a/llvm/test/AutoTuning/Error/file-not-found-error.ll b/llvm/test/AutoTuning/Error/file-not-found-error.ll +new file mode 100644 +index 000000000000..6a364239a271 +--- /dev/null ++++ b/llvm/test/AutoTuning/Error/file-not-found-error.ll +@@ -0,0 +1,29 @@ ++; RUN: rm %t.non-existing.yaml -rf ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.non-existing.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR ++ ++; UNSUPPORTED: windows ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret void ++} ++ ++; Check that the error message is shown properly when the input YAML is not found ++; ++; ERROR: Error parsing auto-tuning input. 
++; ERROR: No such file or directory +diff --git a/llvm/test/AutoTuning/Error/invalid-yaml-error.ll b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll +new file mode 100644 +index 000000000000..bfc8784c4ea4 +--- /dev/null ++++ b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll +@@ -0,0 +1,27 @@ ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%S/Inputs/invalid-format.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR ++ ++; UNSUPPORTED: windows ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret void ++} ++ ++; Check that the error message is shown properly when the input YAML has an invalid format ++; ++; ERROR: error: YAML:1:1: error: document root is not of mapping type. +diff --git a/llvm/test/AutoTuning/Error/malformed-input-error.ll b/llvm/test/AutoTuning/Error/malformed-input-error.ll +new file mode 100644 +index 000000000000..0b73c3195503 +--- /dev/null ++++ b/llvm/test/AutoTuning/Error/malformed-input-error.ll +@@ -0,0 +1,136 @@ ++; Check if error messages are shown properly for malformed YAML files. 
++ ++; Missing Pass Field ++; RUN: rm %t.missing-pass.yaml -rf ++; RUN: sed 's#Pass: pass##g' %S/Inputs/template.yaml > %t.missing-pass.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-pass.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-FIELD ++ ++; Missing Pass Value ++; RUN: rm %t.missing-value-pass.yaml -rf ++; RUN: sed 's#pass##g' %S/Inputs/template.yaml > %t.missing-value-pass.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-value-pass.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-PASS-VALUE ++ ++; Missing Name Field ++; RUN: rm %t.missing-name.yaml -rf ++; RUN: sed 's#Name: for.body##g' %S/Inputs/template.yaml > %t.missing-name.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-name.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-NAME-FIELD ++ ++; Missing Name Value ++; RUN: rm %t.missing-value-name.yaml -rf ++; RUN: sed 's#for.body##g' %S/Inputs/template.yaml > %t.missing-value-name.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-value-name.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-NAME-VALUE ++ ++; Missing Function Field ++; RUN: rm %t.missing-function.yaml -rf ++; RUN: sed 's#Function: foo##g' %S/Inputs/template.yaml > %t.missing-function.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' -auto-tuning-input=%t.missing-function.yaml 2>&1 | FileCheck %s -check-prefix=ERROR-FUNCTION-FIELD ++ ++; Missing Function Value ++; RUN: rm %t.missing-value-func.yaml -rf ++; RUN: sed 's#foo##g' %S/Inputs/template.yaml > %t.missing-value-func.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-value-func.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-FUNC-VALUE ++ ++; Missing CodeRegionType Field ++; RUN: rm 
%t.missing-type.yaml -rf ++; RUN: sed 's#CodeRegionType: loop##g' %S/Inputs/template.yaml > %t.missing-type.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-type.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-FIELD ++ ++; Missing CodeRegionType Value ++; RUN: rm %t.missing-value-type.yaml -rf ++; RUN: sed 's#loop##g' %S/Inputs/template.yaml > %t.missing-value-type.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-value-type.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-VALUE ++ ++; Invalid CodeRegionType Value ++; RUN: rm %t.invalid-value-type.yaml -rf ++; RUN: sed 's#loop#error-type#g' %S/Inputs/template.yaml > %t.invalid-value-type.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.invalid-value-type.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-INVALID ++ ++; Missing Param Name ++; RUN: rm %t.missing-param-name.yaml -rf ++; RUN: sed 's#UnrollCount##g' %S/Inputs/template.yaml > %t.missing-param-name.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-param-name.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-PARAM-NAME ++ ++; Missing Param Value ++; RUN: rm %t.missing-value-param.yaml -rf ++; RUN: sed 's#2##g' %S/Inputs/template.yaml > %t.missing-value-param.yaml ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.missing-value-param.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=ERROR-PARAM-VALUE ++ ++; Empty Param List ++; RUN: rm %t.empty-value-param-list.yaml -rf ++; RUN: sed 's#\[test, test2\]#\[\]#g' %S/Inputs/template.yaml > %t.empty-value-param-list.yaml ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.empty-value-param-list.yaml 2>&1 | \ ++; RUN: 
FileCheck %s -check-prefix=VALID ++ ++; UNSUPPORTED: windows ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret void ++} ++ ++; Check that error messages are shown properly for malformed YAML input files. ++; ++ ++; ERROR-FIELD: error: CodeRegionHash, CodeRegionType, or Pass missing. ++ ++; ERROR-NAME-FIELD: error: Remark Name expected; enable -autotuning-omit-metadata. ++ ++; ERROR-FUNCTION-FIELD: error: Remark Function Name expected; enable -autotuning-omit-metadata. ++ ++; ERROR-PASS-VALUE: error: YAML:2:1: error: expected a value of scalar type. ++; ERROR-PASS-VALUE: Pass: ++ ++; ERROR-NAME-VALUE: error: YAML:3:1: error: expected a value of scalar type. ++; ERROR-NAME-VALUE: Name: ++ ++; ERROR-FUNC-VALUE: error: YAML:4:1: error: expected a value of scalar type. ++; ERROR-FUNC-VALUE: Function: ++ ++; ERROR-CODE-REGION-TYPE-FIELD: CodeRegionHash, CodeRegionType, or Pass missing. ++ ++; ERROR-CODE-REGION-TYPE-VALUE: error: YAML:5:1: error: expected a value of scalar type. ++; ERROR-CODE-REGION-TYPE-VALUE: CodeRegionType: ++ ++; ERROR-CODE-REGION-TYPE-INVALID: Unsupported CodeRegionType:error-type ++ ++; ERROR-PARAM-NAME: error: YAML:8:5: error: argument key is missing. ++; ERROR-PARAM-NAME: - : 2 ++ ++; ERROR-PARAM-VALUE: error: YAML:8:5: error: expected a value of scalar type. ++; ERROR-PARAM-VALUE: - UnrollCount: ++ ++; VALID-NOT: -auto-tuning-input=(input file) option failed. 
+diff --git a/llvm/test/AutoTuning/Error/output-error.ll b/llvm/test/AutoTuning/Error/output-error.ll +new file mode 100644 +index 000000000000..61ffba50924b +--- /dev/null ++++ b/llvm/test/AutoTuning/Error/output-error.ll +@@ -0,0 +1,28 @@ ++; RUN: rm %t.opp -rf; touch %t.opp ++; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-opp=%t.opp 2>&1 | FileCheck %s -check-prefix=ERROR-OPP ++ ++; UNSUPPORTED: windows ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret void ++} ++ ++; Check that the error message is shown properly when output files cannot be created ++; ++; ERROR-OPP: Error generating auto-tuning opportunities. 
++; ERROR-OPP: error: Not a directory +diff --git a/llvm/test/AutoTuning/Error/valid-input.ll b/llvm/test/AutoTuning/Error/valid-input.ll +new file mode 100644 +index 000000000000..dae90cdbe408 +--- /dev/null ++++ b/llvm/test/AutoTuning/Error/valid-input.ll +@@ -0,0 +1,27 @@ ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%S/Inputs/template.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=VALID ++; UNSUPPORTED: windows ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret void ++} ++ ++; Check that no error message is shown when the input is valid ++; ++ ++; VALID-NOT: -auto-tuning-input=(input file) option failed. +diff --git a/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml +new file mode 100644 +index 000000000000..a7d390be63e7 +--- /dev/null ++++ b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml +@@ -0,0 +1,9 @@ ++--- !AutoTuning ++Pass: [dummy-pass] ++CodeRegionType: [dummy-type] ++Name: foo ++DebugLoc: { File: [dummy-file], Line: 0, Column: 0 } ++Function: foo ++CodeRegionHash: 0 ++Invocation: 0 ++... 
+diff --git a/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll +new file mode 100644 +index 000000000000..b9dc81089d40 +--- /dev/null ++++ b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll +@@ -0,0 +1,103 @@ ++; REQUIRES: asserts ++; RUN: rm %t.output -rf ++; RUN: rm %t.inc_compile.yaml -rf ++; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml ++; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml ++; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml ++; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ ++; RUN: -auto-tuning-compile-mode=CoarseGrain -print-after-all \ ++; RUN: -debug-only=autotuning-compile \ ++; RUN: -o %t.output 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=COARSEGRAIN ++ ++; RUN: rm %t.output -rf ++; RUN: rm %t.inc_compile.yaml -rf ++; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml ++; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml ++; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml ++; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ ++; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ ++; RUN: -debug-only=autotuning-compile \ ++; RUN: -o %t.output 2>&1 | \ ++; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-INLINE ++ ++; RUN: rm %t.output -rf ++; RUN: rm %t.inc_compile.yaml -rf ++; RUN: sed 's#\[dummy-pass\]#loop-unroll#g' %S/Inputs/template.yaml > %t.temp.yaml ++; RUN: sed 's#\[dummy-type\]#loop#g' %t.temp.yaml > %t.temp2.yaml ++; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml ++; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ ++; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ ++; RUN: -debug-only=autotuning-compile \ ++; RUN: -o %t.output 2>&1 | \ ++; RUN: FileCheck %s 
-check-prefixes=FINEGRAIN-1,FINEGRAIN-2,FINEGRAIN-UNROLL ++ ++; ModuleID = 'test.c' ++source_filename = "test.c" ++target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ++target triple = "aarch64-unknown-linux-gnu" ++ ++; Function Attrs: argmemonly nofree norecurse nosync nounwind uwtable ++define dso_local i32 @test(i32* nocapture noundef %a, i32* nocapture noundef readonly %b, i32 noundef %size) local_unnamed_addr #0 { ++entry: ++ %cmp11 = icmp sgt i32 %size, 0 ++ br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup ++ ++for.body.preheader: ; preds = %entry ++ %wide.trip.count = zext i32 %size to i64 ++ br label %for.body ++ ++for.cond.cleanup: ; preds = %for.body, %entry ++ ret i32 undef ++ ++for.body: ; preds = %for.body.preheader, %for.body ++ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %1 = load i32, i32* %arrayidx2, align 4 ++ %add = add nsw i32 %1, %0 ++ store i32 %add, i32* %arrayidx2, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count ++ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body ++} ++ ++attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} ++!llvm.ident = !{!9} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) ++!1 = !DIFile(filename: 
"test.c", directory: "/home/m00629332/code/autoTuner") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !{i32 1, !"wchar_size", i32 4} ++!5 = !{i32 1, !"branch-target-enforcement", i32 0} ++!6 = !{i32 1, !"sign-return-address", i32 0} ++!7 = !{i32 1, !"sign-return-address-all", i32 0} ++!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} ++!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)"} ++!10 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!11 = !DISubroutineType(types: !2) ++!12 = !DILocation(line: 2, column: 5, scope: !10) ++ ++; COARSEGRAIN: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start ++; COARSEGRAIN-NEXT: AutoTuningCompile: No change in opt pipeline for Basic/CoarseGrain incremental compilation mode. ++; COARSEGRAIN-NOT: Skip pass {{.*}}: True ++ ++; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start ++; FINEGRAIN-1-NEXT: AutoTuningCompile: SkipPasses enabled. ++; FINEGRAIN-1-NOT: Skip pass {{.*}}: False ++; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: inline ++; FINEGRAIN-INLINE: AutoTuningCompile: SkipPasses disabled. ++; FINEGRAIN-INLINE: Skip pass 'InlinerPass': False ++; FINEGRAIN-INLINE-NEXT: *** IR Dump After InlinerPass ++; FINEGRAIN-INLINE-NOT: Skip pass {{.*}}: True ++ ++; FINEGRAIN-2: AutoTuningCompile: Old decision (SkipPasses = True ) continued. ++; FINEGRAIN-2-NOT: Skip pass {{.*}}: False ++; FINEGRAIN-2: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: loop-unroll ++; FINEGRAIN-UNROLL: AutoTuningCompile: SkipPasses disabled. 
++; FINEGRAIN-UNROLL-NOT: Skip pass {{.*}}: True +diff --git a/llvm/test/AutoTuning/Inline/Inputs/template.yaml b/llvm/test/AutoTuning/Inline/Inputs/template.yaml +new file mode 100644 +index 000000000000..e04612183d1f +--- /dev/null ++++ b/llvm/test/AutoTuning/Inline/Inputs/template.yaml +@@ -0,0 +1,9 @@ ++--- !AutoTuning ++Pass: inline ++Name: simpleFunction-entry ++Function: bar ++CodeRegionType: callsite ++CodeRegionHash: 5550568187071847048 ++Args: ++ - ForceInline: [force-inline] ++... +diff --git a/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml +new file mode 100644 +index 000000000000..9fc88f56d6bc +--- /dev/null ++++ b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml +@@ -0,0 +1,7 @@ ++--- !AutoTuning ++Pass: inline ++CodeRegionType: callsite ++CodeRegionHash: 5550568187071847048 ++Args: ++ - ForceInline: [force-inline] ++... +diff --git a/llvm/test/AutoTuning/Inline/duplicate-calls.ll b/llvm/test/AutoTuning/Inline/duplicate-calls.ll +new file mode 100644 +index 000000000000..ad32262ad044 +--- /dev/null ++++ b/llvm/test/AutoTuning/Inline/duplicate-calls.ll +@@ -0,0 +1,96 @@ ++; RUN: rm %t.duplicate_calls -rf ++; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.duplicate_calls \ ++; RUN: -auto-tuning-type-filter=CallSite --disable-output ++; RUN: FileCheck %s --input-file %t.duplicate_calls/duplicate-calls.ll.yaml ++ ++; ModuleID = 'duplicate-calls.c' ++source_filename = "duplicate-calls.c" ++target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ++target triple = "aarch64-unknown-linux-gnu" ++ ++; Function Attrs: nounwind uwtable ++define dso_local void @bar(i32* nocapture %result, i32* %cfb, i32 %bytes) local_unnamed_addr #0 !dbg !10 { ++entry: ++ %call = tail call i32 @test(i32* %cfb, i32 %bytes) #1, !dbg !12 ++ store i32 %call, i32* %result, align 4, !dbg !13, !tbaa !14 ++ ret void, !dbg !18 ++} ++ ++declare dso_local i32 @test(i32*, 
i32) local_unnamed_addr #0 ++ ++; Function Attrs: nounwind uwtable ++define dso_local void @foo(i32* %cfb, i32* readnone %saved, i32* nocapture %result, i32 %bytes) local_unnamed_addr #0 !dbg !19 { ++entry: ++ %tobool.not = icmp eq i32* %cfb, null, !dbg !20 ++ br i1 %tobool.not, label %if.else, label %if.then.split, !dbg !20 ++ ++if.then.split: ; preds = %entry ++ tail call void @bar(i32* %result, i32* nonnull %cfb, i32 %bytes), !dbg !21 ++ br label %return, !dbg !22 ++ ++if.else: ; preds = %entry ++ %tobool1.not = icmp eq i32* %saved, null, !dbg !23 ++ br i1 %tobool1.not, label %if.else.split, label %return, !dbg !23 ++ ++if.else.split: ; preds = %if.else ++ tail call void @bar(i32* %result, i32* null, i32 %bytes), !dbg !21 ++ br label %return, !dbg !23 ++ ++return: ; preds = %if.then.split, %if.else.split, %if.else ++ ret void, !dbg !24 ++} ++ ++attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { nounwind } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} ++!llvm.ident = !{!9} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) ++!1 = !DIFile(filename: "duplicate-calls.c", directory: "/home/m00629332/benchmarks/cBench/source/security_pgp_d/src") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !{i32 1, !"wchar_size", i32 4} ++!5 = !{i32 1, !"branch-target-enforcement", i32 0} ++!6 = !{i32 1, 
!"sign-return-address", i32 0} ++!7 = !{i32 1, !"sign-return-address-all", i32 0} ++!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} ++!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)"} ++!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !11, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!11 = !DISubroutineType(types: !2) ++!12 = !DILocation(line: 10, column: 16, scope: !10) ++!13 = !DILocation(line: 10, column: 14, scope: !10) ++!14 = !{!15, !15, i64 0} ++!15 = !{!"int", !16, i64 0} ++!16 = !{!"omnipotent char", !17, i64 0} ++!17 = !{!"Simple C/C++ TBAA"} ++!18 = !DILocation(line: 14, column: 1, scope: !10) ++!19 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !11, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!20 = !DILocation(line: 22, column: 6, scope: !19) ++!21 = !DILocation(line: 27, column: 2, scope: !19) ++!22 = !DILocation(line: 23, column: 3, scope: !19) ++!23 = !DILocation(line: 24, column: 11, scope: !19) ++!24 = !DILocation(line: 28, column: 1, scope: !19) ++ ++; CHECK: --- !AutoTuning ++; CHECK-NEXT: Pass: inline ++; CHECK-NEXT: Name: bar-if.then.split ++; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } ++; CHECK-NEXT: Function: foo ++; CHECK-NEXT: CodeRegionType: callsite ++; CHECK-NEXT: CodeRegionHash: ++; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } ++; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } ++; CHECK-NEXT: Invocation: 0 ++; CHECK-NEXT: ... 
++; CHECK-NEXT: --- !AutoTuning ++; CHECK-NEXT: Pass: inline ++; CHECK-NEXT: Name: bar-if.else.split ++; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } ++; CHECK-NEXT: Function: foo ++; CHECK-NEXT: CodeRegionType: callsite ++; CHECK-NEXT: CodeRegionHash: ++; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } ++; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } ++; CHECK-NEXT: Invocation: 0 +diff --git a/llvm/test/AutoTuning/Inline/force-inline.ll b/llvm/test/AutoTuning/Inline/force-inline.ll +new file mode 100644 +index 000000000000..cedfc8df3483 +--- /dev/null ++++ b/llvm/test/AutoTuning/Inline/force-inline.ll +@@ -0,0 +1,84 @@ ++; REQUIRES: asserts ++; RUN: opt < %s -passes=inline -debug-only=inline -disable-output -S 2>&1 | FileCheck %s -check-prefix=DEFAULT ++; simpleFunction will be inlined with the default behavior. ++ ++; RUN: rm %t.force-inline.yaml -rf ++; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template.yaml > %t.force-inline.yaml ++; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ ++; RUN: -auto-tuning-input=%t.force-inline.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=FORCE-INLINE ++; Test with ForceInline=true; ++ ++; RUN: rm %t.force-inline.yaml -rf ++; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template_no_metadata.yaml > %t.force-inline.yaml ++; RUN: opt %s -passes=inline -S -auto-tuning-input=%t.force-inline.yaml \ ++; RUN: -debug-only=inline -disable-output -auto-tuning-omit-metadata 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=FORCE-INLINE ++; Test with ForceInline=true; ++ ++; RUN: rm %t.no-inline.yaml -rf ++; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template.yaml > %t.no-inline.yaml ++; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ ++; RUN: -auto-tuning-input=%t.no-inline.yaml 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=NO-INLINE ++; Test with ForceInline=false; ++ ++; RUN: rm %t.no-inline.yaml -rf ++; RUN: sed 's#\[force-inline\]#false#g' 
%S/Inputs/template_no_metadata.yaml > %t.no-inline.yaml ++; RUN: opt %s -passes='cgscc(inline)' -debug-only=inline -disable-output -S \ ++; RUN: -auto-tuning-input=%t.no-inline.yaml -auto-tuning-omit-metadata 2>&1 | \ ++; RUN: FileCheck %s -check-prefix=NO-INLINE ++; Test with ForceInline=false; ++ ++@a = global i32 4 ++ ++; Function Attrs: nounwind readnone uwtable ++define i32 @simpleFunction(i32 %a) #0 { ++entry: ++ call void @extern() ++ %a1 = load volatile i32, i32* @a ++ %x1 = add i32 %a1, %a1 ++ %a2 = load volatile i32, i32* @a ++ %x2 = add i32 %x1, %a2 ++ %a3 = load volatile i32, i32* @a ++ %x3 = add i32 %x2, %a3 ++ %a4 = load volatile i32, i32* @a ++ %x4 = add i32 %x3, %a4 ++ %a5 = load volatile i32, i32* @a ++ %x5 = add i32 %x4, %a5 ++ %a6 = load volatile i32, i32* @a ++ %x6 = add i32 %x5, %a6 ++ %a7 = load volatile i32, i32* @a ++ %x7 = add i32 %x6, %a6 ++ %a8 = load volatile i32, i32* @a ++ %x8 = add i32 %x7, %a8 ++ %a9 = load volatile i32, i32* @a ++ %x9 = add i32 %x8, %a9 ++ %a10 = load volatile i32, i32* @a ++ %x10 = add i32 %x9, %a10 ++ %a11 = load volatile i32, i32* @a ++ %x11 = add i32 %x10, %a11 ++ %a12 = load volatile i32, i32* @a ++ %x12 = add i32 %x11, %a12 ++ %add = add i32 %x12, %a ++ ret i32 %add ++} ++ ++; Function Attrs: nounwind readnone uwtable ++define i32 @bar(i32 %a) #0 { ++entry: ++ %0 = tail call i32 @simpleFunction(i32 6) ++ ret i32 %0 ++} ++ ++declare void @extern() ++ ++attributes #0 = { nounwind readnone uwtable } ++attributes #1 = { nounwind cold readnone uwtable } ++ ++; DEFAULT: Inlining (cost=120, threshold=337) ++; DEFAULT-SAME: simpleFunction ++; FORCE-INLINE: Inlining (cost=always): Force inlined by auto-tuning ++; FORCE-INLINE-SAME: simpleFunction ++; NO-INLINE: NOT Inlining (cost=never): Force non-inlined by auto-tuning ++; NO-INLINE-SAME: simpleFunction +diff --git a/llvm/test/AutoTuning/Inline/inline-attribute.ll b/llvm/test/AutoTuning/Inline/inline-attribute.ll +new file mode 100644 +index 000000000000..50f583d0a51e 
+--- /dev/null ++++ b/llvm/test/AutoTuning/Inline/inline-attribute.ll +@@ -0,0 +1,85 @@ ++; RUN: rm %t.inline_opp -rf ++; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.inline_opp -auto-tuning-type-filter=CallSite --disable-output ++; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-1 ++; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-2 ++ ++; ModuleID = 'inline.c' ++source_filename = "inline.c" ++target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ++target triple = "aarch64-unknown-linux-gnu" ++ ++; Function Attrs: noinline norecurse nounwind readnone uwtable willreturn ++define dso_local i32 @mul(i32 %a) local_unnamed_addr #0 !dbg !10 { ++entry: ++ %mul = mul nsw i32 %a, %a, !dbg !12 ++ ret i32 %mul, !dbg !13 ++} ++ ++; Function Attrs: alwaysinline nounwind uwtable ++define dso_local i32 @add(i32 %a) local_unnamed_addr #1 !dbg !14 { ++entry: ++ %add = shl nsw i32 %a, 1, !dbg !15 ++ ret i32 %add, !dbg !16 ++} ++ ++; Function Attrs: nounwind uwtable ++define dso_local i32 @inc(i32 %a) local_unnamed_addr #2 !dbg !17 { ++entry: ++ %inc = add nsw i32 %a, 1, !dbg !18 ++ ret i32 %inc, !dbg !19 ++} ++ ++; Function Attrs: nounwind uwtable ++define dso_local i32 @func(i32 %a) local_unnamed_addr #2 !dbg !20 { ++entry: ++ %call = call i32 @add(i32 %a), !dbg !21 ++ %call1 = call i32 @mul(i32 %a), !dbg !22 ++ %add = add nsw i32 %call, %call1, !dbg !23 ++ %call2 = call i32 @inc(i32 %a), !dbg !24 ++ %add3 = add nsw i32 %add, %call2, !dbg !25 ++ ret i32 %add3, !dbg !26 ++} ++ ++attributes #0 = { noinline norecurse nounwind readnone uwtable willreturn "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" 
"target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} ++!llvm.ident = !{!9} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) ++!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner/ir-hashing") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !{i32 1, !"wchar_size", i32 4} ++!5 = !{i32 1, !"branch-target-enforcement", i32 0} ++!6 = !{i32 1, !"sign-return-address", i32 0} ++!7 = !{i32 1, !"sign-return-address-all", i32 0} ++!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} ++!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)"} ++!10 = distinct !DISubprogram(name: "mul", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!11 = 
!DISubroutineType(types: !2) ++!12 = !DILocation(line: 3, column: 13, scope: !10) ++!13 = !DILocation(line: 3, column: 5, scope: !10) ++!14 = distinct !DISubprogram(name: "add", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!15 = !DILocation(line: 8, column: 13, scope: !14) ++!16 = !DILocation(line: 8, column: 5, scope: !14) ++!17 = distinct !DISubprogram(name: "inc", scope: !1, file: !1, line: 11, type: !11, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!18 = !DILocation(line: 12, column: 12, scope: !17) ++!19 = !DILocation(line: 12, column: 5, scope: !17) ++!20 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 15, type: !11, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!21 = !DILocation(line: 16, column: 12, scope: !20) ++!22 = !DILocation(line: 16, column: 19, scope: !20) ++!23 = !DILocation(line: 16, column: 18, scope: !20) ++!24 = !DILocation(line: 16, column: 26, scope: !20) ++!25 = !DILocation(line: 16, column: 25, scope: !20) ++!26 = !DILocation(line: 16, column: 5, scope: !20) ++ ++; TEST-1: Pass: inline ++; TEST-1-NOT: Pass: inline ++ ++; TEST-2: Name: inc ++; TEST-2-NEXT: DebugLoc: { File: test.c, Line: 16, Column: 26 } ++; TEST-2-NEXT: Function: func ++; TEST-2-NEXT: CodeRegionType: callsite +diff --git a/llvm/test/AutoTuning/Inline/opp.ll b/llvm/test/AutoTuning/Inline/opp.ll +new file mode 100644 +index 000000000000..dfe1dac29476 +--- /dev/null ++++ b/llvm/test/AutoTuning/Inline/opp.ll +@@ -0,0 +1,64 @@ ++; RUN: rm %t.callsite_opp -rf ++; RUN: sed 's#\[number\]#25#g; s#\[func_name\]#ColdFunction#g' %S/Inputs/template.yaml > %t.template25.yaml ++; RUN: opt %s -passes=inline -S -auto-tuning-opp=%t.callsite_opp -auto-tuning-type-filter=CallSite ++ ++; RUN: FileCheck %s --input-file 
%t.callsite_opp/opp.ll.yaml -check-prefix=CALLSITE ++ ++@a = global i32 4 ++ ++declare void @extern() ++; Function Attrs: nounwind readnone uwtable ++define i32 @simpleFunction(i32 %a) #1 { ++entry: ++ call void @extern() ++ %a1 = load volatile i32, i32* @a ++ %x1 = add i32 %a1, %a1 ++ %a2 = load volatile i32, i32* @a ++ %x2 = add i32 %x1, %a2 ++ %a3 = load volatile i32, i32* @a ++ %x3 = add i32 %x2, %a3 ++ %a4 = load volatile i32, i32* @a ++ %x4 = add i32 %x3, %a4 ++ %a5 = load volatile i32, i32* @a ++ %x5 = add i32 %x4, %a5 ++ %a6 = load volatile i32, i32* @a ++ %x6 = add i32 %x5, %a6 ++ %a7 = load volatile i32, i32* @a ++ %x7 = add i32 %x6, %a6 ++ %a8 = load volatile i32, i32* @a ++ %x8 = add i32 %x7, %a8 ++ %a9 = load volatile i32, i32* @a ++ %x9 = add i32 %x8, %a9 ++ %a10 = load volatile i32, i32* @a ++ %x10 = add i32 %x9, %a10 ++ %a11 = load volatile i32, i32* @a ++ %x11 = add i32 %x10, %a11 ++ %a12 = load volatile i32, i32* @a ++ %x12 = add i32 %x11, %a12 ++ %add = add i32 %x12, %a ++ ret i32 %add ++} ++ ++define i32 @bar(i32 %a) #0 { ++entry: ++ %0 = tail call i32 @simpleFunction(i32 6) ++ ret i32 %0 ++} ++ ++attributes #0 = { nounwind readnone uwtable } ++attributes #1 = { nounwind cold readnone uwtable } ++ ++; Check if code regions are properly generated as tuning opportunities. ++; CALLSITE: --- !AutoTuning ++; CALLSITE-NEXT: Pass: inline ++; CALLSITE-NEXT: Name: simpleFunction ++; CALLSITE-NEXT: Function: bar ++; CALLSITE-NEXT: CodeRegionType: callsite ++; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}} ++; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } ++; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' } ++; CALLSITE-NEXT: Invocation: 0 ++; CALLSITE-NEXT: ... ++ ++; Check if external functions are filtered out. 
++; EXTERNAL-NOT: Name: extern +diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml +new file mode 100644 +index 000000000000..6dc49a1f7dc2 +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml +@@ -0,0 +1,10 @@ ++--- !AutoTuning ++Pass: loop-unroll ++Name: for.cond ++DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } ++Function: foo ++CodeRegionType: loop ++Args: ++ - UnrollCount: [number] ++Invocation: 0 ++... +diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml +new file mode 100644 +index 000000000000..4920329dbd4b +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml +@@ -0,0 +1,10 @@ ++# CodeRegionHash is correct for the first code region only. ++!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, ++ DebugLoc: {Column: 8, File: loop-nest.c, Line: 10}, Function: loop_nest, Invocation: 0, ++ Name: for.body6.us, Pass: loop-unroll} ++--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, ++ DebugLoc: {Column: 5, File: loop-nest.c, Line: 9}, Function: loop_nest, Invocation: 0, ++ Name: for.cond4.preheader.us, Pass: loop-unroll} ++--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, ++ DebugLoc: {Column: 3, File: loop-nest.c, Line: 8}, Function: loop_nest, Invocation: 0, ++ Name: for.cond1.preheader, Pass: loop-unroll} +diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml +new file mode 100644 +index 000000000000..a90cebbce88f +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml +@@ -0,0 +1,9 @@ ++--- !AutoTuning ++Pass: loop-unroll ++Name: loop ++Function: invariant_backedge_1 ++CodeRegionType: loop ++Args: ++ - 
UnrollCount: [number] ++Invocation: 0 ++... +diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml +new file mode 100644 +index 000000000000..18681a0e2efe +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml +@@ -0,0 +1,10 @@ ++--- !AutoTuning ++Pass: loop-unroll ++Name: label %5 ++Function: main ++CodeRegionType: loop ++CodeRegionHash: [hash] ++Args: ++- UnrollCount: [number] ++Invocation: 1 ++... +diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml +new file mode 100644 +index 000000000000..166f877a232e +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml +@@ -0,0 +1,10 @@ ++--- !AutoTuning ++Pass: loop-unroll ++Name: [name] ++Function: foo ++CodeRegionType: loop ++CodeRegionHash: [hash] ++Args: ++ - UnrollCount: [number] ++Invocation: 1 ++... +diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml +new file mode 100644 +index 000000000000..b626473cf782 +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml +@@ -0,0 +1,8 @@ ++--- !AutoTuning ++Pass: loop-unroll ++CodeRegionType: loop ++CodeRegionHash: [hash] ++Args: ++ - UnrollCount: [number] ++Invocation: 1 ++... 
+diff --git a/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll +new file mode 100644 +index 000000000000..85dd690d01c5 +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll +@@ -0,0 +1,161 @@ ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' | \ ++; RUN: FileCheck %s -check-prefix=DISABLE ++ ++; RUN: rm %t.unroll_debug_loc0.yaml -rf ++; RUN: sed 's#\[number\]#0#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc0.yaml ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%t.unroll_debug_loc0.yaml | \ ++; RUN: FileCheck %s -check-prefix=UNROLL0 ++ ++; RUN: rm %t.unroll_debug_loc4.yaml -rf ++; RUN: sed 's#\[number\]#4#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc4.yaml ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-code-region-matching-hash=false \ ++; RUN: -auto-tuning-input=%t.unroll_debug_loc4.yaml | \ ++; RUN: FileCheck %s -check-prefix=UNROLL4 ++ ++; RUN: rm %t.unroll4.yaml -rf ++; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.cond#g; s#\[hash\]#11552168367013316892#g;'\ ++; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-code-region-matching-hash=false \ ++; RUN: -auto-tuning-input=%t.unroll4.yaml | \ ++; RUN: FileCheck %s -check-prefix=UNROLL4-MISMATCH ++ ++; UNSUPPORTED: windows ++ ++; ModuleID = 'loop-opp.c' ++source_filename = "loop-opp.c" ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-unknown-linux-gnu" ++ ++; Function Attrs: noinline nounwind uwtable ++define i32 @foo(i32* %n) #0 !dbg !6 { ++entry: ++ %n.addr = alloca i32*, align 8 ++ %b = alloca i32, align 4 ++ %i = alloca i32, align 4 ++ store i32* %n, i32** %n.addr, align 8 ++ call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13 ++ call void @llvm.dbg.declare(metadata i32* %b, 
metadata !14, metadata !12), !dbg !15 ++ store i32 0, i32* %b, align 4, !dbg !15 ++ call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18 ++ store i32 0, i32* %i, align 4, !dbg !18 ++ br label %for.cond, !dbg !19 ++ ++for.cond: ; preds = %for.inc, %entry ++ %0 = load i32, i32* %i, align 4, !dbg !20 ++ %1 = load i32*, i32** %n.addr, align 8, !dbg !23 ++ %2 = load i32, i32* %1, align 4, !dbg !24 ++ %cmp = icmp slt i32 %0, %2, !dbg !25 ++ br i1 %cmp, label %for.body, label %for.end, !dbg !26 ++ ++for.body: ; preds = %for.cond ++ %3 = load i32, i32* %b, align 4, !dbg !28 ++ %add = add nsw i32 %3, 1, !dbg !30 ++ store i32 %add, i32* %b, align 4, !dbg !31 ++ br label %for.inc, !dbg !32 ++ ++for.inc: ; preds = %for.body ++ %4 = load i32, i32* %i, align 4, !dbg !33 ++ %inc = add nsw i32 %4, 1, !dbg !33 ++ store i32 %inc, i32* %i, align 4, !dbg !33 ++ br label %for.cond, !dbg !35, !llvm.loop !36 ++ ++for.end: ; preds = %for.cond ++ %5 = load i32, i32* %b, align 4, !dbg !39 ++ ret i32 %5, !dbg !40 ++} ++ ++; Function Attrs: nounwind readnone ++declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 ++ ++attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { nounwind readnone } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4} ++!llvm.ident = !{!5} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) ++!1 = !DIFile(filename: "loop-opp.c", directory: "") ++!2 = !{} 
++!3 = !{i32 2, !"Dwarf Version", i32 4} ++!4 = !{i32 2, !"Debug Info Version", i32 3} ++!5 = !{!""} ++!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0) ++!7 = !DISubroutineType(types: !8) ++!8 = !{!9, !10} ++!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) ++!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10) ++!12 = !DIExpression() ++!13 = !DILocation(line: 1, column: 20, scope: !6) ++!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9) ++!15 = !DILocation(line: 3, column: 9, scope: !6) ++!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9) ++!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5) ++!18 = !DILocation(line: 4, column: 14, scope: !17) ++!19 = !DILocation(line: 4, column: 10, scope: !17) ++!20 = !DILocation(line: 4, column: 20, scope: !21) ++!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1) ++!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5) ++!23 = !DILocation(line: 4, column: 25, scope: !21) ++!24 = !DILocation(line: 4, column: 24, scope: !21) ++!25 = !DILocation(line: 4, column: 22, scope: !21) ++!26 = !DILocation(line: 4, column: 5, scope: !27) ++!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1) ++!28 = !DILocation(line: 6, column: 11, scope: !29) ++!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5) ++!30 = !DILocation(line: 6, column: 12, scope: !29) ++!31 = !DILocation(line: 6, column: 9, scope: !29) ++!32 = !DILocation(line: 7, column: 5, scope: !29) ++!33 = !DILocation(line: 4, column: 28, scope: !34) ++!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2) ++!35 = !DILocation(line: 4, column: 5, scope: !34) ++!36 = distinct !{!36, !37, !38} 
++!37 = !DILocation(line: 4, column: 5, scope: !17) ++!38 = !DILocation(line: 7, column: 5, scope: !17) ++!39 = !DILocation(line: 8, column: 12, scope: !6) ++!40 = !DILocation(line: 8, column: 5, scope: !6) ++ ++; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled when ++; the input remark contains DebugLoc info. ++; ++; DISABLE-LABEL: @foo( ++; DISABLE: for.cond ++; DISABLE: for.body ++; DISABLE-NOT: for.body.1 ++; DISABLE: for.inc ++; DISABLE-NOT: llvm.loop.unroll.disable ++ ++; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled ++; when unroll count explicitly set to be 0. ++; ++; UNROLL0-LABEL: @foo( ++; UNROLL0: for.cond ++; UNROLL0: for.body ++; UNROLL0-NOT: for.body.1 ++; UNROLL0: for.inc ++; UNROLL0-NOT: llvm.loop.unroll.disable ++ ++; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 ++; when explicitly requested. ++; ++; UNROLL4-LABEL: @foo( ++; UNROLL4: for.cond ++; UNROLL4: for.body ++; UNROLL4: for.body.1 ++; UNROLL4: for.body.2 ++; UNROLL4: for.body.3 ++; UNROLL4: llvm.loop.unroll.disable ++ ++; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled ++; when DebugLoc is missing in the input remark. 
++; ++; UNROLL4-MISMATCH-LABEL: @foo( ++; UNROLL4-MISMATCH: for.cond ++; UNROLL4-MISMATCH: for.body ++; UNROLL4-MISMATCH-NOT: for.body.1 ++; UNROLL4-MISMATCH: for.inc ++; UNROLL4-MISMATCH-NOT: llvm.loop.unroll.disable +diff --git a/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll +new file mode 100644 +index 000000000000..414c6ff2d1b0 +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll +@@ -0,0 +1,56 @@ ++; RUN: rm %t.default_opp -rf ++; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-type-filter=Loop \ ++; RUN: -passes='require,loop(loop-unroll-full)' --disable-output ++; RUN: FileCheck %s --input-file %t.default_opp/dynamic_config.ll.yaml ++ ++; Function Attrs: nofree norecurse nounwind uwtable ++define dso_local void @transform(i64* nocapture %W) local_unnamed_addr{ ++entry: ++ br label %for.body ++ ++for.body: ; preds = %entry, %for.body ++ %i.037 = phi i32 [ 16, %entry ], [ %inc, %for.body ] ++ %sub = add nsw i32 %i.037, -3 ++ %idxprom = sext i32 %sub to i64 ++ %arrayidx = getelementptr inbounds i64, i64* %W, i64 %idxprom ++ %0 = load i64, i64* %arrayidx, align 8 ++ %sub1 = add nsw i32 %i.037, -6 ++ %idxprom2 = sext i32 %sub1 to i64 ++ %arrayidx3 = getelementptr inbounds i64, i64* %W, i64 %idxprom2 ++ %1 = load i64, i64* %arrayidx3, align 8 ++ %xor = xor i64 %1, %0 ++ %idxprom4 = zext i32 %i.037 to i64 ++ %arrayidx5 = getelementptr inbounds i64, i64* %W, i64 %idxprom4 ++ store i64 %xor, i64* %arrayidx5, align 8 ++ %inc = add nuw nsw i32 %i.037, 1 ++ %cmp = icmp ult i32 %i.037, 79 ++ br i1 %cmp, label %for.body, label %for.body8.preheader ++ ++for.body8.preheader: ; preds = %for.body ++ br label %for.body8 ++ ++for.body8: ; preds = %for.body8.preheader, %for.body8 ++ %indvars.iv = phi i64 [ 80, %for.body8.preheader ], [ %indvars.iv.next, %for.body8 ] ++ %2 = add nsw i64 %indvars.iv, -4 ++ %arrayidx11 = getelementptr inbounds i64, i64* %W, i64 %2 ++ %3 = load i64, i64* 
%arrayidx11, align 8 ++ %4 = add nsw i64 %indvars.iv, -5 ++ %arrayidx14 = getelementptr inbounds i64, i64* %W, i64 %4 ++ %5 = load i64, i64* %arrayidx14, align 8 ++ %xor15 = xor i64 %5, %3 ++ %arrayidx17 = getelementptr inbounds i64, i64* %W, i64 %indvars.iv ++ store i64 %xor15, i64* %arrayidx17, align 8 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp ne i64 %indvars.iv.next, 256 ++ br i1 %exitcond, label %for.body8, label %for.end20 ++ ++for.end20: ; preds = %for.body8 ++ ret void ++} ++ ++; CHECK: --- !AutoTuning ++; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] ++; CHECK: ... ++; CHECK-NEXT: --- !AutoTuning ++; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] ++; CHECK: ... +diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll +new file mode 100644 +index 000000000000..7f3e27ca057a +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll +@@ -0,0 +1,136 @@ ++; REQUIRES: asserts ++; CodeRegionHash matches for the first code region only. AutoTuner will find ++; match for one code region when hash matching is enabled. AutoTuner will find ++; match for all three code regions when hash matching is disabled. 
++ ++; RUN: rm -rf %t.loop_nest.txt ++; RUN: opt %s -passes='require,loop(loop-unroll-full)' \ ++; RUN: -debug-only=autotuning -auto-tuning-input=%S/Inputs/loop_nest.yaml \ ++; RUN: --disable-output &> %t.loop_nest.txt ++; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ ++; RUN: FileCheck %s -check-prefix=HASH_MATCHING_ENABLED ++ ++; RUN: rm -rf %t.loop_nest.txt ++; RUN: opt %s -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-input=%S/Inputs/loop_nest.yaml -debug-only=autotuning \ ++; RUN: -auto-tuning-code-region-matching-hash=false --disable-output &> %t.loop_nest.txt ++; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ ++; RUN: FileCheck %s -check-prefix=HASH_MATCHING_DISABLED ++ ++; ModuleID = 'loop-nest.c' ++source_filename = "loop-nest.c" ++target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ++target triple = "aarch64-unknown-linux-gnu" ++ ++; Function Attrs: nofree norecurse nounwind uwtable ++define dso_local void @loop_nest(i32 %ni, i32 %nj, i32 %nk, i32 %alpha, i32 %beta, i32** nocapture readonly %A, i32** nocapture readonly %B, i32** nocapture readonly %C) local_unnamed_addr #0 !dbg !10 { ++entry: ++ %cmp41 = icmp sgt i32 %ni, 0, !dbg !12 ++ br i1 %cmp41, label %for.cond1.preheader.lr.ph, label %for.end23, !dbg !13 ++ ++for.cond1.preheader.lr.ph: ; preds = %entry ++ %cmp238 = icmp slt i32 %nk, 1 ++ %cmp536 = icmp slt i32 %nj, 1 ++ %wide.trip.count51 = zext i32 %ni to i64, !dbg !12 ++ %wide.trip.count47 = zext i32 %nk to i64 ++ %wide.trip.count = zext i32 %nj to i64 ++ %brmerge = or i1 %cmp238, %cmp536 ++ br label %for.cond1.preheader, !dbg !13 ++ ++for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc21 ++ %indvars.iv49 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next50, %for.inc21 ] ++ br i1 %brmerge, label %for.inc21, label %for.cond4.preheader.us.preheader, !dbg !14 ++ ++for.cond4.preheader.us.preheader: ; preds = %for.cond1.preheader ++ %arrayidx15 = 
getelementptr inbounds i32*, i32** %C, i64 %indvars.iv49 ++ %arrayidx = getelementptr inbounds i32*, i32** %A, i64 %indvars.iv49 ++ %.pre = load i32*, i32** %arrayidx, align 8, !tbaa !15 ++ %.pre53 = load i32*, i32** %arrayidx15, align 8, !tbaa !15 ++ br label %for.cond4.preheader.us, !dbg !14 ++ ++for.cond4.preheader.us: ; preds = %for.cond4.preheader.us.preheader, %for.cond4.for.inc18_crit_edge.us ++ %indvars.iv45 = phi i64 [ 0, %for.cond4.preheader.us.preheader ], [ %indvars.iv.next46, %for.cond4.for.inc18_crit_edge.us ] ++ %arrayidx8.us = getelementptr inbounds i32, i32* %.pre, i64 %indvars.iv45 ++ %arrayidx10.us = getelementptr inbounds i32*, i32** %B, i64 %indvars.iv45 ++ %0 = load i32*, i32** %arrayidx10.us, align 8, !tbaa !15 ++ br label %for.body6.us, !dbg !19 ++ ++for.body6.us: ; preds = %for.cond4.preheader.us, %for.body6.us ++ %indvars.iv = phi i64 [ 0, %for.cond4.preheader.us ], [ %indvars.iv.next, %for.body6.us ] ++ %1 = load i32, i32* %arrayidx8.us, align 4, !dbg !20, !tbaa !21 ++ %mul.us = mul nsw i32 %1, %alpha, !dbg !23 ++ %arrayidx12.us = getelementptr inbounds i32, i32* %0, i64 %indvars.iv, !dbg !24 ++ %2 = load i32, i32* %arrayidx12.us, align 4, !dbg !24, !tbaa !21 ++ %mul13.us = mul nsw i32 %mul.us, %2, !dbg !25 ++ %arrayidx17.us = getelementptr inbounds i32, i32* %.pre53, i64 %indvars.iv, !dbg !26 ++ %3 = load i32, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 ++ %add.us = add nsw i32 %3, %mul13.us, !dbg !27 ++ store i32 %add.us, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !28 ++ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !29 ++ br i1 %exitcond.not, label %for.cond4.for.inc18_crit_edge.us, label %for.body6.us, !dbg !19, !llvm.loop !30 ++ ++for.cond4.for.inc18_crit_edge.us: ; preds = %for.body6.us ++ %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1, !dbg !33 ++ %exitcond48.not = icmp eq i64 %indvars.iv.next46, %wide.trip.count47, !dbg !34 ++ br 
i1 %exitcond48.not, label %for.inc21, label %for.cond4.preheader.us, !dbg !14, !llvm.loop !35 ++ ++for.inc21: ; preds = %for.cond4.for.inc18_crit_edge.us, %for.cond1.preheader ++ %indvars.iv.next50 = add nuw nsw i64 %indvars.iv49, 1, !dbg !37 ++ %exitcond52.not = icmp eq i64 %indvars.iv.next50, %wide.trip.count51, !dbg !12 ++ br i1 %exitcond52.not, label %for.end23, label %for.cond1.preheader, !dbg !13, !llvm.loop !38 ++ ++for.end23: ; preds = %for.inc21, %entry ++ ret void, !dbg !40 ++} ++ ++attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} ++!llvm.ident = !{!9} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) ++!1 = !DIFile(filename: "loop-nest.c", directory: "/home/m00629332/code/autoTuner") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !{i32 1, !"wchar_size", i32 4} ++!5 = !{i32 1, !"branch-target-enforcement", i32 0} ++!6 = !{i32 1, !"sign-return-address", i32 0} ++!7 = !{i32 1, !"sign-return-address-all", i32 0} ++!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} ++!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)"} ++!10 = distinct !DISubprogram(name: "loop_nest", scope: !1, file: !1, line: 1, type: !11, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, 
retainedNodes: !2) ++!11 = !DISubroutineType(types: !2) ++!12 = !DILocation(line: 8, column: 17, scope: !10) ++!13 = !DILocation(line: 8, column: 3, scope: !10) ++!14 = !DILocation(line: 9, column: 5, scope: !10) ++!15 = !{!16, !16, i64 0} ++!16 = !{!"any pointer", !17, i64 0} ++!17 = !{!"omnipotent char", !18, i64 0} ++!18 = !{!"Simple C/C++ TBAA"} ++!19 = !DILocation(line: 10, column: 8, scope: !10) ++!20 = !DILocation(line: 11, column: 23, scope: !10) ++!21 = !{!22, !22, i64 0} ++!22 = !{!"int", !17, i64 0} ++!23 = !DILocation(line: 11, column: 21, scope: !10) ++!24 = !DILocation(line: 11, column: 33, scope: !10) ++!25 = !DILocation(line: 11, column: 31, scope: !10) ++!26 = !DILocation(line: 11, column: 4, scope: !10) ++!27 = !DILocation(line: 11, column: 12, scope: !10) ++!28 = !DILocation(line: 10, column: 29, scope: !10) ++!29 = !DILocation(line: 10, column: 22, scope: !10) ++!30 = distinct !{!30, !19, !31, !32} ++!31 = !DILocation(line: 11, column: 39, scope: !10) ++!32 = !{!"llvm.loop.mustprogress"} ++!33 = !DILocation(line: 9, column: 26, scope: !10) ++!34 = !DILocation(line: 9, column: 19, scope: !10) ++!35 = distinct !{!35, !14, !36, !32} ++!36 = !DILocation(line: 12, column: 5, scope: !10) ++!37 = !DILocation(line: 8, column: 24, scope: !10) ++!38 = distinct !{!38, !13, !39, !32} ++!39 = !DILocation(line: 13, column: 3, scope: !10) ++!40 = !DILocation(line: 15, column: 1, scope: !10) ++ ++; HASH_MATCHING_ENABLED: 1 ++; HASH_MATCHING_DISABLED: 3 +diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll +new file mode 100644 +index 000000000000..f3839a49b20e +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll +@@ -0,0 +1,53 @@ ++; NOTE: This file is used to test when UnrollCount = 1 and when the compiler ++; sees that Loop Peeling is beneficial and possible, then we do Loop Peeling. 
++; RUN: rm %t.unroll1.yaml -rf ++; RUN: sed 's#\[number\]#1#g;' %S/Inputs/loop_peel.yaml > %t.unroll1.yaml ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-code-region-matching-hash=false \ ++; RUN: -auto-tuning-input=%t.unroll1.yaml | FileCheck %s ++ ++; RUN: rm %t.unroll0.yaml -rf ++; RUN: sed 's#\[number\]#0#g;' %S/Inputs/loop_peel.yaml > %t.unroll0.yaml ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-code-region-matching-hash=false \ ++; RUN: -auto-tuning-input=%t.unroll0.yaml | FileCheck %s --check-prefix=DISABLE ++ ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ ++; RUN: -auto-tuning-code-region-matching-hash=false \ ++; RUN: -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop --disable-output ++; RUN: FileCheck %s --input-file %t.unroll_opp/loop_peel.ll.yaml -check-prefix=TEST-1 ++ ++define i32 @invariant_backedge_1(i32 %a, i32 %b) { ++; CHECK-LABEL: @invariant_backedge_1 ++; CHECK-NOT: %plus = phi ++; CHECK: loop.peel: ++; CHECK: loop: ++; CHECK: %i = phi ++; CHECK: %sum = phi ++; DISABLE-LABEL: @invariant_backedge_1 ++; DISABLE-NOT: loop.peel: ++entry: ++ br label %loop ++ ++loop: ++ %i = phi i32 [ 0, %entry ], [ %inc, %loop ] ++ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ] ++ %plus = phi i32 [ %a, %entry ], [ %b, %loop ] ++ ++ %incsum = add i32 %sum, %plus ++ %inc = add i32 %i, 1 ++ %cmp = icmp slt i32 %i, 1000 ++ ++ br i1 %cmp, label %loop, label %exit ++ ++exit: ++ ret i32 %sum ++} ++ ++; Check for dynamic values when UnrollCount is set to 1: ++; TEST-1: Pass: loop-unroll ++; TEST-1-NEXT: Name: loop ++; TEST-1-NEXT: Function: invariant_backedge_1 ++; TEST-1-NEXT: CodeRegionType: loop ++; TEST-1-NEXT: CodeRegionHash: {{[0-9]+}} ++; TEST-1-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 2 ] } +diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll +new file mode 100644 +index 000000000000..843b8e28f3d8 
+--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll +@@ -0,0 +1,129 @@ ++; RUN: rm %t.unroll_opp -rf ++; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ ++; RUN: -passes='require,loop(loop-unroll-full)' --disable-output ++; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 ++; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 ++ ++; RUN: rm %t.unroll_opp -rf ++; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ ++; RUN: -passes='require,function(loop-unroll)' --disable-output ++; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 ++; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 ++ ++; This function contains two loops. loop for.body is defined with a pragma ++; unroll_count(4) and loop for.body9 is without a pragma. AutoTuner will only ++; consider for.body9 as a tuning opportunity. 
++ ++; ModuleID = 'loop-unroll.c' ++source_filename = "loop-unroll.c" ++target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ++target triple = "aarch64-unknown-linux-gnu" ++ ++; Function Attrs: nofree norecurse nounwind uwtable ++define dso_local void @loop(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32* noalias nocapture %d, i32 %len) local_unnamed_addr #0 !dbg !10 { ++entry: ++ %cmp34 = icmp slt i32 0, %len, !dbg !12 ++ br i1 %cmp34, label %for.body.lr.ph, label %for.cond6.preheader, !dbg !13 ++ ++for.body.lr.ph: ; preds = %entry ++ br label %for.body, !dbg !13 ++ ++for.cond.for.cond6.preheader_crit_edge: ; preds = %for.body ++ br label %for.cond6.preheader, !dbg !13 ++ ++for.cond6.preheader: ; preds = %for.cond.for.cond6.preheader_crit_edge, %entry ++ %cmp732 = icmp slt i32 0, %len, !dbg !14 ++ br i1 %cmp732, label %for.body9.lr.ph, label %for.cond.cleanup8, !dbg !15 ++ ++for.body9.lr.ph: ; preds = %for.cond6.preheader ++ br label %for.body9, !dbg !15 ++ ++for.body: ; preds = %for.body.lr.ph, %for.body ++ %i.035 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] ++ %idxprom = zext i32 %i.035 to i64, !dbg !16 ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom, !dbg !16 ++ %0 = load i32, i32* %arrayidx, align 4, !dbg !16, !tbaa !17 ++ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom, !dbg !21 ++ %1 = load i32, i32* %arrayidx2, align 4, !dbg !21, !tbaa !17 ++ %add = add nsw i32 %1, %0, !dbg !22 ++ %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %idxprom, !dbg !23 ++ store i32 %add, i32* %arrayidx4, align 4, !dbg !24, !tbaa !17 ++ %inc = add nuw nsw i32 %i.035, 1, !dbg !25 ++ %cmp = icmp slt i32 %inc, %len, !dbg !12 ++ br i1 %cmp, label %for.body, label %for.cond.for.cond6.preheader_crit_edge, !dbg !13, !llvm.loop !26 ++ ++for.cond6.for.cond.cleanup8_crit_edge: ; preds = %for.body9 ++ br label %for.cond.cleanup8, !dbg !15 ++ ++for.cond.cleanup8: ; preds 
= %for.cond6.for.cond.cleanup8_crit_edge, %for.cond6.preheader ++ ret void, !dbg !30 ++ ++for.body9: ; preds = %for.body9.lr.ph, %for.body9 ++ %i5.033 = phi i32 [ 0, %for.body9.lr.ph ], [ %inc17, %for.body9 ] ++ %idxprom10 = zext i32 %i5.033 to i64, !dbg !31 ++ %arrayidx11 = getelementptr inbounds i32, i32* %a, i64 %idxprom10, !dbg !31 ++ %2 = load i32, i32* %arrayidx11, align 4, !dbg !31, !tbaa !17 ++ %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 %idxprom10, !dbg !32 ++ %3 = load i32, i32* %arrayidx13, align 4, !dbg !32, !tbaa !17 ++ %mul = mul nsw i32 %3, %2, !dbg !33 ++ %arrayidx15 = getelementptr inbounds i32, i32* %d, i64 %idxprom10, !dbg !34 ++ store i32 %mul, i32* %arrayidx15, align 4, !dbg !35, !tbaa !17 ++ %inc17 = add nuw nsw i32 %i5.033, 1, !dbg !36 ++ %cmp7 = icmp slt i32 %inc17, %len, !dbg !14 ++ br i1 %cmp7, label %for.body9, label %for.cond6.for.cond.cleanup8_crit_edge, !dbg !15, !llvm.loop !37 ++} ++ ++attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} ++!llvm.ident = !{!9} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) ++!1 = !DIFile(filename: "loop-unroll.c", directory: "/home/AutoTuner/") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !{i32 1, !"wchar_size", i32 4} ++!5 = !{i32 1, !"branch-target-enforcement", i32 0} ++!6 = !{i32 1, 
!"sign-return-address", i32 0} ++!7 = !{i32 1, !"sign-return-address-all", i32 0} ++!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} ++!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)"} ++!10 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ++!11 = !DISubroutineType(types: !2) ++!12 = !DILocation(line: 3, column: 20, scope: !10) ++!13 = !DILocation(line: 3, column: 5, scope: !10) ++!14 = !DILocation(line: 7, column: 20, scope: !10) ++!15 = !DILocation(line: 7, column: 5, scope: !10) ++!16 = !DILocation(line: 4, column: 16, scope: !10) ++!17 = !{!18, !18, i64 0} ++!18 = !{!"int", !19, i64 0} ++!19 = !{!"omnipotent char", !20, i64 0} ++!20 = !{!"Simple C/C++ TBAA"} ++!21 = !DILocation(line: 4, column: 23, scope: !10) ++!22 = !DILocation(line: 4, column: 21, scope: !10) ++!23 = !DILocation(line: 4, column: 9, scope: !10) ++!24 = !DILocation(line: 4, column: 14, scope: !10) ++!25 = !DILocation(line: 3, column: 28, scope: !10) ++!26 = distinct !{!26, !13, !27, !28, !29} ++!27 = !DILocation(line: 5, column: 5, scope: !10) ++!28 = !{!"llvm.loop.mustprogress"} ++!29 = !{!"llvm.loop.unroll.count", i32 4} ++!30 = !DILocation(line: 10, column: 1, scope: !10) ++!31 = !DILocation(line: 8, column: 16, scope: !10) ++!32 = !DILocation(line: 8, column: 23, scope: !10) ++!33 = !DILocation(line: 8, column: 21, scope: !10) ++!34 = !DILocation(line: 8, column: 9, scope: !10) ++!35 = !DILocation(line: 8, column: 14, scope: !10) ++!36 = !DILocation(line: 7, column: 28, scope: !10) ++!37 = distinct !{!37, !15, !38, !28} ++!38 = !DILocation(line: 9, column: 5, scope: !10) ++ ++ ++; TEST-1: Pass: loop-unroll ++; TEST-1-NOT: Pass: loop-unroll ++ ++; TEST-2: Name: for.body9 ++; TEST-2-NEXT: DebugLoc: { File: loop-unroll.c, Line: 7, Column: 5 } ++; TEST-2-NEXT: Function: loop ++; TEST-2-NEXT: CodeRegionType: loop +diff --git 
a/llvm/test/AutoTuning/LoopUnroll/unroll.ll b/llvm/test/AutoTuning/LoopUnroll/unroll.ll +new file mode 100644 +index 000000000000..ba5c89fffaff +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/unroll.ll +@@ -0,0 +1,101 @@ ++; RUN: opt %s -S -passes=loop-unroll | FileCheck %s -check-prefix=DISABLE ++ ++; RUN: rm %t.unroll0.yaml -rf ++; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ ++; RUN: %S/Inputs/unroll_template.yaml > %t.unroll0.yaml ++; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ ++; RUN: -auto-tuning-code-region-matching-hash=false | \ ++; RUN: FileCheck %s -check-prefix=UNROLL0 ++ ++; RUN: rm %t.unroll0.yaml -rf ++; RUN: sed 's#\[number\]#0#g; s#\[hash\]#14791762861362113823#g' \ ++; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll0.yaml ++; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ ++; RUN: -auto-tuning-omit-metadata | \ ++; RUN: FileCheck %s -check-prefix=UNROLL0 ++ ++; RUN: rm %t.result1 %t.unroll1.yaml -rf ++; RUN: sed 's#\[number\]#1#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ ++; RUN: %S/Inputs/unroll_template.yaml > %t.unroll1.yaml ++; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml | \ ++; RUN: FileCheck %s -check-prefix=UNROLL1 ++ ++; RUN: rm %t.result1 %t.unroll1.yaml -rf ++; RUN: sed 's#\[number\]#1#g; s#\[hash\]#14791762861362113823#g' \ ++; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll1.yaml ++; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml \ ++; RUN: -auto-tuning-omit-metadata | \ ++; RUN: FileCheck %s -check-prefix=UNROLL1 ++ ++; RUN: rm %t.result4 %t.unroll4.yaml -rf ++; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ ++; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml ++; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml | \ ++; RUN: FileCheck %s -check-prefix=UNROLL4 ++ ++; RUN: rm 
%t.result4 %t.unroll4.yaml -rf ++; RUN: sed 's#\[number\]#4#g; s#\[hash\]#14791762861362113823#g' \ ++; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll4.yaml ++; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml \ ++; RUN: -auto-tuning-omit-metadata | \ ++; RUN: FileCheck %s -check-prefix=UNROLL4 ++ ++; UNSUPPORTED: windows ++ ++define void @foo(i32* nocapture %a) { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ++ %0 = load i32, i32* %arrayidx, align 4 ++ %inc = add nsw i32 %0, 1 ++ store i32 %inc, i32* %arrayidx, align 4 ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %exitcond = icmp eq i64 %indvars.iv.next, 64 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret void ++} ++ ++; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled ++; ++; DISABLE-LABEL: @foo( ++; DISABLE: store i32 ++; DISABLE-NOT: store i32 ++; DISABLE: br i1 ++; DISABLE-NOT: llvm.loop.unroll.disable ++ ++ ++; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled ++; when unroll count explicitly set to be 0. ++; ++; UNROLL0-LABEL: @foo( ++; UNROLL0: store i32 ++; UNROLL0-NOT: store i32 ++; UNROLL0: br i1 ++; UNROLL0-NOT: llvm.loop.unroll.disable ++ ++ ++; Auto-tuning-enabled loop unrolling - Requesting UnrollCount = 1 will perform ++; Loop Peeling, and if Loop Peeling isn't possible/beneficial then Unroll Count ++; is unchanged. ++; ++; UNROLL1-LABEL: @foo( ++; UNROLL1: store i32 ++; UNROLL1-NOT: store i32 ++; UNROLL1: br i1 ++; UNROLL1: llvm.loop.unroll.disable ++ ++; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 ++; when explicitly requested. 
++; ++; UNROLL4-LABEL: @foo( ++; UNROLL4: store i32 ++; UNROLL4: store i32 ++; UNROLL4: store i32 ++; UNROLL4: store i32 ++; UNROLL4: br i1 ++; UNROLL4: llvm.loop.unroll.disable +diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll +new file mode 100644 +index 000000000000..480ccad640ae +--- /dev/null ++++ b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll +@@ -0,0 +1,113 @@ ++; Test loop unrolling using auto-tuning YAML api with IRs generated when ASSERTION=OFF ++; The IRs generated when ASSERTION=OFF usually only use slot numbers as variable names. ++ ++; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' | \ ++; RUN: FileCheck %s -check-prefix=DISABLE ++ ++; RUN: rm %t.result1_raw %t.unroll1_raw.yaml -rf ++; RUN: sed 's#\[number\]#1#g; s#\[hash\]#18159364858606519094#g' \ ++; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll1_raw.yaml ++; RUN: opt %s -S -passes='require,function(loop-unroll)' \ ++; RUN: -auto-tuning-input=%t.unroll1_raw.yaml | FileCheck %s -check-prefix=UNROLL1 ++ ++; RUN: rm %t.result2_raw %t.unroll2_raw.yaml -rf ++; RUN: sed 's#\[number\]#2#g; s#\[hash\]#18159364858606519094#g' \ ++; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll2_raw.yaml ++; RUN: opt %s -S -passes='require,function(loop-unroll)' \ ++; RUN: -auto-tuning-input=%t.unroll2_raw.yaml | FileCheck %s -check-prefix=UNROLL2 ++ ++; RUN: rm %t.result4_raw %t.unroll4_raw.yaml -rf ++; RUN: sed 's#\[number\]#4#g; s#\[hash\]#18159364858606519094#g' \ ++; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll4_raw.yaml ++; RUN: opt %s -S -passes='require,function(loop-unroll)' \ ++; RUN: -auto-tuning-input=%t.unroll4_raw.yaml | FileCheck %s -check-prefix=UNROLL4 ++ ++; UNSUPPORTED: windows ++ ++; ModuleID = 't.ll' ++source_filename = "t.ll" ++ ++@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ++ ++define void @test(i32*) { ++ %2 = alloca i32*, align 8 ++ store i32* %0, i32** %2, align 8 ++ %3 = load i32*, i32** 
%2, align 8 ++ %4 = load i32, i32* %3, align 4 ++ %5 = add nsw i32 %4, 2 ++ %6 = load i32*, i32** %2, align 8 ++ store i32 %5, i32* %6, align 4 ++ ret void ++} ++ ++define i32 @main() { ++ %1 = alloca i32, align 4 ++ %2 = alloca i32, align 4 ++ store i32 0, i32* %1, align 4 ++ store i32 8, i32* %2, align 4 ++ %3 = load i32, i32* %2, align 4 ++ %4 = icmp sle i32 %3, 88 ++ br i1 %4, label %.lr.ph, label %13 ++ ++.lr.ph: ; preds = %0 ++ br label %5 ++ ++;