Date: Sat, 20 Apr 2024 10:32:35 GMT From: Dimitry Andric <dim@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: 471f3ba46090 - stable/14 - Merge llvm-project release/18.x llvmorg-18.1.1-0-gdba2a75e9c7e Message-ID: <202404201032.43KAWZMF011372@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/14 has been updated by dim: URL: https://cgit.FreeBSD.org/src/commit/?id=471f3ba460906d3bca33d0bff87d0e73a9f93e9c commit 471f3ba460906d3bca33d0bff87d0e73a9f93e9c Author: Dimitry Andric <dim@FreeBSD.org> AuthorDate: 2024-03-10 12:37:50 +0000 Commit: Dimitry Andric <dim@FreeBSD.org> CommitDate: 2024-04-19 21:14:17 +0000 Merge llvm-project release/18.x llvmorg-18.1.1-0-gdba2a75e9c7e This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvm-project release/18.x llvmorg-18.1.1-0-gdba2a75e9c7e. PR: 276104 MFC after: 1 month (cherry picked from commit 4c2d3b022a1d543dbbff75a0c53e8d3d7242216d) --- .../llvm-project/clang/include/clang/Sema/Sema.h | 4 +- .../clang/lib/Basic/Targets/AArch64.cpp | 1 - .../llvm-project/clang/lib/Basic/Targets/Mips.h | 4 +- .../llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp | 11 +- contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp | 4 +- contrib/llvm-project/libcxx/include/csetjmp | 8 +- .../llvm/include/llvm/ADT/iterator_range.h | 4 +- .../llvm/include/llvm/IR/Attributes.td | 28 +- .../llvm/TargetParser/AArch64TargetParser.h | 8 +- contrib/llvm-project/llvm/lib/Analysis/Loads.cpp | 6 +- contrib/llvm-project/llvm/lib/IR/Attributes.cpp | 5 + .../llvm/lib/Target/AArch64/AArch64.td | 27 + .../llvm/lib/Target/AArch64/AArch64SchedA53.td | 2 +- .../llvm/lib/Target/AArch64/AArch64SchedA57.td | 2 +- .../llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 3 +- .../lib/Target/AArch64/AArch64SchedAmpere1B.td | 1149 ++++++++++++++++++++ .../llvm/lib/Target/AArch64/AArch64SchedCyclone.td | 2 +- .../lib/Target/AArch64/AArch64SchedExynosM3.td | 2 +- .../lib/Target/AArch64/AArch64SchedExynosM4.td | 2 +- .../lib/Target/AArch64/AArch64SchedExynosM5.td | 2 +- .../llvm/lib/Target/AArch64/AArch64SchedFalkor.td | 2 +- .../llvm/lib/Target/AArch64/AArch64SchedKryo.td | 2 +- .../lib/Target/AArch64/AArch64SchedNeoverseN1.td | 2 +- .../lib/Target/AArch64/AArch64SchedNeoverseN2.td | 2 +- .../lib/Target/AArch64/AArch64SchedNeoverseV1.td | 3 +- .../lib/Target/AArch64/AArch64SchedNeoverseV2.td | 3 +- .../llvm/lib/Target/AArch64/AArch64SchedTSV110.td | 2 +- .../lib/Target/AArch64/AArch64SchedThunderX.td | 2 +- .../lib/Target/AArch64/AArch64SchedThunderX2T99.td | 2 +- .../Target/AArch64/AArch64SchedThunderX3T110.td | 2 +- .../llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 + .../llvm/lib/Target/AArch64/AArch64Subtarget.h | 1 + .../lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 78 +- .../llvm/lib/Target/Mips/MipsISelLowering.cpp | 10 +- .../lib/Target/SystemZ/SystemZISelLowering.cpp | 5 +- .../llvm-project/llvm/lib/TargetParser/Host.cpp | 1 + .../llvm/lib/Transforms/Utils/FlattenCFG.cpp | 14 +- .../llvm/lib/Transforms/Utils/Local.cpp | 8 +- .../llvm/tools/llvm-cov/SourceCoverageView.cpp | 2 +- .../llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp | 9 +- .../llvm/tools/llvm-cov/SourceCoverageViewText.cpp | 3 +- .../llvm/tools/llvm-readobj/ObjDumper.cpp | 26 +- .../llvm/tools/llvm-readobj/ObjDumper.h | 4 +- .../llvm-project/llvm/tools/llvm-readobj/Opts.td | 2 + .../llvm/tools/llvm-readobj/llvm-readobj.cpp | 6 +- .../llvm/utils/TableGen/Attributes.cpp | 6 +- lib/clang/include/VCSVersion.inc | 6 +- lib/clang/include/clang/Basic/Version.inc | 6 +- lib/clang/include/lld/Common/Version.inc | 2 +- lib/clang/include/lldb/Version/Version.inc | 6 +- lib/clang/include/llvm/Config/config.h | 4 +- lib/clang/include/llvm/Config/llvm-config.h | 4 +- lib/clang/include/llvm/Support/VCSRevision.h | 2 +- lib/clang/liblldb/LLDBWrapLua.cpp | 162 +-- 54 files changed, 1501 insertions(+), 163 deletions(-) diff --git a/contrib/llvm-project/clang/include/clang/Sema/Sema.h b/contrib/llvm-project/clang/include/clang/Sema/Sema.h index 1f1cbd11ff73..6adb8fb7966b 100644 --- a/contrib/llvm-project/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm-project/clang/include/clang/Sema/Sema.h @@ -1090,7 +1090,9 @@ public: if (FD) { FD->setWillHaveBody(true); S.ExprEvalContexts.back().InImmediateFunctionContext = - FD->isImmediateFunction(); + FD->isImmediateFunction() || + S.ExprEvalContexts[S.ExprEvalContexts.size() - 2] + .isConstantEvaluated(); S.ExprEvalContexts.back().InImmediateEscalatingFunctionContext = S.getLangOpts().CPlusPlus20 && FD->isImmediateEscalating(); } else diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp index 3036f461c1de..f5a5d689fa09 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp +++ b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp @@ -258,7 +258,6 @@ void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1"); Builder.defineMacro("__ARM_FEATURE_JCVT", "1"); - Builder.defineMacro("__ARM_FEATURE_PAUTH", "1"); // Also include the Armv8.2 defines getTargetDefinesARMV82A(Opts, Builder); } diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h b/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h index f46b95abfd75..23d4e1b598fa 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h +++ b/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h @@ -237,12 +237,14 @@ public: case 'r': // CPU registers. case 'd': // Equivalent to "r" unless generating MIPS16 code. case 'y': // Equivalent to "r", backward compatibility only. - case 'f': // floating-point registers. case 'c': // $25 for indirect jumps case 'l': // lo register case 'x': // hilo register pair Info.setAllowsRegister(); return true; + case 'f': // floating-point registers. + Info.setAllowsRegister(); + return FloatABI != SoftFloat; case 'I': // Signed 16-bit constant case 'J': // Integer 0 case 'K': // Unsigned 16-bit constant diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp index 5d7c38477457..fb4e86e8bd80 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp @@ -240,9 +240,12 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { if (MCDCMaxCond == 0) return true; - /// At the top of the logical operator nest, reset the number of conditions. - if (LogOpStack.empty()) + /// At the top of the logical operator nest, reset the number of conditions, + /// also forget previously seen split nesting cases. + if (LogOpStack.empty()) { NumCond = 0; + SplitNestedLogicalOp = false; + } if (const Expr *E = dyn_cast<Expr>(S)) { const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens()); @@ -293,7 +296,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { "contains an operation with a nested boolean expression. " "Expression will not be covered"); Diag.Report(S->getBeginLoc(), DiagID); - return false; + return true; } /// Was the maximum number of conditions encountered? @@ -304,7 +307,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { "number of conditions (%0) exceeds max (%1). " "Expression will not be covered"); Diag.Report(S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond; - return false; + return true; } // Otherwise, allocate the number of bytes required for the bitmap diff --git a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp index 0d9c087ed0cd..4cce0abc2315 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp @@ -18294,7 +18294,6 @@ void Sema::CheckUnusedVolatileAssignment(Expr *E) { } void Sema::MarkExpressionAsImmediateEscalating(Expr *E) { - assert(!FunctionScopes.empty() && "Expected a function scope"); assert(getLangOpts().CPlusPlus20 && ExprEvalContexts.back().InImmediateEscalatingFunctionContext && "Cannot mark an immediate escalating expression outside of an " @@ -18311,7 +18310,8 @@ void Sema::MarkExpressionAsImmediateEscalating(Expr *E) { } else { assert(false && "expected an immediately escalating expression"); } - getCurFunction()->FoundImmediateEscalatingExpression = true; + if (FunctionScopeInfo *FI = getCurFunction()) + FI->FoundImmediateEscalatingExpression = true; } ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) { diff --git a/contrib/llvm-project/libcxx/include/csetjmp b/contrib/llvm-project/libcxx/include/csetjmp index d219c8e6cb22..9012cad22ebe 100644 --- a/contrib/llvm-project/libcxx/include/csetjmp +++ b/contrib/llvm-project/libcxx/include/csetjmp @@ -33,7 +33,13 @@ void longjmp(jmp_buf env, int val); #include <__assert> // all public C++ headers provide the assertion handler #include <__config> -#include <setjmp.h> +// <setjmp.h> is not provided by libc++ +#if __has_include(<setjmp.h>) +# include <setjmp.h> +# ifdef _LIBCPP_SETJMP_H +# error "If libc++ starts defining <setjmp.h>, the __has_include check should move to libc++'s <setjmp.h>" +# endif +#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h index 2dc227935984..7d288ea4506b 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h @@ -43,8 +43,8 @@ class iterator_range { IteratorT begin_iterator, end_iterator; public: -#if __GNUC__ == 7 - // Be careful no to break gcc-7 on the mlir target. +#if __GNUC__ == 7 || (__GNUC__ == 8 && __GNUC_MINOR__ < 4) + // Be careful no to break gcc-7 and gcc-8 < 8.4 on the mlir target. // See https://github.com/llvm/llvm-project/issues/63843 template <typename Container> #else diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td index 864f87f33838..d22eb76d2292 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td @@ -339,14 +339,26 @@ def UseSampleProfile : StrBoolAttr<"use-sample-profile">; def DenormalFPMath : ComplexStrAttr<"denormal-fp-math", [FnAttr]>; def DenormalFPMathF32 : ComplexStrAttr<"denormal-fp-math-f32", [FnAttr]>; +// Attribute compatiblity rules are generated to check the attribute of the +// caller and callee and decide whether inlining should be allowed. CompatRule +// and child classes are used for the rule generation. CompatRule takes only a +// compare function which could be templated with the attribute type. +// CompatRuleStrAttr takes the compare function and the string attribute for +// checking compatibility for inline substitution. class CompatRule<string F> { - // The name of the function called to check the attribute of the caller and - // callee and decide whether inlining should be allowed. The function's - // signature must match "bool(const Function&, const Function &)", where the - // first parameter is the reference to the caller and the second parameter is - // the reference to the callee. It must return false if the attributes of the - // caller and callee are incompatible, and true otherwise. + // The function's signature must match "bool(const Function&, const + // Function&)", where the first parameter is the reference to the caller and + // the second parameter is the reference to the callee. It must return false + // if the attributes of the caller and callee are incompatible, and true + // otherwise. string CompatFunc = F; + string AttrName = ""; +} + +class CompatRuleStrAttr<string F, string Attr> : CompatRule<F> { + // The checker function is extended with an third argument as the function + // attribute string "bool(const Function&, const Function&, const StringRef&)". + string AttrName = Attr; } def : CompatRule<"isEqual<SanitizeAddressAttr>">; @@ -359,7 +371,9 @@ def : CompatRule<"isEqual<ShadowCallStackAttr>">; def : CompatRule<"isEqual<UseSampleProfileAttr>">; def : CompatRule<"isEqual<NoProfileAttr>">; def : CompatRule<"checkDenormMode">; - +def : CompatRuleStrAttr<"isEqual", "sign-return-address">; +def : CompatRuleStrAttr<"isEqual", "sign-return-address-key">; +def : CompatRuleStrAttr<"isEqual", "branch-protection-pauth-lr">; class MergeRule<string F> { // The name of the function called to merge the attributes of the caller and diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 6d82748d8004..c10f92e28717 100644 --- a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -478,7 +478,7 @@ inline constexpr ArchInfo ARMV8_1A = { VersionTuple{8, 1}, AProfile, "armv8.1-a inline constexpr ArchInfo ARMV8_2A = { VersionTuple{8, 2}, AProfile, "armv8.2-a", "+v8.2a", (ARMV8_1A.DefaultExts | AArch64::ExtensionBitset({AArch64::AEK_RAS}))}; inline constexpr ArchInfo ARMV8_3A = { VersionTuple{8, 3}, AProfile, "armv8.3-a", "+v8.3a", (ARMV8_2A.DefaultExts | - AArch64::ExtensionBitset({AArch64::AEK_RCPC, AArch64::AEK_JSCVT, AArch64::AEK_FCMA}))}; + AArch64::ExtensionBitset({AArch64::AEK_FCMA, AArch64::AEK_JSCVT, AArch64::AEK_PAUTH, AArch64::AEK_RCPC}))}; inline constexpr ArchInfo ARMV8_4A = { VersionTuple{8, 4}, AProfile, "armv8.4-a", "+v8.4a", (ARMV8_3A.DefaultExts | AArch64::ExtensionBitset({AArch64::AEK_DOTPROD}))}; inline constexpr ArchInfo ARMV8_5A = { VersionTuple{8, 5}, AProfile, "armv8.5-a", "+v8.5a", (ARMV8_4A.DefaultExts)}; @@ -805,6 +805,12 @@ inline constexpr CpuInfo CpuInfos[] = { {AArch64::AEK_FP16, AArch64::AEK_RAND, AArch64::AEK_SM4, AArch64::AEK_SHA3, AArch64::AEK_SHA2, AArch64::AEK_AES, AArch64::AEK_MTE, AArch64::AEK_SB, AArch64::AEK_SSBS}))}, + {"ampere1b", ARMV8_7A, + (AArch64::ExtensionBitset({AArch64::AEK_FP16, AArch64::AEK_RAND, + AArch64::AEK_SM4, AArch64::AEK_SHA3, + AArch64::AEK_SHA2, AArch64::AEK_AES, + AArch64::AEK_MTE, AArch64::AEK_SB, + AArch64::AEK_SSBS, AArch64::AEK_CSSC}))}, }; // An alias for a CPU. diff --git a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp index 6bf0d2f56eb4..5916d2ab48ec 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp @@ -364,7 +364,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, if (Size.getBitWidth() > 64) return false; - const uint64_t LoadSize = Size.getZExtValue(); + const TypeSize LoadSize = TypeSize::getFixed(Size.getZExtValue()); // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored @@ -414,11 +414,11 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, // Handle trivial cases. if (AccessedPtr == V && - LoadSize <= DL.getTypeStoreSize(AccessedTy)) + TypeSize::isKnownLE(LoadSize, DL.getTypeStoreSize(AccessedTy))) return true; if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && - LoadSize <= DL.getTypeStoreSize(AccessedTy)) + TypeSize::isKnownLE(LoadSize, DL.getTypeStoreSize(AccessedTy))) return true; } return false; diff --git a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp index fd5160209506..19076771ff2e 100644 --- a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp @@ -2045,6 +2045,11 @@ static bool isEqual(const Function &Caller, const Function &Callee) { Callee.getFnAttribute(AttrClass::getKind()); } +static bool isEqual(const Function &Caller, const Function &Callee, + const StringRef &AttrName) { + return Caller.getFnAttribute(AttrName) == Callee.getFnAttribute(AttrName); +} + /// Compute the logical AND of the attributes of the caller and the /// callee. /// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td index 36700f73df4b..feabd137c0cf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td @@ -837,6 +837,7 @@ include "AArch64SchedA64FX.td" include "AArch64SchedThunderX3T110.td" include "AArch64SchedTSV110.td" include "AArch64SchedAmpere1.td" +include "AArch64SchedAmpere1B.td" include "AArch64SchedNeoverseN1.td" include "AArch64SchedNeoverseN2.td" include "AArch64SchedNeoverseV1.td" @@ -1376,6 +1377,24 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", FeatureLdpAlignedOnly, FeatureStpAlignedOnly]>; +def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", + "Ampere Computing Ampere-1B processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive, + FeatureLdpAlignedOnly, + FeatureStpAlignedOnly]>; + def ProcessorFeatures { list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeaturePerfMon]; @@ -1529,6 +1548,11 @@ def ProcessorFeatures { FeatureMTE, FeatureSSBS, FeatureRandGen, FeatureSB, FeatureSM4, FeatureSHA2, FeatureSHA3, FeatureAES]; + list<SubtargetFeature> Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon, + FeatureMTE, FeatureSSBS, FeatureRandGen, + FeatureSB, FeatureSM4, FeatureSHA2, + FeatureSHA3, FeatureAES, FeatureCSSC, + FeatureWFxT, FeatureFullFP16]; // ETE and TRBE are future architecture extensions. We temporarily enable them // by default for users targeting generic AArch64. The extensions do not @@ -1696,6 +1720,9 @@ def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A, [TuneAmpere1A]>; +def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B, + [TuneAmpere1B]>; + //===----------------------------------------------------------------------===// // Assembly parser //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td index 3e4168f5f445..c714bad92b7f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -29,7 +29,7 @@ def CortexA53Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td index 277ec772cf0f..ebbc3b72b506 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -34,7 +34,7 @@ def CortexA57Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index 7edce4b61605..d6fe84a2c9c9 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -22,7 +22,8 @@ def A64FXModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F, [HasMTE, HasMatMulInt8, HasBF16, - HasPAuth, HasPAuthLR, HasCPA]); + HasPAuth, HasPAuthLR, HasCPA, + HasCSSC]); let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td new file mode 100644 index 000000000000..9c4f000cf351 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td @@ -0,0 +1,1149 @@ +//=- AArch64SchedAmpere1B.td - Ampere-1B scheduling def -----*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the Ampere Computing Ampere-1B to +// support instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +// The Ampere-1B core is an out-of-order micro-architecture. The front +// end has branch prediction, with a 10-cycle recovery time from a +// mispredicted branch. Instructions coming out of the front end are +// decoded into internal micro-ops (uops). + +def Ampere1BModel : SchedMachineModel { + let IssueWidth = 12; // Maximum micro-ops dispatch rate. + let MicroOpBufferSize = 192; // micro-op re-order buffer size + let LoadLatency = 3; // Optimistic load latency + let MispredictPenalty = 10; // Branch mispredict penalty + let LoopMicroOpBufferSize = 32; // Instruction queue size + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + SMEUnsupported.F, + PAUnsupported.F); +} + +let SchedModel = Ampere1BModel in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Ampere-1B. + +def Ampere1BUnitA : ProcResource<2>; // integer single-cycle, branch, and flags r/w +def Ampere1BUnitB : ProcResource<2>; // integer single-cycle, and complex shifts +def Ampere1BUnitBS : ProcResource<1>; // integer multi-cycle +def Ampere1BUnitL : ProcResource<2>; // load +def Ampere1BUnitS : ProcResource<2>; // store address calculation +def Ampere1BUnitX : ProcResource<1>; // FP and vector operations, and flag write +def Ampere1BUnitY : ProcResource<1>; // FP and vector operations, and crypto +def Ampere1BUnitZ : ProcResource<1>; // FP store data and FP-to-integer moves + +def Ampere1BUnitAB : ProcResGroup<[Ampere1BUnitA, Ampere1BUnitB]>; +def Ampere1BUnitXY : ProcResGroup<[Ampere1BUnitX, Ampere1BUnitY]>; + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Ampere-1. + +def Ampere1BWrite_1cyc_1A : SchedWriteRes<[Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_2A : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1B : SchedWriteRes<[Ampere1BUnitB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1BS_1B : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitB]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1AB : SchedWriteRes<[Ampere1BUnitAB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1AB_1A : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1S : SchedWriteRes<[Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_2S : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1Y : SchedWriteRes<[Ampere1BUnitY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1BWrite_2cyc_2AB : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1AB : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1S : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1S_1AB : SchedWriteRes<[Ampere1BUnitB, + Ampere1BUnitS, + Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1BWrite_2cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ, + Ampere1BUnitZ]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1BWrite_2cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1BWrite_2cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_3cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1Z : SchedWriteRes<[Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def Ampere1BWrite_3cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def Ampere1BWrite_3cyc_2S_2Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def Ampere1BWrite_4cyc_1BS_1AB : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitAB]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_2L : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1L_1B : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitB]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_5cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1BUnitXY, + Ampere1BUnitS, + Ampere1BUnitZ]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def Ampere1BWrite_4cyc_3S_3Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def Ampere1BWrite_5cyc_4S_4Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 5; + let NumMicroOps = 8; +} + +def Ampere1BWrite_5cyc_1L_1BS : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitBS]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def Ampere1BWrite_5cyc_3L : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def Ampere1BWrite_5cyc_4L : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL]> { + let Latency = 5; + let NumMicroOps = 4; +} + +def Ampere1BWrite_5cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1BWrite_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 5; + let NumMicroOps = 6; +} + +def Ampere1BWrite_6cyc_1BS_1A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_1BS_2A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA, + Ampere1BUnitA]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1BWrite_6cyc_1L_1XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_2L_2XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def Ampere1BWrite_6cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1BWrite_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 6; + let NumMicroOps = 6; +} + +def Ampere1BWrite_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 6; + let NumMicroOps = 9; +} + +def Ampere1BWrite_7cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_1XY_1Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_1X_1Z : SchedWriteRes<[Ampere1BUnitX, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 6; +} + +def Ampere1BWrite_7cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 8; +} + +def Ampere1BWrite_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 12; +} + +def Ampere1BWrite_8cyc_1BS_1L : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitL]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_2L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 5; +} + +def Ampere1BWrite_8cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 6; +} + +def Ampere1BWrite_8cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 8; +} + +def Ampere1BWrite_8cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_4XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def Ampere1BWrite_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 9; + let NumMicroOps = 14; +} + +def Ampere1BWrite_9cyc_1A_1BS_1X : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_9cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_9cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 6; +} + +def Ampere1BWrite_9cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 9; + let NumMicroOps = 1; +} + +def Ampere1BWrite_9cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_10cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 10; + let NumMicroOps = 12; +} + +def Ampere1BWrite_11cyc_1BS_2XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 11; + let NumMicroOps = 3; +} + +def Ampere1BWrite_11cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 11; + let NumMicroOps = 12; +} + +def Ampere1BWrite_12cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 12; + let NumMicroOps = 1; +} + +def Ampere1BWrite_13cyc_1BS_1X : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 13; + let NumMicroOps = 2; +} + +def Ampere1BWrite_17cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 17; + let NumMicroOps = 1; +} + +def Ampere1BWrite_19cyc_2BS_1X : SchedWriteRes<[Ampere1BUnitBS, + Ampere1BUnitBS, + Ampere1BUnitX]> { + let Latency = 13; + let NumMicroOps = 3; +} + +def Ampere1BWrite_19cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 19; + let NumMicroOps = 1; +} + +def Ampere1BWrite_21cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 21; + let NumMicroOps = 1; +} + +def Ampere1BWrite_33cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 33; + let NumMicroOps = 1; +} + +def Ampere1BWrite_39cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 39; + let NumMicroOps = 1; +} + +def Ampere1BWrite_63cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 63; + let NumMicroOps = 1; +} + +// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4), +// which are a single uop, and for extended registers, which have full flexibility +// across Unit A or B for both uops. +def Ampere1BWrite_Arith : SchedWriteVariant<[ + SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>, + SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>, + SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>]>; + +def Ampere1BWrite_ArithFlagsetting : SchedWriteVariant<[ + SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>, + SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>, + SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>]>; + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latencies for Ampere-1. +// This provides a coarse model, which is then specialised below. + +def : WriteRes<WriteImm, [Ampere1BUnitAB]>; // MOVN, MOVZ +def : WriteRes<WriteI, [Ampere1BUnitAB]>; // ALU +def : WriteRes<WriteISReg, [Ampere1BUnitB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} // ALU of Shifted-Reg +def : WriteRes<WriteIEReg, [Ampere1BUnitAB, Ampere1BUnitAB]> { *** 1567 LINES SKIPPED ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202404201032.43KAWZMF011372>