Date: Fri, 16 Feb 2018 20:45:32 +0000 (UTC) From: Dimitry Andric <dim@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r329410 - in head: contrib/compiler-rt/lib/asan contrib/llvm/include/llvm/IR contrib/llvm/include/llvm/MC contrib/llvm/include/llvm/Support contrib/llvm/lib/CodeGen contrib/llvm/lib/Cod... Message-ID: <201802162045.w1GKjWf2020258@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: dim Date: Fri Feb 16 20:45:32 2018 New Revision: 329410 URL: https://svnweb.freebsd.org/changeset/base/329410 Log: Upgrade our copies of clang, llvm, lld, lldb, compiler-rt and libc++ to 6.0.0 (branches/release_60 r325330). MFC after: 3 months X-MFC-With: r327952 PR: 224669 Added: head/contrib/llvm/include/llvm/MC/MCAsmMacro.h - copied unchanged from r329405, vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h Modified: head/contrib/compiler-rt/lib/asan/asan_linux.cc head/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td head/contrib/llvm/include/llvm/IR/IntrinsicsX86.td head/contrib/llvm/include/llvm/MC/MCContext.h head/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h head/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp head/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp head/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp head/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp head/contrib/llvm/lib/CodeGen/SplitKit.cpp head/contrib/llvm/lib/CodeGen/SplitKit.h head/contrib/llvm/lib/IR/AutoUpgrade.cpp head/contrib/llvm/lib/MC/MCParser/AsmParser.cpp head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td head/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp head/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h head/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td head/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp head/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp head/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp head/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h head/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp head/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp head/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h head/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp head/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp head/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp head/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp head/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp head/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp head/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp head/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h head/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h head/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h head/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp head/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp head/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp head/contrib/llvm/tools/lld/COFF/PDB.cpp head/contrib/llvm/tools/lld/ELF/Driver.cpp head/contrib/llvm/tools/lld/ELF/InputFiles.cpp head/contrib/llvm/tools/lld/ELF/Options.td head/contrib/llvm/tools/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp head/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp head/lib/clang/include/clang/Basic/Version.inc head/lib/clang/include/lld/Common/Version.inc head/lib/clang/include/llvm/Support/VCSRevision.h Directory Properties: head/contrib/compiler-rt/ (props changed) head/contrib/libc++/ (props changed) head/contrib/llvm/ (props changed) head/contrib/llvm/tools/clang/ (props changed) head/contrib/llvm/tools/lld/ (props changed) head/contrib/llvm/tools/lldb/ (props changed) Modified: head/contrib/compiler-rt/lib/asan/asan_linux.cc ============================================================================== --- head/contrib/compiler-rt/lib/asan/asan_linux.cc Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/compiler-rt/lib/asan/asan_linux.cc Fri Feb 16 20:45:32 2018 (r329410) @@ -32,6 +32,7 @@ #include <sys/types.h> #include <dlfcn.h> #include <fcntl.h> +#include <limits.h> #include <pthread.h> #include <stdio.h> #include <unistd.h> @@ -214,7 +215,7 @@ void AsanCheckIncompatibleRT() { // the functions in dynamic ASan runtime instead of the functions in // system libraries, causing crashes later in ASan initialization. MemoryMappingLayout proc_maps(/*cache_enabled*/true); - char filename[128]; + char filename[PATH_MAX]; MemoryMappedSegment segment(filename, sizeof(filename)); while (proc_maps.Next(&segment)) { if (IsDynamicRTName(segment.filename)) { Modified: head/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td ============================================================================== --- head/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td Fri Feb 16 20:45:32 2018 (r329410) @@ -238,6 +238,26 @@ def int_amdgcn_cvt_pkrtz : Intrinsic< [IntrNoMem, IntrSpeculatable] >; +def int_amdgcn_cvt_pknorm_i16 : Intrinsic< + [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] +>; + +def int_amdgcn_cvt_pknorm_u16 : Intrinsic< + [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] +>; + +def int_amdgcn_cvt_pk_i16 : Intrinsic< + [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + +def int_amdgcn_cvt_pk_u16 : Intrinsic< + [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + def int_amdgcn_class : Intrinsic< [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] Modified: head/contrib/llvm/include/llvm/IR/IntrinsicsX86.td ============================================================================== --- head/contrib/llvm/include/llvm/IR/IntrinsicsX86.td Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/include/llvm/IR/IntrinsicsX86.td Fri Feb 16 20:45:32 2018 (r329410) @@ -3738,6 +3738,15 @@ let TargetPrefix = "x86" in { // All intrinsics start def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; Copied: head/contrib/llvm/include/llvm/MC/MCAsmMacro.h (from r329405, vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/contrib/llvm/include/llvm/MC/MCAsmMacro.h Fri Feb 16 20:45:32 2018 (r329410, copy of r329405, vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h) @@ -0,0 +1,38 @@ +//===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCASMMACRO_H +#define LLVM_MC_MCASMMACRO_H + +#include "llvm/MC/MCParser/MCAsmLexer.h" + +namespace llvm { + +struct MCAsmMacroParameter { + StringRef Name; + std::vector<AsmToken> Value; + bool Required = false; + bool Vararg = false; + + MCAsmMacroParameter() = default; +}; + +typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters; +struct MCAsmMacro { + StringRef Name; + StringRef Body; + MCAsmMacroParameters Parameters; + +public: + MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P) + : Name(N), Body(B), Parameters(std::move(P)) {} +}; +}; // namespace llvm + +#endif Modified: head/contrib/llvm/include/llvm/MC/MCContext.h ============================================================================== --- head/contrib/llvm/include/llvm/MC/MCContext.h Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/include/llvm/MC/MCContext.h Fri Feb 16 20:45:32 2018 (r329410) @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SectionKind.h" @@ -268,6 +269,9 @@ namespace llvm { unsigned UniqueID, const MCSymbolELF *Associated); + /// \brief Map of currently defined macros. + StringMap<MCAsmMacro> MacroMap; + public: explicit MCContext(const MCAsmInfo *MAI, const MCRegisterInfo *MRI, const MCObjectFileInfo *MOFI, @@ -618,6 +622,17 @@ namespace llvm { // FIXME: We should really do something about that. LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, const Twine &Msg); + + const MCAsmMacro *lookupMacro(StringRef Name) { + StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name); + return (I == MacroMap.end()) ? nullptr : &I->getValue(); + } + + void defineMacro(StringRef Name, MCAsmMacro Macro) { + MacroMap.insert(std::make_pair(Name, std::move(Macro))); + } + + void undefineMacro(StringRef Name) { MacroMap.erase(Name); } }; } // end namespace llvm Modified: head/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h ============================================================================== --- head/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h Fri Feb 16 20:45:32 2018 (r329410) @@ -698,24 +698,20 @@ struct SemiNCAInfo { return; // Recalculate the set of roots. - DT.Roots = FindRoots(DT, BUI); - for (const NodePtr R : DT.Roots) { - const TreeNodePtr TN = DT.getNode(R); - // A CFG node was selected as a tree root, but the corresponding tree node - // is not connected to the virtual root. This is because the incremental - // algorithm does not really know or use the set of roots and can make a - // different (implicit) decision about which nodes within an infinite loop - // becomes a root. - if (TN && !DT.isVirtualRoot(TN->getIDom())) { - DEBUG(dbgs() << "Root " << BlockNamePrinter(R) - << " is not virtual root's child\n" - << "The entire tree needs to be rebuilt\n"); - // It should be possible to rotate the subtree instead of recalculating - // the whole tree, but this situation happens extremely rarely in - // practice. - CalculateFromScratch(DT, BUI); - return; - } + auto Roots = FindRoots(DT, BUI); + if (DT.Roots.size() != Roots.size() || + !std::is_permutation(DT.Roots.begin(), DT.Roots.end(), Roots.begin())) { + // The roots chosen in the CFG have changed. This is because the + // incremental algorithm does not really know or use the set of roots and + // can make a different (implicit) decision about which node within an + // infinite loop becomes a root. + + DEBUG(dbgs() << "Roots are different in updated trees\n" + << "The entire tree needs to be rebuilt\n"); + // It may be possible to update the tree without recalculating it, but + // we do not know yet how to do it, and it happens rarely in practise. + CalculateFromScratch(DT, BUI); + return; } } Modified: head/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp ============================================================================== --- head/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -163,7 +163,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITyp DIType *BaseType = DDTy->getBaseType().resolve(); - assert(BaseType && "Unexpected invalid base type"); + if (!BaseType) + return 0; // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need Modified: head/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp ============================================================================== --- head/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -1391,7 +1391,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, resolve(DT->getBaseType())); + if (DIType *Resolved = resolve(DT->getBaseType())) + addType(MemberDie, Resolved); addSourceLine(MemberDie, DT); Modified: head/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp ============================================================================== --- head/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -205,14 +205,18 @@ void LivePhysRegs::addPristines(const MachineFunction } void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { - if (!MBB.succ_empty()) { - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addBlockLiveIns(*Succ); - } else if (MBB.isReturnBlock()) { - // For the return block: Add all callee saved registers that are saved and - // restored (somewhere); This does not include callee saved registers that - // are unused and hence not saved and restored; they are called pristine. + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*Succ); + if (MBB.isReturnBlock()) { + // Return blocks are a special case because we currently don't mark up + // return instructions completely: specifically, there is no explicit + // use for callee-saved registers. So we add all callee saved registers + // that are saved and restored (somewhere). This does not include + // callee saved registers that are unused and hence not saved and + // restored; they are called pristine. + // FIXME: PEI should add explicit markings to return instructions + // instead of implicitly handling them here. const MachineFunction &MF = *MBB.getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) { @@ -225,15 +229,8 @@ void LivePhysRegs::addLiveOutsNoPristines(const Machin void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - if (!MBB.succ_empty()) { - addPristines(MF); - addLiveOutsNoPristines(MBB); - } else if (MBB.isReturnBlock()) { - // For the return block: Add all callee saved registers. - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) - addCalleeSavedRegs(*this, MF); - } + addPristines(MF); + addLiveOutsNoPristines(MBB); } void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { Modified: head/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ============================================================================== --- head/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -16409,7 +16409,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0).getOperand(1) == N2 && N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == - VT.getVectorNumElements()) { + VT.getVectorNumElements() && + N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() == + VT.getSizeInBits()) { return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); } Modified: head/contrib/llvm/lib/CodeGen/SplitKit.cpp ============================================================================== --- head/contrib/llvm/lib/CodeGen/SplitKit.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/CodeGen/SplitKit.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -491,9 +491,8 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, return VNI; } -void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { - assert(ParentVNI && "Mapping NULL value"); - ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)]; +void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) { + ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI.id)]; VNInfo *VNI = VFP.getPointer(); // ParentVNI was either unmapped or already complex mapped. Either way, just @@ -777,7 +776,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { // the source live range. The spiller also won't try to hoist this copy. if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) && MI->readsVirtualRegister(Edit->getReg())) { - forceRecompute(0, ParentVNI); + forceRecompute(0, *ParentVNI); defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); return Idx; } @@ -835,7 +834,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotInd // The complement interval will be extended as needed by LRCalc.extend(). if (ParentVNI) - forceRecompute(0, ParentVNI); + forceRecompute(0, *ParentVNI); DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); RegAssign.insert(Start, End, OpenIdx); DEBUG(dump()); @@ -878,7 +877,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNI unsigned RegIdx = AssignI.value(); if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) { DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); - forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); + forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def)); } else { SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot(); DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); @@ -982,7 +981,7 @@ void SplitEditor::computeRedundantBackCopies( } } if (!DominatedVNIs.empty()) { - forceRecompute(0, ParentVNI); + forceRecompute(0, *ParentVNI); for (auto VNI : DominatedVNIs) { BackCopies.push_back(VNI); } @@ -1102,7 +1101,7 @@ void SplitEditor::hoistCopies() { NotToHoistSet.count(ParentVNI->id)) continue; BackCopies.push_back(VNI); - forceRecompute(0, ParentVNI); + forceRecompute(0, *ParentVNI); } // If it is not beneficial to hoist all the BackCopies, simply remove @@ -1428,6 +1427,41 @@ void SplitEditor::deleteRematVictims() { Edit->eliminateDeadDefs(Dead, None, &AA); } +void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { + // Fast-path for common case. + if (!ParentVNI.isPHIDef()) { + for (unsigned I = 0, E = Edit->size(); I != E; ++I) + forceRecompute(I, ParentVNI); + return; + } + + // Trace value through phis. + SmallPtrSet<const VNInfo *, 8> Visited; ///< whether VNI was/is in worklist. + SmallVector<const VNInfo *, 4> WorkList; + Visited.insert(&ParentVNI); + WorkList.push_back(&ParentVNI); + + const LiveInterval &ParentLI = Edit->getParent(); + const SlotIndexes &Indexes = *LIS.getSlotIndexes(); + do { + const VNInfo &VNI = *WorkList.back(); + WorkList.pop_back(); + for (unsigned I = 0, E = Edit->size(); I != E; ++I) + forceRecompute(I, VNI); + if (!VNI.isPHIDef()) + continue; + + MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(VNI.def); + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred); + VNInfo *PredVNI = ParentLI.getVNInfoBefore(PredEnd); + assert(PredVNI && "Value available in PhiVNI predecessor"); + if (Visited.insert(PredVNI).second) + WorkList.push_back(PredVNI); + } + } while(!WorkList.empty()); +} + void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { ++NumFinished; @@ -1444,8 +1478,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LR // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. if (Edit->didRematerialize(ParentVNI)) - for (unsigned i = 0, e = Edit->size(); i != e; ++i) - forceRecompute(i, ParentVNI); + forceRecomputeVNI(*ParentVNI); } // Hoist back-copies to the complement interval when in spill mode. Modified: head/contrib/llvm/lib/CodeGen/SplitKit.h ============================================================================== --- head/contrib/llvm/lib/CodeGen/SplitKit.h Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/CodeGen/SplitKit.h Fri Feb 16 20:45:32 2018 (r329410) @@ -357,7 +357,11 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor { (private) /// recomputed by LiveRangeCalc::extend regardless of the number of defs. /// This is used for values whose live range doesn't match RegAssign exactly. /// They could have rematerialized, or back-copies may have been moved. - void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI); + void forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI); + + /// Calls forceRecompute() on any affected regidx and on ParentVNI + /// predecessors in case of a phi definition. + void forceRecomputeVNI(const VNInfo &ParentVNI); /// defFromParent - Define Reg from ParentVNI at UseIdx using either /// rematerialization or a COPY from parent. Return the new value. Modified: head/contrib/llvm/lib/IR/AutoUpgrade.cpp ============================================================================== --- head/contrib/llvm/lib/IR/AutoUpgrade.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/IR/AutoUpgrade.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -75,7 +75,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, Str Name=="ssse3.pabs.d.128" || // Added in 6.0 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 - Name.startswith("avx512.kunpck") || //added in 6.0 Name.startswith("avx2.pabs.") || // Added in 6.0 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 Name.startswith("avx512.broadcastm") || // Added in 6.0 @@ -1063,12 +1062,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("avx512.kunpck"))) { - uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2; - uint64_t And = (1ULL << Shift) - 1; - Value* LowBits = Builder.CreateAnd(CI->getArgOperand(0), And); - Value* HighBits = Builder.CreateShl(CI->getArgOperand(1), Shift); - Rep = Builder.CreateOr(LowBits, HighBits); } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) { Type *I32Ty = Type::getInt32Ty(C); Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), Modified: head/contrib/llvm/lib/MC/MCParser/AsmParser.cpp ============================================================================== --- head/contrib/llvm/lib/MC/MCParser/AsmParser.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/MC/MCParser/AsmParser.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -83,27 +83,6 @@ namespace { typedef std::vector<AsmToken> MCAsmMacroArgument; typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments; -struct MCAsmMacroParameter { - StringRef Name; - MCAsmMacroArgument Value; - bool Required = false; - bool Vararg = false; - - MCAsmMacroParameter() = default; -}; - -typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters; - -struct MCAsmMacro { - StringRef Name; - StringRef Body; - MCAsmMacroParameters Parameters; - -public: - MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P) - : Name(N), Body(B), Parameters(std::move(P)) {} -}; - /// \brief Helper class for storing information about an active macro /// instantiation. struct MacroInstantiation { @@ -164,9 +143,6 @@ class AsmParser : public MCAsmParser { (private) /// addDirectiveHandler. StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap; - /// \brief Map of currently defined macros. - StringMap<MCAsmMacro> MacroMap; - /// \brief Stack of active macro instantiations. std::vector<MacroInstantiation*> ActiveMacros; @@ -308,17 +284,6 @@ class AsmParser : public MCAsmParser { (private) /// \brief Control a flag in the parser that enables or disables macros. void setMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;} - /// \brief Lookup a previously defined macro. - /// \param Name Macro name. - /// \returns Pointer to macro. NULL if no such macro was defined. - const MCAsmMacro* lookupMacro(StringRef Name); - - /// \brief Define a new macro with the given name and information. - void defineMacro(StringRef Name, MCAsmMacro Macro); - - /// \brief Undefine a macro. If no such macro was defined, it's a no-op. - void undefineMacro(StringRef Name); - /// \brief Are we inside a macro instantiation? bool isInsideMacroInstantiation() {return !ActiveMacros.empty();} @@ -1841,7 +1806,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Inf // If macros are enabled, check to see if this is a macro instantiation. if (areMacrosEnabled()) - if (const MCAsmMacro *M = lookupMacro(IDVal)) { + if (const MCAsmMacro *M = getContext().lookupMacro(IDVal)) { return handleMacroEntry(M, IDLoc); } @@ -2720,17 +2685,6 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro * return TokError("too many positional arguments"); } -const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) { - StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name); - return (I == MacroMap.end()) ? nullptr : &I->getValue(); -} - -void AsmParser::defineMacro(StringRef Name, MCAsmMacro Macro) { - MacroMap.insert(std::make_pair(Name, std::move(Macro))); -} - -void AsmParser::undefineMacro(StringRef Name) { MacroMap.erase(Name); } - bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { // Arbitrarily limit macro nesting depth (default matches 'as'). We can // eliminate this, although we should protect against infinite loops. @@ -4249,7 +4203,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc eatToEndOfStatement(); } - if (lookupMacro(Name)) { + if (getContext().lookupMacro(Name)) { return Error(DirectiveLoc, "macro '" + Name + "' is already defined"); } @@ -4257,7 +4211,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc const char *BodyEnd = EndToken.getLoc().getPointer(); StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); checkForBadMacro(DirectiveLoc, Name, Body, Parameters); - defineMacro(Name, MCAsmMacro(Name, Body, std::move(Parameters))); + getContext().defineMacro(Name, MCAsmMacro(Name, Body, std::move(Parameters))); return false; } @@ -4416,10 +4370,10 @@ bool AsmParser::parseDirectivePurgeMacro(SMLoc Directi "unexpected token in '.purgem' directive")) return true; - if (!lookupMacro(Name)) + if (!getContext().lookupMacro(Name)) return Error(DirectiveLoc, "macro '" + Name + "' is not defined"); - undefineMacro(Name); + getContext().undefineMacro(Name); return false; } Modified: head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -3957,6 +3957,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(un NODE_NAME_CASE(CVT_F32_UBYTE2) NODE_NAME_CASE(CVT_F32_UBYTE3) NODE_NAME_CASE(CVT_PKRTZ_F16_F32) + NODE_NAME_CASE(CVT_PKNORM_I16_F32) + NODE_NAME_CASE(CVT_PKNORM_U16_F32) + NODE_NAME_CASE(CVT_PK_I16_I32) + NODE_NAME_CASE(CVT_PK_U16_U32) NODE_NAME_CASE(FP_TO_FP16) NODE_NAME_CASE(FP16_ZEXT) NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) Modified: head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Feb 16 20:45:32 2018 (r329410) @@ -417,6 +417,10 @@ enum NodeType : unsigned { // Convert two float 32 numbers into a single register holding two packed f16 // with round to zero. CVT_PKRTZ_F16_F32, + CVT_PKNORM_I16_F32, + CVT_PKNORM_U16_F32, + CVT_PK_I16_I32, + CVT_PK_U16_U32, // Same as the standard node, except the high bits of the resulting integer // are known 0. Modified: head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -108,3 +108,21 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) cons return MCOp; } + +// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence. +bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) { + const Value *Ptr = MMO->getValue(); + // UndefValue means this is a load of a kernel input. These are uniform. + // Sometimes LDS instructions have constant pointers. + // If Ptr is null, then that means this mem operand contains a + // PseudoSourceValue like GOT. + if (!Ptr || isa<UndefValue>(Ptr) || + isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) + return true; + + if (const Argument *Arg = dyn_cast<Argument>(Ptr)) + return AMDGPU::isArgPassedInSGPR(Arg); + + const Instruction *I = dyn_cast<Instruction>(Ptr); + return I && I->getMetadata("amdgpu.uniform"); +} Modified: head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h Fri Feb 16 20:45:32 2018 (r329410) @@ -50,6 +50,8 @@ class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { (p /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; + + static bool isUniformMMO(const MachineMemOperand *MMO); }; } // End llvm namespace Modified: head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td Fri Feb 16 20:45:32 2018 (r329410) @@ -35,6 +35,10 @@ def AMDGPUFPPackOp : SDTypeProfile<1, 2, [SDTCisFP<1>, SDTCisSameAs<1, 2>] >; +def AMDGPUIntPackOp : SDTypeProfile<1, 2, + [SDTCisInt<1>, SDTCisSameAs<1, 2>] +>; + def AMDGPUDivScaleOp : SDTypeProfile<2, 3, [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>] >; @@ -142,6 +146,10 @@ def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", S def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; +def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>; +def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>; def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>; Modified: head/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -120,7 +120,7 @@ static bool isInstrUniform(const MachineInstr &MI) { return false; const MachineMemOperand *MMO = *MI.memoperands_begin(); - return AMDGPU::isUniformMMO(MMO); + return AMDGPUInstrInfo::isUniformMMO(MMO); } const RegisterBankInfo::InstructionMapping & Modified: head/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -205,6 +205,7 @@ SITargetLowering::SITargetLowering(const TargetMachine setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -1085,7 +1086,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned S bool SITargetLowering::isMemOpUniform(const SDNode *N) const { const MemSDNode *MemNode = cast<MemSDNode>(N); - return AMDGPU::isUniformMMO(MemNode->getMemOperand()); + return AMDGPUInstrInfo::isUniformMMO(MemNode->getMemOperand()); } TargetLoweringBase::LegalizeTypeAction @@ -3517,7 +3518,8 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - if (IID == Intrinsic::amdgcn_cvt_pkrtz) { + switch (IID) { + case Intrinsic::amdgcn_cvt_pkrtz: { SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); SDLoc SL(N); @@ -3526,6 +3528,29 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt)); return; } + case Intrinsic::amdgcn_cvt_pknorm_i16: + case Intrinsic::amdgcn_cvt_pknorm_u16: + case Intrinsic::amdgcn_cvt_pk_i16: + case Intrinsic::amdgcn_cvt_pk_u16: { + SDValue Src0 = N->getOperand(1); + SDValue Src1 = N->getOperand(2); + SDLoc SL(N); + unsigned Opcode; + + if (IID == Intrinsic::amdgcn_cvt_pknorm_i16) + Opcode = AMDGPUISD::CVT_PKNORM_I16_F32; + else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16) + Opcode = AMDGPUISD::CVT_PKNORM_U16_F32; + else if (IID == Intrinsic::amdgcn_cvt_pk_i16) + Opcode = AMDGPUISD::CVT_PK_I16_I32; + else + Opcode = AMDGPUISD::CVT_PK_U16_U32; + + SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1); + Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt)); + return; + } + } break; } case ISD::SELECT: { @@ -4424,10 +4449,27 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDVa case Intrinsic::amdgcn_ubfe: return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::amdgcn_cvt_pkrtz: { - // FIXME: Stop adding cast if v2f16 legal. + case Intrinsic::amdgcn_cvt_pkrtz: + case Intrinsic::amdgcn_cvt_pknorm_i16: + case Intrinsic::amdgcn_cvt_pknorm_u16: + case Intrinsic::amdgcn_cvt_pk_i16: + case Intrinsic::amdgcn_cvt_pk_u16: { + // FIXME: Stop adding cast if v2f16/v2i16 are legal. EVT VT = Op.getValueType(); - SDValue Node = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, DL, MVT::i32, + unsigned Opcode; + + if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz) + Opcode = AMDGPUISD::CVT_PKRTZ_F16_F32; + else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16) + Opcode = AMDGPUISD::CVT_PKNORM_I16_F32; + else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16) + Opcode = AMDGPUISD::CVT_PKNORM_U16_F32; + else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16) + Opcode = AMDGPUISD::CVT_PK_I16_I32; + else + Opcode = AMDGPUISD::CVT_PK_U16_U32; + + SDValue Node = DAG.getNode(Opcode, DL, MVT::i32, Op.getOperand(1), Op.getOperand(2)); return DAG.getNode(ISD::BITCAST, DL, VT, Node); } Modified: head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -871,24 +871,6 @@ bool isArgPassedInSGPR(const Argument *A) { } } -// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence. -bool isUniformMMO(const MachineMemOperand *MMO) { - const Value *Ptr = MMO->getValue(); - // UndefValue means this is a load of a kernel input. These are uniform. - // Sometimes LDS instructions have constant pointers. - // If Ptr is null, then that means this mem operand contains a - // PseudoSourceValue like GOT. - if (!Ptr || isa<UndefValue>(Ptr) || - isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) - return true; - - if (const Argument *Arg = dyn_cast<Argument>(Ptr)) - return isArgPassedInSGPR(Arg); - - const Instruction *I = dyn_cast<Instruction>(Ptr); - return I && I->getMetadata("amdgpu.uniform"); -} - int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { if (isGCN3Encoding(ST)) return ByteOffset; Modified: head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Fri Feb 16 20:45:32 2018 (r329410) @@ -363,7 +363,6 @@ LLVM_READNONE bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); bool isArgPassedInSGPR(const Argument *Arg); -bool isUniformMMO(const MachineMemOperand *MMO); /// \returns The encoding that will be used for \p ByteOffset in the SMRD /// offset field. Modified: head/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td ============================================================================== --- head/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td Fri Feb 16 20:45:32 2018 (r329410) @@ -407,11 +407,11 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b3 defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>; defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>; defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst" -defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>; -defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>; +defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_i16_f32>; +defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_u16_f32>; defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>; -defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>>; -defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>>; +defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_u16_u32>; +defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_i16_i32>; } // End SubtargetPredicate = isGCN Modified: head/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp ============================================================================== --- head/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -396,10 +396,14 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst & // rip-relative addressing is actually relative to the *next* instruction. // Since an immediate can follow the mod/rm byte for an instruction, this - // means that we need to bias the immediate field of the instruction with - // the size of the immediate field. If we have this case, add it into the + // means that we need to bias the displacement field of the instruction with + // the size of the immediate field. If we have this case, add it into the // expression to emit. - int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + // Note: rip-relative addressing using immediate displacement values should + // not be adjusted, assuming it was the user's intent. + int ImmSize = !Disp.isImm() && X86II::hasImm(TSFlags) + ? X86II::getSizeOfImm(TSFlags) + : 0; EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); Modified: head/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp ============================================================================== --- head/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -370,6 +370,8 @@ static void printIntelMemReference(X86AsmPrinter &P, c static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { unsigned Reg = MO.getReg(); + bool EmitPercent = true; + switch (Mode) { default: return true; // Unknown mode. case 'b': // Print QImode register @@ -384,6 +386,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const case 'k': // Print SImode register Reg = getX86SubSuperRegister(Reg, 32); break; + case 'V': + EmitPercent = false; + LLVM_FALLTHROUGH; case 'q': // Print 64-bit register names if 64-bit integer registers are available. // Otherwise, print 32-bit register names. @@ -391,7 +396,10 @@ static bool printAsmMRegister(X86AsmPrinter &P, const break; } - O << '%' << X86ATTInstPrinter::getRegisterName(Reg); + if (EmitPercent) + O << '%'; + + O << X86ATTInstPrinter::getRegisterName(Reg); return false; } @@ -464,6 +472,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr case 'w': // Print HImode register case 'k': // Print SImode register case 'q': // Print DImode register + case 'V': // Print native register without '%' if (MO.isReg()) return printAsmMRegister(*this, MO, ExtraCode[0], O); printOperand(*this, MI, OpNo, O); Modified: head/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp ============================================================================== --- head/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -663,8 +663,10 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::XOR32rr, X86::KXORDrr); createReplacer(X86::XOR64rr, X86::KXORQrr); - createReplacer(X86::TEST32rr, X86::KTESTDrr); - createReplacer(X86::TEST64rr, X86::KTESTQrr); + // TODO: KTEST is not a replacement for TEST due to flag differences. Need + // to prove only Z flag is used. + //createReplacer(X86::TEST32rr, X86::KTESTDrr); + //createReplacer(X86::TEST64rr, X86::KTESTQrr); } if (STI->hasDQI()) { @@ -684,8 +686,10 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::SHR8ri, X86::KSHIFTRBri); createReplacer(X86::SHL8ri, X86::KSHIFTLBri); - createReplacer(X86::TEST8rr, X86::KTESTBrr); - createReplacer(X86::TEST16rr, X86::KTESTWrr); + // TODO: KTEST is not a replacement for TEST due to flag differences. Need + // to prove only Z flag is used. + //createReplacer(X86::TEST8rr, X86::KTESTBrr); + //createReplacer(X86::TEST16rr, X86::KTESTWrr); createReplacer(X86::XOR8rr, X86::KXORBrr); } Modified: head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp ============================================================================== --- head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Fri Feb 16 20:44:50 2018 (r329409) +++ head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Fri Feb 16 20:45:32 2018 (r329410) @@ -17017,24 +17017,6 @@ static bool hasNonFlagsUse(SDValue Op) { return false; } -// Emit KTEST instruction for bit vectors on AVX-512 -static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - if (Op.getOpcode() == ISD::BITCAST) { - auto hasKTEST = [&](MVT VT) { - unsigned SizeInBits = VT.getSizeInBits(); - return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) || - (Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64)); - }; - SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType().getSimpleVT(); - if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 && - hasKTEST(Op0VT)) - return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0); - } - return SDValue(); -} - /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent. SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, @@ -17079,9 +17061,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsign // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. if (Op.getResNo() != 0 || NeedOF || NeedCF) { - // Emit KTEST for bit vectors - if (auto Node = EmitKTEST(Op, DAG, Subtarget)) - return Node; // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, Op.getValueType())); @@ -17310,10 +17289,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsign } if (Opcode == 0) { - // Emit KTEST for bit vectors - if (auto Node = EmitKTEST(Op, DAG, Subtarget)) - return Node; - // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, Op.getValueType())); @@ -18093,6 +18068,34 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtar return Result; } +// Try to select this as a KTEST+SETCC if possible. +static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + // Only support equality comparisons. + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return SDValue(); + + // Must be a bitcast from vXi1. + if (Op0.getOpcode() != ISD::BITCAST) + return SDValue(); + + Op0 = Op0.getOperand(0); + MVT VT = Op0.getSimpleValueType(); + if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) && + !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))) + return SDValue(); + + X86::CondCode X86CC; + if (isNullConstant(Op1)) { + X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE; + } else + return SDValue(); + + SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0); + return getSETCC(X86CC, KTEST, dl, DAG); +} + SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); @@ -18115,6 +18118,10 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, Sele return NewSetCC; } + // Try to lower using KTEST. + if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget)) + return NewSetCC; + // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. if ((isOneConstant(Op1) || isNullConstant(Op1)) && @@ -20525,6 +20532,18 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDV Mask = DAG.getBitcast(MaskVT, Mask); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask); } + case KUNPCK: { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2); + + SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); + SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); + // Arguments should be swapped. + SDValue Res = DAG.getNode(IntrData->Opc0, dl, + MVT::getVectorVT(MVT::i1, VT.getSizeInBits()), + Src2, Src1); + return DAG.getBitcast(VT, Res); + } case MASK_BINOP: { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); @@ -27094,28 +27113,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, unsigned Reg) { + if (Subtarget.useRetpolineExternalThunk()) { + // When using an external thunk for retpolines, we pick names that match the + // names GCC happens to use as well. This helps simplify the implementation + // of the thunks for kernels where they have no easy ability to create + // aliases and are doing non-trivial configuration of the thunk's body. For + // example, the Linux kernel will do boot-time hot patching of the thunk + // bodies and cannot easily export aliases of these to loaded modules. + // + // Note that at any point in the future, we may need to change the semantics + // of how we implement retpolines and at that time will likely change the + // name of the called thunk. Essentially, there is no hard guarantee that + // LLVM will generate calls to specific thunks, we merely make a best-effort + // attempt to help out kernels and other systems where duplicating the + // thunks is costly. + switch (Reg) { + case X86::EAX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__x86_indirect_thunk_eax"; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201802162045.w1GKjWf2020258>