Date: Tue, 22 Jan 2019 20:13:43 +0000 (UTC) From: Dimitry Andric <dim@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r343313 - in projects/clang800-import/contrib/llvm: include/llvm/CodeGen include/llvm/IR lib/CodeGen/AsmPrinter lib/CodeGen/SelectionDAG lib/MC lib/Target/AArch64 lib/Target/AMDGPU lib/... Message-ID: <201901222013.x0MKDh2a032066@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: dim Date: Tue Jan 22 20:13:43 2019 New Revision: 343313 URL: https://svnweb.freebsd.org/changeset/base/343313 Log: Merge llvm release_80 branch r351543, and resolve conflicts. Modified: projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp 
projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Directory Properties: projects/clang800-import/contrib/llvm/ (props changed) Modified: projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h ============================================================================== --- projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h Tue Jan 22 20:13:43 2019 (r343313) @@ -329,6 +329,7 @@ class MachineFunction { bool CallsUnwindInit = false; bool HasEHScopes = false; bool HasEHFunclets = false; + bool HasLocalEscape = false; /// List of C++ TypeInfo used. std::vector<const GlobalValue *> TypeInfos; @@ -810,6 +811,9 @@ class MachineFunction { bool hasEHFunclets() const { return HasEHFunclets; } void setHasEHFunclets(bool V) { HasEHFunclets = V; } + + bool hasLocalEscape() const { return HasLocalEscape; } + void setHasLocalEscape(bool V) { HasLocalEscape = V; } /// Find or create an LandingPadInfo for the specified MachineBasicBlock. 
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad); Modified: projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td ============================================================================== --- projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td Tue Jan 22 20:13:43 2019 (r343313) @@ -392,6 +392,24 @@ class AMDGPULDSF32Intrin<string clang_builtin> : [IntrArgMemOnly, NoCapture<0>] >; +class AMDGPUDSOrderedIntrinsic : Intrinsic< + [llvm_i32_ty], + // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that + // the bit packing can be optimized at the IR level. + [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0) + llvm_i32_ty, // value to add or swap + llvm_i32_ty, // ordering + llvm_i32_ty, // scope + llvm_i1_ty, // isVolatile + llvm_i32_ty, // ordered count index (OA index), also added to the address + llvm_i1_ty, // wave release, usually set to 1 + llvm_i1_ty], // wave done, set to 1 for the last ordered instruction + [NoCapture<0>] +>; + +def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic; +def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; + def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">; def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">; def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">; Modified: projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -545,15 +545,17 @@ void WinException::emitCSpecificHandlerTable(const Mac OS.AddComment(Comment); 
}; - // Emit a label assignment with the SEH frame offset so we can use it for - // llvm.eh.recoverfp. - StringRef FLinkageName = - GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); - MCSymbol *ParentFrameOffset = - Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); - const MCExpr *MCOffset = - MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + if (!isAArch64) { + // Emit a label assignment with the SEH frame offset so we can use it for + // llvm.eh.recoverfp. + StringRef FLinkageName = + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + } // Use the assembler to compute the number of table entries through label // difference and division. @@ -937,6 +939,9 @@ void WinException::emitEHRegistrationOffsetLabel(const if (FI != INT_MAX) { const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); unsigned UnusedReg; + // FIXME: getFrameIndexReference needs to match the behavior of + // AArch64RegisterInfo::hasBasePointer in which one of the scenarios where + // SP is used is if frame size >= 256. 
Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg); } Modified: projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -6182,6 +6182,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst .addFrameIndex(FI); } + MF.setHasLocalEscape(true); + return nullptr; } Modified: projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -453,6 +453,38 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, } } +// Returns the epilog symbol of an epilog with the exact same unwind code +// sequence, if it exists. Otherwise, returns nullptr. +// EpilogInstrs - Unwind codes for the current epilog. +// Epilogs - Epilogs that potentially match the current epilog. 
+static MCSymbol* +FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs, + const std::vector<MCSymbol *>& Epilogs, + const WinEH::FrameInfo *info) { + for (auto *EpilogStart : Epilogs) { + auto InstrsIter = info->EpilogMap.find(EpilogStart); + assert(InstrsIter != info->EpilogMap.end() && + "Epilog not found in EpilogMap"); + const auto &Instrs = InstrsIter->second; + + if (Instrs.size() != EpilogInstrs.size()) + continue; + + bool Match = true; + for (unsigned i = 0; i < Instrs.size(); ++i) + if (Instrs[i].Operation != EpilogInstrs[i].Operation || + Instrs[i].Offset != EpilogInstrs[i].Offset || + Instrs[i].Register != EpilogInstrs[i].Register) { + Match = false; + break; + } + + if (Match) + return EpilogStart; + } + return nullptr; +} + // Populate the .xdata section. The format of .xdata on ARM64 is documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { @@ -477,12 +509,28 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, // Process epilogs. MapVector<MCSymbol *, uint32_t> EpilogInfo; + // Epilogs processed so far. + std::vector<MCSymbol *> AddedEpilogs; + for (auto &I : info->EpilogMap) { MCSymbol *EpilogStart = I.first; auto &EpilogInstrs = I.second; uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs); - EpilogInfo[EpilogStart] = TotalCodeBytes; - TotalCodeBytes += CodeBytes; + + MCSymbol* MatchingEpilog = + FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info); + if (MatchingEpilog) { + assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() && + "Duplicate epilog not found"); + EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog]; + // Clear the unwind codes in the EpilogMap, so that they don't get output + // in the logic below. 
+ EpilogInstrs.clear(); + } else { + EpilogInfo[EpilogStart] = TotalCodeBytes; + TotalCodeBytes += CodeBytes; + AddedEpilogs.push_back(EpilogStart); + } } // Code Words, Epilog count, E, X, Vers, Function Length Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -694,6 +694,34 @@ void AArch64AsmPrinter::EmitInstruction(const MachineI switch (MI->getOpcode()) { default: break; + case AArch64::MOVMCSym: { + unsigned DestReg = MI->getOperand(0).getReg(); + const MachineOperand &MO_Sym = MI->getOperand(1); + MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym); + MCOperand Hi_MCSym, Lo_MCSym; + + Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S); + Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC); + + MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym); + MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym); + + MCInst MovZ; + MovZ.setOpcode(AArch64::MOVZXi); + MovZ.addOperand(MCOperand::createReg(DestReg)); + MovZ.addOperand(Hi_MCSym); + MovZ.addOperand(MCOperand::createImm(16)); + EmitToStreamer(*OutStreamer, MovZ); + + MCInst MovK; + MovK.setOpcode(AArch64::MOVKXi); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(Lo_MCSym); + MovK.addOperand(MCOperand::createImm(0)); + EmitToStreamer(*OutStreamer, MovK); + return; + } case AArch64::MOVIv2d_ns: // If the target has <rdar://problem/16473581>, lower this // instruction to movi.16b instead. 
Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -228,6 +228,10 @@ bool AArch64FrameLowering::hasFP(const MachineFunction MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) return true; + // Win64 SEH requires frame pointer if funclets are present. + if (MF.hasLocalEscape()) + return true; + return false; } Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -2743,6 +2743,34 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN case Intrinsic::aarch64_neon_umin: return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::localaddress: { + // Returns one of the stack, base, or frame pointer registers, depending on + // which is used to reference local variables. + MachineFunction &MF = DAG.getMachineFunction(); + const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned Reg; + if (RegInfo->hasBasePointer(MF)) + Reg = RegInfo->getBaseRegister(); + else // This function handles the SP or FP case. + Reg = RegInfo->getFrameRegister(MF); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, + Op.getSimpleValueType()); + } + + case Intrinsic::eh_recoverfp: { + // FIXME: This needs to be implemented to correctly handle highly aligned + // stack objects. 
For now we simply return the incoming FP. Refer D53541 + // for more details. + SDValue FnOp = Op.getOperand(1); + SDValue IncomingFPOp = Op.getOperand(2); + GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); + auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); + if (!Fn) + report_fatal_error( + "llvm.eh.recoverfp must take a function as the first argument"); + return IncomingFPOp; + } } } Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td Tue Jan 22 20:13:43 2019 (r343313) @@ -133,7 +133,11 @@ def UseNegativeImmediates : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates", "NegativeImmediates">; +def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", + SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisInt<1>]>>; + //===----------------------------------------------------------------------===// // AArch64-specific DAG Nodes. 
// @@ -6800,6 +6804,9 @@ def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$F (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; + +def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; +def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -466,6 +466,13 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineB // Modify MI as necessary to handle as much of 'Offset' as possible Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); + + if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) { + MachineOperand &FI = MI.getOperand(FIOperandNum); + FI.ChangeToImmediate(Offset); + return; + } + if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) return; Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h Tue Jan 22 20:13:43 2019 (r343313) @@ -254,7 +254,7 @@ namespace AMDGPUAS { FLAT_ADDRESS = 0, ///< Address space for flat memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - REGION_ADDRESS = 2, ///< Address space for region memory. + REGION_ADDRESS = 2, ///< Address space for region memory. 
(GDS) CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2) LOCAL_ADDRESS = 3, ///< Address space for local memory. Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(un NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) + NODE_NAME_CASE(DS_ORDERED_COUNT) NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h Tue Jan 22 20:13:43 2019 (r343313) @@ -474,6 +474,7 @@ enum NodeType : unsigned { TBUFFER_STORE_FORMAT_D16, TBUFFER_LOAD_FORMAT, TBUFFER_LOAD_FORMAT_D16, + DS_ORDERED_COUNT, ATOMIC_CMP_SWAP, ATOMIC_INC, ATOMIC_DEC, Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td Tue Jan 22 20:13:43 2019 (r343313) @@ -72,6 +72,8 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor> def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>; def : 
SourceOfDivergence<int_amdgcn_ps_live>; def : SourceOfDivergence<int_amdgcn_ds_swizzle>; +def : SourceOfDivergence<int_amdgcn_ds_ordered_add>; +def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>; foreach intr = AMDGPUImageDimAtomicIntrinsics in def : SourceOfDivergence<intr>; Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Ins switch (Inst->getIntrinsicID()) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_ds_fadd: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td Tue Jan 22 20:13:43 2019 (r343313) @@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_ defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">; +def : Pat < + (SIds_ordered_count i32:$value, i16:$offset), + (DS_ORDERED_COUNT $value, (as_i16imm $offset)) +>; + //===----------------------------------------------------------------------===// // Real instructions //===----------------------------------------------------------------------===// Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) { } } -static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) { +static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, + const MachineInstr &MI) { + if (TII.isAlwaysGDS(MI.getOpcode())) + return true; + switch (MI.getOpcode()) { case AMDGPU::S_SENDMSG: case AMDGPU::S_SENDMSGHALT: case AMDGPU::S_TTRACEDATA: return true; + // These DS opcodes don't support GDS. + case AMDGPU::DS_NOP: + case AMDGPU::DS_PERMUTE_B32: + case AMDGPU::DS_BPERMUTE_B32: + return false; default: - // TODO: GDS + if (TII.isDS(MI.getOpcode())) { + int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::gds); + if (MI.getOperand(GDS).getImm()) + return true; + } return false; } } @@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stal checkReadM0Hazards(MI) > 0) return NoopHazard; - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) && + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) && checkReadM0Hazards(MI) > 0) return NoopHazard; @@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInst isSMovRel(MI->getOpcode()))) return std::max(WaitStates, checkReadM0Hazards(MI)); - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI)) + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) return std::max(WaitStates, checkReadM0Hazards(MI)); return WaitStates; Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jan 22 
20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInf switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_ds_fadd: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { @@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicI switch (II->getIntrinsicID()) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_ds_fadd: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { @@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDVal SDLoc DL(Op); switch (IntrID) { + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: { + MemSDNode *M = cast<MemSDNode>(Op); + SDValue Chain = M->getOperand(0); + SDValue M0 = M->getOperand(2); + SDValue Value = M->getOperand(3); + unsigned OrderedCountIndex = M->getConstantOperandVal(7); + unsigned WaveRelease = M->getConstantOperandVal(8); + unsigned WaveDone = M->getConstantOperandVal(9); + unsigned ShaderType; + unsigned Instruction; + + switch (IntrID) { + case Intrinsic::amdgcn_ds_ordered_add: + Instruction = 0; + break; + case Intrinsic::amdgcn_ds_ordered_swap: + Instruction = 1; + break; + } + + if (WaveDone && !WaveRelease) + report_fatal_error("ds_ordered_count: wave_done requires wave_release"); + + switch (DAG.getMachineFunction().getFunction().getCallingConv()) { + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_KERNEL: + ShaderType = 0; + break; + case CallingConv::AMDGPU_PS: + ShaderType = 1; + break; + case CallingConv::AMDGPU_VS: + ShaderType = 2; + break; + case CallingConv::AMDGPU_GS: + ShaderType = 3; + 
break; + default: + report_fatal_error("ds_ordered_count unsupported for this calling conv"); + } + + unsigned Offset0 = OrderedCountIndex << 2; + unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) | + (Instruction << 4); + unsigned Offset = Offset0 | (Offset1 << 8); + + SDValue Ops[] = { + Chain, + Value, + DAG.getTargetConstant(Offset, DL, MVT::i16), + copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue + }; + return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL, + M->getVTList(), Ops, M->getMemoryVT(), + M->getMemOperand()); + } case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: case Intrinsic::amdgcn_ds_fadd: Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -536,11 +536,14 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo CurrScore); } if (Inst.mayStore()) { - setExpScore( - &Inst, TII, TRI, MRI, - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), - CurrScore); if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::data0) != -1) { + setExpScore( + &Inst, TII, TRI, MRI, + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), + CurrScore); + } + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data1) != -1) { setExpScore(&Inst, TII, TRI, MRI, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), @@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(Machine // bracket and the destination operand scores. // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere. 
if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) { - if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) { + if (TII->isAlwaysGDS(Inst.getOpcode()) || + TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) { ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst); ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst); } else { Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineIn changesVGPRIndexingMode(MI); } +bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const { + return Opcode == AMDGPU::DS_ORDERED_COUNT || + Opcode == AMDGPU::DS_GWS_INIT || + Opcode == AMDGPU::DS_GWS_SEMA_V || + Opcode == AMDGPU::DS_GWS_SEMA_BR || + Opcode == AMDGPU::DS_GWS_SEMA_P || + Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL || + Opcode == AMDGPU::DS_GWS_BARRIER; +} + bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); @@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(cons // EXEC = 0, but checking for that case here seems not worth it // given the typical code patterns. 
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || - Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE) + Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE || + Opcode == AMDGPU::DS_ORDERED_COUNT) return true; if (MI.isInlineAsm()) Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h Tue Jan 22 20:13:43 2019 (r343313) @@ -450,6 +450,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::DS; } + bool isAlwaysGDS(uint16_t Opcode) const; + static bool isMIMG(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::MIMG; } Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td Tue Jan 22 20:13:43 2019 (r343313) @@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", [SDNPMayLoad, SDNPMemOperand] >; +def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>, + [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue] +>; + def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2, [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] >; Modified: projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp Tue Jan 22 20:08:25 
2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -17,6 +17,7 @@ #include "MSP430InstrInfo.h" #include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -28,6 +29,7 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/TargetRegistry.h" @@ -44,6 +46,8 @@ namespace { StringRef getPassName() const override { return "MSP430 Assembly Printer"; } + bool runOnMachineFunction(MachineFunction &MF) override; + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char* Modifier = nullptr); void printSrcMemOperand(const MachineInstr *MI, int OpNum, @@ -55,6 +59,8 @@ namespace { unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; void EmitInstruction(const MachineInstr *MI) override; + + void EmitInterruptVectorSection(MachineFunction &ISR); }; } // end of anonymous namespace @@ -151,6 +157,32 @@ void MSP430AsmPrinter::EmitInstruction(const MachineIn MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); +} + +void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) { + MCSection *Cur = OutStreamer->getCurrentSectionOnly(); + const auto *F = &ISR.getFunction(); + assert(F->hasFnAttribute("interrupt") && + "Functions with MSP430_INTR CC should have 'interrupt' attribute"); + StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString(); + MCSection *IV = OutStreamer->getContext().getELFSection( + "__interrupt_vector_" + IVIdx, + ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); + OutStreamer->SwitchSection(IV); + + const MCSymbol *FunctionSymbol = 
getSymbol(F); + OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + OutStreamer->SwitchSection(Cur); +} + +bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + // Emit separate section for an interrupt vector if ISR + if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR) + EmitInterruptVectorSection(MF); + + SetupMachineFunction(MF); + EmitFunctionBody(); + return false; } // Force static initialization. Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -27202,6 +27202,8 @@ const char *X86TargetLowering::getTargetNodeName(unsig case X86ISD::VSHLI: return "X86ISD::VSHLI"; case X86ISD::VSRLI: return "X86ISD::VSRLI"; case X86ISD::VSRAI: return "X86ISD::VSRAI"; + case X86ISD::VSHLV: return "X86ISD::VSHLV"; + case X86ISD::VSRLV: return "X86ISD::VSRLV"; case X86ISD::VSRAV: return "X86ISD::VSRAV"; case X86ISD::VROTLI: return "X86ISD::VROTLI"; case X86ISD::VROTRI: return "X86ISD::VROTRI"; Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h Tue Jan 22 20:13:43 2019 (r343313) @@ -315,10 +315,8 @@ namespace llvm { // Vector shift elements VSHL, VSRL, VSRA, - // Vector variable shift right arithmetic. - // Unlike ISD::SRA, in case shift count greater then element size - // use sign bit to fill destination data element. 
- VSRAV, + // Vector variable shift + VSHLV, VSRLV, VSRAV, // Vector shift elements by immediate VSHLI, VSRLI, VSRAI, Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td Tue Jan 22 20:13:43 2019 (r343313) @@ -6445,52 +6445,53 @@ defm : avx512_var_shift_lowering<avx512vl_i16_info, "V defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>; // Special handing for handling VPSRAV intrinsics. -multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _, - list<Predicate> p> { +multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode, + X86VectorVTInfo _, list<Predicate> p> { let Predicates = p in { - def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)), + def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)), (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1, _.RC:$src2)>; - def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))), + def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))), (!cast<Instruction>(InstrStr#_.ZSuffix##rm) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)), + (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)), (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)), + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), _.RC:$src0)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), + (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) 
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)), + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask, _.RC:$src1, addr:$src2)>; } } -multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _, - list<Predicate> p> : - avx512_var_shift_int_lowering<InstrStr, _, p> { +multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode, + X86VectorVTInfo _, + list<Predicate> p> : + avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> { let Predicates = p in { - def : Pat<(_.VT (X86vsrav _.RC:$src1, + def : Pat<(_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), (!cast<Instruction>(InstrStr#_.ZSuffix##rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, + (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.RC:$src0)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, + (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask, @@ -6498,15 +6499,47 @@ multiclass avx512_var_shift_int_lowering_mb<string Ins } } -defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>; -defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>; -defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>; -defm : 
avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>; +multiclass avx512_var_shift_int_lowering_vl<string InstrStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, + Predicate p> { + defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info512, [p]>; + defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info256, + [HasVLX, p]>; + defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info128, + [HasVLX, p]>; +} + +multiclass avx512_var_shift_int_lowering_mb_vl<string InstrStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, + Predicate p> { + defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info512, [p]>; + defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info256, + [HasVLX, p]>; + defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info128, + [HasVLX, p]>; +} + +defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info, + HasBWI>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav, + avx512vl_i32_info, HasAVX512>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav, + avx512vl_i64_info, HasAVX512>; + +defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info, + HasBWI>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv, + avx512vl_i32_info, HasAVX512>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv, + avx512vl_i64_info, HasAVX512>; + +defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info, + HasBWI>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv, + avx512vl_i32_info, HasAVX512>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv, + avx512vl_i64_info, HasAVX512>; + // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
let Predicates = [HasAVX512, NoVLX] in { Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Jan 22 20:13:43 2019 (r343313) @@ -198,6 +198,8 @@ def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftunifo def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<0>]>; +def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>; +def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>; def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>; def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>; Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td Tue Jan 22 20:13:43 2019 (r343313) @@ -8318,7 +8318,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src)) // Variable Bit Shifts // multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType vt128, ValueType vt256> { + SDNode IntrinNode, ValueType vt128, ValueType vt256> { def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -8347,23 +8347,23 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeSt (vt256 (load addr:$src2)))))]>, VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, SchedWriteVarVecShift.YMM.ReadAfterFold]>; + + def : Pat<(vt128 (IntrinNode VR128:$src1, VR128:$src2)), + (!cast<Instruction>(NAME#"rr") VR128:$src1, VR128:$src2)>; + def : 
Pat<(vt128 (IntrinNode VR128:$src1, (load addr:$src2))), + (!cast<Instruction>(NAME#"rm") VR128:$src1, addr:$src2)>; + def : Pat<(vt256 (IntrinNode VR256:$src1, VR256:$src2)), + (!cast<Instruction>(NAME#"Yrr") VR256:$src1, VR256:$src2)>; + def : Pat<(vt256 (IntrinNode VR256:$src1, (load addr:$src2))), + (!cast<Instruction>(NAME#"Yrm") VR256:$src1, addr:$src2)>; } let Predicates = [HasAVX2, NoVLX] in { - defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; - defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; - defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; - defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; - defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; - - def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)), - (VPSRAVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))), - (VPSRAVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)), - (VPSRAVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))), - (VPSRAVDYrm VR256:$src1, addr:$src2)>; + defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, X86vshlv, v4i32, v8i32>; + defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, X86vshlv, v2i64, v4i64>, VEX_W; + defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, X86vsrlv, v4i32, v8i32>; + defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, X86vsrlv, v2i64, v4i64>, VEX_W; + defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, X86vsrav, v4i32, v8i32>; } //===----------------------------------------------------------------------===// Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h Tue Jan 22 20:08:25 2019 (r343312) +++ 
projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h Tue Jan 22 20:13:43 2019 (r343313) @@ -389,10 +389,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0), @@ -405,10 +405,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), 
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), @@ -943,11 +943,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0), @@ -971,11 +971,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0), + 
X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), Modified: projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -3065,9 +3065,11 @@ static bool TryToSinkInstruction(Instruction *I, Basic I->isTerminator()) return false; - // Do not sink alloca instructions out of the entry block. - if (isa<AllocaInst>(I) && I->getParent() == - &DestBlock->getParent()->getEntryBlock()) + // Do not sink static or dynamic alloca instructions. Static allocas must + // remain in the entry block, and dynamic allocas must not be sunk in between + // a stacksave / stackrestore pair, which would incorrectly shorten its + // lifetime. + if (isa<AllocaInst>(I)) return false; // Do not sink into catchswitch blocks. 
Modified: projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -3031,7 +3031,10 @@ class llvm::sroa::AllocaSliceRewriter (private) ConstantInt *Size = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), NewEndOffset - NewBeginOffset); - Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); + // Lifetime intrinsics always expect an i8* so directly get such a pointer + // for the new alloca slice. + Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace()); + Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy); Value *New; if (II.getIntrinsicID() == Intrinsic::lifetime_start) New = IRB.CreateLifetimeStart(Ptr, Size); Modified: projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ============================================================================== --- projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Tue Jan 22 20:08:25 2019 (r343312) +++ projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Tue Jan 22 20:13:43 2019 (r343313) @@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsi // If any of the scalars is marked as a value that needs to stay scalar, then // we need to gather the scalars. + // The reduction nodes (stored in UserIgnoreList) also should stay scalar. for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (MustGather.count(VL[i])) { + if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); newTreeEntry(VL, false, UserTreeIdx); return;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201901222013.x0MKDh2a032066>