Date: Wed, 24 Aug 2016 17:43:09 +0000 (UTC)
From: Dimitry Andric <dim@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r304770 - in projects/clang390-import/contrib/llvm: include/llvm/Transforms/Scalar lib/Analysis lib/Target/AArch64 lib/Target/PowerPC lib/Transforms/Scalar lib/Transforms/Utils lib/Tran...
Message-ID: <201608241743.u7OHh9uL040304@repo.freebsd.org>
Author: dim
Date: Wed Aug 24 17:43:08 2016
New Revision: 304770
URL: https://svnweb.freebsd.org/changeset/base/304770

Log:
  Update llvm to release_39 branch r279477.

Modified:
  projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h
  projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
  projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
  projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
  projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
  projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Directory Properties:
  projects/clang390-import/contrib/llvm/   (props changed)

Modified: projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h
==============================================================================
--- projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h	Wed Aug 24 17:39:40 2016	(r304769)
+++ projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h	Wed Aug 24 17:43:08 2016	(r304770)
@@ -65,7 +65,7 @@ public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
 
 private:
-  void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
+  void BuildRankMap(Function &F);
   unsigned getRank(Value *V);
   void canonicalizeOperands(Instruction *I);
   void ReassociateExpression(BinaryOperator *I);

Modified: projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp	Wed Aug 24 17:39:40 2016	(r304769)
+++ projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp	Wed Aug 24 17:43:08 2016	(r304770)
@@ -4822,6 +4822,10 @@ bool ScalarEvolution::isSCEVExprNeverPoi
     // from different loops, so that we know which loop to prove that I is
     // executed in.
     for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
+      // I could be an extractvalue from a call to an overflow intrinsic.
+      // TODO: We can do better here in some cases.
+      if (!isSCEVable(I->getOperand(OpIndex)->getType()))
+        return false;
      const SCEV *Op = getSCEV(I->getOperand(OpIndex));
      if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
        bool AllOtherOpsLoopInvariant = true;
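For context on the new isSCEVable() guard: an extractvalue's operand can be the {i32, i1} aggregate returned by an overflow intrinsic, and aggregate types are not SCEVable, so calling getSCEV() on them would fail. A hypothetical reduced trigger (an illustration assuming clang's lowering of __builtin_add_overflow to llvm.sadd.with.overflow plus extractvalue; not the commit's regression test) might look like:

    // Hypothetical trigger; the function names are made up.
    extern void use(int);

    void f(int n, int step) {
      for (int i = 0; i < n;) {
        int next;
        // clang lowers this to llvm.sadd.with.overflow.i32 + extractvalue.
        if (__builtin_add_overflow(i, step, &next))
          break;
        i = next;
        use(i);
      }
    }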
Modified: projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp	Wed Aug 24 17:39:40 2016	(r304769)
+++ projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp	Wed Aug 24 17:43:08 2016	(r304770)
@@ -1258,8 +1258,11 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
       if (MIIsUnscaled) {
         // If the unscaled offset isn't a multiple of the MemSize, we can't
         // pair the operations together: bail and keep looking.
-        if (MIOffset % MemSize)
+        if (MIOffset % MemSize) {
+          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+          MemInsns.push_back(&MI);
           continue;
+        }
         MIOffset /= MemSize;
       } else {
         MIOffset *= MemSize;
@@ -1424,9 +1427,6 @@ bool AArch64LoadStoreOpt::isMatchingUpda
   default:
     break;
   case AArch64::SUBXri:
-    // Negate the offset for a SUB instruction.
-    Offset *= -1;
-    // FALLTHROUGH
   case AArch64::ADDXri:
     // Make sure it's a vanilla immediate operand, not a relocation or
     // anything else we can't handle.
@@ -1444,6 +1444,9 @@ bool AArch64LoadStoreOpt::isMatchingUpda
     bool IsPairedInsn = isPairedLdSt(MemMI);
     int UpdateOffset = MI.getOperand(2).getImm();
+    if (MI.getOpcode() == AArch64::SUBXri)
+      UpdateOffset = -UpdateOffset;
+
     // For non-paired load/store instructions, the immediate must fit in a
     // signed 9-bit integer.
     if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
@@ -1458,13 +1461,13 @@ bool AArch64LoadStoreOpt::isMatchingUpda
         break;
 
       int ScaledOffset = UpdateOffset / Scale;
-      if (ScaledOffset > 64 || ScaledOffset < -64)
+      if (ScaledOffset > 63 || ScaledOffset < -64)
         break;
     }
 
     // If we have a non-zero Offset, we check that it matches the amount
     // we're adding to the register.
-    if (!Offset || Offset == MI.getOperand(2).getImm())
+    if (!Offset || Offset == UpdateOffset)
       return true;
     break;
   }
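The ScaledOffset bound change above is worth spelling out: paired pre/post-indexed AArch64 loads and stores encode their offset as a scaled, signed 7-bit immediate, so the representable range is [-64, 63] and the old upper bound of 64 was off by one. A minimal standalone sketch of that legality check (isLegalPairedUpdateOffset is a hypothetical helper name, not the pass's API):

    // Hypothetical helper, simplified from the checks in the patch: can
    // UpdateOffset be encoded as the scaled immediate of a paired
    // pre/post-indexed load/store with access size Scale?
    bool isLegalPairedUpdateOffset(int UpdateOffset, int Scale) {
      if (UpdateOffset % Scale != 0)
        return false;                       // must be a multiple of the access size
      int Scaled = UpdateOffset / Scale;
      return Scaled >= -64 && Scaled <= 63; // signed 7-bit immediate field
    }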
Modified: projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp	Wed Aug 24 17:39:40 2016	(r304769)
+++ projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp	Wed Aug 24 17:43:08 2016	(r304770)
@@ -4033,11 +4033,18 @@ PPCTargetLowering::IsEligibleForTailCall
   if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
     return false;
 
-  // Functions containing by val parameters are not supported.
+  // Caller contains any byval parameter is not supported.
   if (std::any_of(Ins.begin(), Ins.end(),
                   [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
     return false;
 
+  // Callee contains any byval parameter is not supported, too.
+  // Note: This is a quick work around, because in some cases, e.g.
+  // caller's stack size > callee's stack size, we are still able to apply
+  // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
+  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
+    return false;
+
   // No TCO/SCO on indirect call because Caller have to restore its TOC
   if (!isFunctionGlobalAddress(Callee) &&
       !isa<ExternalSymbolSDNode>(Callee))
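As a hedged illustration of why byval arguments block sibling calls (the type and function names below are invented, not from the commit): a byval aggregate lives in the caller's outgoing-argument area, and a sibling call reuses the caller's stack frame, so forwarding the aggregate can end up copying between overlapping stack slots:

    // Hypothetical example; on powerpc64 an aggregate this large is passed
    // byval on the stack.
    struct Big { int v[32]; };

    int consume(struct Big b);

    int forward(struct Big b) {
      // With this change the byval argument makes the call ineligible for
      // tail/sibling-call optimization, so a normal call sequence is kept.
      return consume(b);
    }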
Modified: projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp	Wed Aug 24 17:39:40 2016	(r304769)
+++ projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp	Wed Aug 24 17:43:08 2016	(r304770)
@@ -145,8 +145,7 @@ static BinaryOperator *isReassociableOp(
   return nullptr;
 }
 
-void ReassociatePass::BuildRankMap(
-    Function &F, ReversePostOrderTraversal<Function *> &RPOT) {
+void ReassociatePass::BuildRankMap(Function &F) {
   unsigned i = 2;
 
   // Assign distinct ranks to function arguments.
@@ -155,6 +154,7 @@ void ReassociatePass::BuildRankMap(
     DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
   }
 
+  ReversePostOrderTraversal<Function *> RPOT(&F);
   for (BasicBlock *BB : RPOT) {
     unsigned BBRank = RankMap[BB] = ++i << 16;
 
@@ -2172,28 +2172,13 @@ void ReassociatePass::ReassociateExpress
 }
 
 PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
-  // Reassociate needs for each instruction to have its operands already
-  // processed, so we first perform a RPOT of the basic blocks so that
-  // when we process a basic block, all its dominators have been processed
-  // before.
-  ReversePostOrderTraversal<Function *> RPOT(&F);
-  BuildRankMap(F, RPOT);
+  // Calculate the rank map for F.
+  BuildRankMap(F);
 
   MadeChange = false;
-  for (BasicBlock *BI : RPOT) {
-    // Use a worklist to keep track of which instructions have been processed
-    // (and which insts won't be optimized again) so when redoing insts,
-    // optimize insts rightaway which won't be processed later.
-    SmallSet<Instruction *, 8> Worklist;
-
-    // Insert all instructions in the BB
-    for (Instruction &I : *BI)
-      Worklist.insert(&I);
-
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
     // Optimize every instruction in the basic block.
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) {
-      // This instruction has been processed.
-      Worklist.erase(&*II);
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
       if (isInstructionTriviallyDead(&*II)) {
         EraseInst(&*II++);
       } else {
@@ -2202,22 +2187,27 @@ PreservedAnalyses ReassociatePass::run(F
         ++II;
       }
 
-    // If the above optimizations produced new instructions to optimize or
-    // made modifications which need to be redone, do them now if they won't
-    // be handled later.
-    while (!RedoInsts.empty()) {
-      Instruction *I = RedoInsts.pop_back_val();
-      // Process instructions that won't be processed later, either
-      // inside the block itself or in another basic block (based on rank),
-      // since these will be processed later.
-      if ((I->getParent() != BI || !Worklist.count(I)) &&
-          RankMap[I->getParent()] <= RankMap[BI]) {
-        if (isInstructionTriviallyDead(I))
-          EraseInst(I);
-        else
-          OptimizeInst(I);
-      }
-    }
+    // Make a copy of all the instructions to be redone so we can remove dead
+    // instructions.
+    SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
+    // Iterate over all instructions to be reevaluated and remove trivially dead
+    // instructions. If any operand of the trivially dead instruction becomes
+    // dead mark it for deletion as well. Continue this process until all
+    // trivially dead instructions have been removed.
+    while (!ToRedo.empty()) {
+      Instruction *I = ToRedo.pop_back_val();
+      if (isInstructionTriviallyDead(I))
+        RecursivelyEraseDeadInsts(I, ToRedo);
+    }
+
+    // Now that we have removed dead instructions, we can reoptimize the
+    // remaining instructions.
+    while (!RedoInsts.empty()) {
+      Instruction *I = RedoInsts.pop_back_val();
+      if (isInstructionTriviallyDead(I))
+        EraseInst(I);
+      else
+        OptimizeInst(I);
+    }
   }
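The new two-phase drain relies on RecursivelyEraseDeadInsts: erasing one trivially dead instruction can make its operands dead, and those must go in the same sweep, while AssertingVH guards against dangling entries in the redo set. A self-contained analogue of that cascade (simplified types and hypothetical names, not the LLVM API):

    #include <unordered_set>
    #include <vector>

    // Simplified stand-in for an instruction: its operands plus a use count.
    struct Node {
      std::vector<Node *> Operands;
      int Uses = 0;
      bool HasSideEffects = false;
    };

    // Erase Root if it is dead; any operand whose last use disappears as a
    // result is erased too, mirroring the recursive erasure in the patch.
    void recursivelyEraseDead(Node *Root, std::unordered_set<Node *> &Erased) {
      std::vector<Node *> Work{Root};
      while (!Work.empty()) {
        Node *N = Work.back();
        Work.pop_back();
        if (N->Uses != 0 || N->HasSideEffects || Erased.count(N))
          continue;               // still live, or already erased
        Erased.insert(N);
        for (Node *Op : N->Operands)
          if (--Op->Uses == 0)
            Work.push_back(Op);   // operand just became dead
      }
    }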
Modified: projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp	Wed Aug 24 17:39:40 2016	(r304769)
+++ projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp	Wed Aug 24 17:43:08 2016	(r304770)
@@ -566,6 +566,12 @@ void llvm::CloneAndPruneIntoFromInst(Fun
     if (!I)
       continue;
 
+    // Skip over non-intrinsic callsites, we don't want to remove any nodes from
+    // the CGSCC.
+    CallSite CS = CallSite(I);
+    if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic())
+      continue;
+
     // See if this instruction simplifies.
     Value *SimpleV = SimplifyInstruction(I, DL);
     if (!SimpleV)

Modified: projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp	Wed Aug 24 17:39:40 2016	(r304769)
+++ projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp	Wed Aug 24 17:43:08 2016	(r304770)
@@ -82,8 +82,13 @@ static cl::opt<int> MinVectorRegSizeOpti
     "slp-min-reg-size", cl::init(128), cl::Hidden,
     cl::desc("Attempt to vectorize for this register size in bits"));
 
-// FIXME: Set this via cl::opt to allow overriding.
-static const unsigned RecursionMaxDepth = 12;
+static cl::opt<unsigned> RecursionMaxDepth(
+    "slp-recursion-max-depth", cl::init(12), cl::Hidden,
+    cl::desc("Limit the recursion depth when building a vectorizable tree"));
+
+static cl::opt<unsigned> MinTreeSize(
+    "slp-min-tree-size", cl::init(3), cl::Hidden,
+    cl::desc("Only vectorize small trees if they are fully vectorizable"));
 
 // Limit the number of alias checks. The limit is chosen so that
 // it has no negative effect on the llvm benchmarks.
@@ -1842,7 +1847,7 @@ int BoUpSLP::getTreeCost() {
         VectorizableTree.size() << ".\n");
 
   // We only vectorize tiny trees if it is fully vectorizable.
-  if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
+  if (VectorizableTree.size() < MinTreeSize && !isFullyVectorizableTinyTree()) {
     if (VectorizableTree.empty()) {
       assert(!ExternalUses.size() && "We should not have any external users");
     }
@@ -2124,11 +2129,61 @@ void BoUpSLP::reorderInputsAccordingToOp
 }
 
 void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
-  Instruction *VL0 = cast<Instruction>(VL[0]);
-  BasicBlock::iterator NextInst(VL0);
-  ++NextInst;
-  Builder.SetInsertPoint(VL0->getParent(), NextInst);
-  Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+
+  // Get the basic block this bundle is in. All instructions in the bundle
+  // should be in this block.
+  auto *Front = cast<Instruction>(VL.front());
+  auto *BB = Front->getParent();
+  assert(all_of(make_range(VL.begin(), VL.end()), [&](Value *V) -> bool {
+    return cast<Instruction>(V)->getParent() == BB;
+  }));
+
+  // The last instruction in the bundle in program order.
+  Instruction *LastInst = nullptr;
+
+  // Find the last instruction. The common case should be that BB has been
+  // scheduled, and the last instruction is VL.back(). So we start with
+  // VL.back() and iterate over schedule data until we reach the end of the
+  // bundle. The end of the bundle is marked by null ScheduleData.
+  if (BlocksSchedules.count(BB)) {
+    auto *Bundle = BlocksSchedules[BB]->getScheduleData(VL.back());
+    if (Bundle && Bundle->isPartOfBundle())
+      for (; Bundle; Bundle = Bundle->NextInBundle)
+        LastInst = Bundle->Inst;
+  }
+
+  // LastInst can still be null at this point if there's either not an entry
+  // for BB in BlocksSchedules or there's no ScheduleData available for
+  // VL.back(). This can be the case if buildTree_rec aborts for various
+  // reasons (e.g., the maximum recursion depth is reached, the maximum region
+  // size is reached, etc.). ScheduleData is initialized in the scheduling
+  // "dry-run".
+  //
+  // If this happens, we can still find the last instruction by brute force. We
+  // iterate forwards from Front (inclusive) until we either see all
+  // instructions in the bundle or reach the end of the block. If Front is the
+  // last instruction in program order, LastInst will be set to Front, and we
+  // will visit all the remaining instructions in the block.
+  //
+  // One of the reasons we exit early from buildTree_rec is to place an upper
+  // bound on compile-time. Thus, taking an additional compile-time hit here is
+  // not ideal. However, this should be exceedingly rare since it requires that
+  // we both exit early from buildTree_rec and that the bundle be out-of-order
+  // (causing us to iterate all the way to the end of the block).
+  if (!LastInst) {
+    SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end());
+    for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
+      if (Bundle.erase(&I))
+        LastInst = &I;
+      if (Bundle.empty())
+        break;
+    }
+  }
+
+  // Set the insertion point after the last instruction in the bundle. Set the
+  // debug location to Front.
+  Builder.SetInsertPoint(BB, next(BasicBlock::iterator(LastInst)));
+  Builder.SetCurrentDebugLocation(Front->getDebugLoc());
 }
 
 Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
@@ -2206,7 +2261,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
 
   if (E->NeedToGather) {
     setInsertPointAfterBundle(E->Scalars);
-    return Gather(E->Scalars, VecTy);
+    auto *V = Gather(E->Scalars, VecTy);
+    E->VectorizedValue = V;
+    return V;
   }
 
   unsigned Opcode = getSameOpcode(E->Scalars);
@@ -2253,7 +2310,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
       E->VectorizedValue = V;
       return V;
     }
-    return Gather(E->Scalars, VecTy);
+    setInsertPointAfterBundle(E->Scalars);
+    auto *V = Gather(E->Scalars, VecTy);
+    E->VectorizedValue = V;
+    return V;
  }
  case Instruction::ExtractValue: {
    if (canReuseExtract(E->Scalars, Instruction::ExtractValue)) {
@@ -2265,7 +2325,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
      E->VectorizedValue = V;
      return propagateMetadata(V, E->Scalars);
    }
-    return Gather(E->Scalars, VecTy);
+    setInsertPointAfterBundle(E->Scalars);
+    auto *V = Gather(E->Scalars, VecTy);
+    E->VectorizedValue = V;
+    return V;
  }
  case Instruction::ZExt:
  case Instruction::SExt:
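Since RecursionMaxDepth and MinTreeSize are now hidden cl::opt options, the limits can be tuned without rebuilding, e.g. via opt -slp-vectorizer -slp-recursion-max-depth=6 -slp-min-tree-size=4 (flag spellings taken from the diff above; the defaults stay 12 and 3). A minimal sketch of the same cl::opt pattern in a standalone tool, with a made-up option name:

    #include "llvm/Support/CommandLine.h"

    using namespace llvm;

    // Illustrative clone of the pattern used in the patch; the option name
    // "my-max-depth" is hypothetical.
    static cl::opt<unsigned> MyMaxDepth(
        "my-max-depth", cl::init(12), cl::Hidden,
        cl::desc("Limit recursion depth when building a tree"));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      // The option converts implicitly to unsigned wherever a depth is needed.
      return MyMaxDepth > 0 ? 0 : 1;
    }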