From owner-svn-src-all@freebsd.org Sun Feb 5 19:37:34 2017 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 4012DCD21CE; Sun, 5 Feb 2017 19:37:34 +0000 (UTC) (envelope-from dim@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id E570E168C; Sun, 5 Feb 2017 19:37:33 +0000 (UTC) (envelope-from dim@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v15JbXs1038737; Sun, 5 Feb 2017 19:37:33 GMT (envelope-from dim@FreeBSD.org) Received: (from dim@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v15JbVg8038724; Sun, 5 Feb 2017 19:37:31 GMT (envelope-from dim@FreeBSD.org) Message-Id: <201702051937.v15JbVg8038724@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: dim set sender to dim@FreeBSD.org using -f From: Dimitry Andric Date: Sun, 5 Feb 2017 19:37:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r313289 - in vendor/llvm/dist: lib/CodeGen/SelectionDAG lib/Transforms/InstCombine lib/Transforms/Scalar test/CodeGen/AMDGPU test/CodeGen/NVPTX test/CodeGen/PowerPC test/Object/Inputs t... 
X-SVN-Group: vendor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 05 Feb 2017 19:37:34 -0000 Author: dim Date: Sun Feb 5 19:37:31 2017 New Revision: 313289 URL: https://svnweb.freebsd.org/changeset/base/313289 Log: Vendor import of llvm release_40 branch r294123: https://llvm.org/svn/llvm-project/llvm/branches/release_40@294123 Added: vendor/llvm/dist/test/Object/Inputs/phdr-note.elf-x86-64 (contents, props changed) vendor/llvm/dist/test/Object/Inputs/phdrs.elf-x86-64 (contents, props changed) vendor/llvm/dist/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll vendor/llvm/dist/test/tools/llvm-objdump/X86/openbsd-headers.test vendor/llvm/dist/test/tools/llvm-objdump/X86/phdrs.test Modified: vendor/llvm/dist/lib/CodeGen/SelectionDAG/DAGCombiner.cpp vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineCompares.cpp vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp vendor/llvm/dist/test/CodeGen/AMDGPU/fma-combine.ll vendor/llvm/dist/test/CodeGen/AMDGPU/mad-combine.ll vendor/llvm/dist/test/CodeGen/NVPTX/fma-assoc.ll vendor/llvm/dist/test/CodeGen/PowerPC/fma-assoc.ll vendor/llvm/dist/test/Transforms/InstCombine/minmax-fold.ll vendor/llvm/dist/tools/llvm-objdump/ELFDump.cpp vendor/llvm/dist/utils/release/test-release.sh Modified: vendor/llvm/dist/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ============================================================================== --- vendor/llvm/dist/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Feb 5 19:37:31 2017 (r313289) @@ -8123,9 +8123,12 @@ SDValue DAGCombiner::visitFADDForFMAComb } // More 
folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8137,7 +8140,10 @@ SDValue DAGCombiner::visitFADDForFMAComb } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1->getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL && N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8367,10 +8373,13 @@ SDValue DAGCombiner::visitFSUBForFMAComb } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. 
+ if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8384,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMAComb // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); Modified: vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineCompares.cpp ============================================================================== --- vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineCompares.cpp Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineCompares.cpp Sun Feb 5 19:37:31 2017 (r313289) @@ -4039,11 +4039,6 @@ Instruction *InstCombiner::foldICmpUsing Constant *CMinus1 = ConstantInt::get(Op0->getType(), *CmpC - 1); return new ICmpInst(ICmpInst::ICMP_EQ, Op0, CMinus1); } - // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear - if (CmpC->isMinSignedValue()) { - Constant *AllOnes = Constant::getAllOnesValue(Op0->getType()); - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes); - } } break; } @@ -4063,11 +4058,6 @@ Instruction *InstCombiner::foldICmpUsing if (*CmpC == Op0Max - 1) return new ICmpInst(ICmpInst::ICMP_EQ, Op0, ConstantInt::get(Op1->getType(), *CmpC + 1)); - - // (x >u 2147483647) -> (x <s 0) -> true if sign bit set - if (CmpC->isMaxSignedValue()) - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - Constant::getNullValue(Op0->getType())); } break; } @@ -4299,6 +4289,27 @@ Instruction *InstCombiner::visitICmpInst (SI->getOperand(2) == Op0 && 
SI->getOperand(1) == Op1)) return nullptr; + // FIXME: We only do this after checking for min/max to prevent infinite + // looping caused by a reverse canonicalization of these patterns for min/max. + // FIXME: The organization of folds is a mess. These would naturally go into + // canonicalizeCmpWithConstant(), but we can't move all of the above folds + // down here after the min/max restriction. + ICmpInst::Predicate Pred = I.getPredicate(); + const APInt *C; + if (match(Op1, m_APInt(C))) { + // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set + if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) { + Constant *Zero = Constant::getNullValue(Op0->getType()); + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero); + } + + // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear + if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) { + Constant *AllOnes = Constant::getAllOnesValue(Op0->getType()); + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes); + } + } + if (Instruction *Res = foldICmpInstWithConstant(I)) return Res; Modified: vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp ============================================================================== --- vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp Sun Feb 5 19:37:31 2017 (r313289) @@ -158,8 +158,9 @@ struct MemAccessTy { bool operator!=(MemAccessTy Other) const { return !(*this == Other); } - static MemAccessTy getUnknown(LLVMContext &Ctx) { - return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace); + static MemAccessTy getUnknown(LLVMContext &Ctx, + unsigned AS = UnknownAddressSpace) { + return MemAccessTy(Type::getVoidTy(Ctx), AS); } }; @@ -2279,8 +2280,10 @@ bool LSRInstance::reconcileNewOffset(LSR // TODO: Be less conservative when the type is similar and can use the same // addressing modes. 
if (Kind == LSRUse::Address) { - if (AccessTy != LU.AccessTy) - NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext()); + if (AccessTy.MemTy != LU.AccessTy.MemTy) { + NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(), + AccessTy.AddrSpace); + } } // Conservatively assume HasBaseReg is true for now. Modified: vendor/llvm/dist/test/CodeGen/AMDGPU/fma-combine.ll ============================================================================== --- vendor/llvm/dist/test/CodeGen/AMDGPU/fma-combine.ll Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/test/CodeGen/AMDGPU/fma-combine.ll Sun Feb 5 19:37:31 2017 (r313289) @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s ; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be ; beneficial even without fp32 denormals, but they do require no-infs-fp-math @@ -308,8 +308,14 @@ define void 
@combine_to_fma_fsub_2_f64_2 ; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} ; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} ; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}} -; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]] -; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]] + +; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]] +; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[TMP0]] +; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP1]], -[[Z]] + +; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]] +; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]] + ; SI: buffer_store_dwordx2 [[RESULT]] define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -343,8 +349,14 @@ define void @aggressive_combine_to_fma_f ; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} ; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} ; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}} -; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]] -; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]] + +; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]] +; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[Y]], [[Z]], [[TMP0]] +; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[TMP1]] + +; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], 
[[X]] +; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]] + ; SI: buffer_store_dwordx2 [[RESULT]] define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 Modified: vendor/llvm/dist/test/CodeGen/AMDGPU/mad-combine.ll ============================================================================== --- vendor/llvm/dist/test/CodeGen/AMDGPU/mad-combine.ll Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/test/CodeGen/AMDGPU/mad-combine.ll Sun Feb 5 19:37:31 2017 (r313289) @@ -1,12 +1,12 @@ ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma. -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s ; Make sure we don't form mad with denormals -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=verde 
-mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 @@ -21,7 +21,7 @@ declare float @llvm.fmuladd.f32(float, f ; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] +; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] ; SI-DENORM-SLOWFMAF-NOT: v_fma ; SI-DENORM-SLOWFMAF-NOT: v_mad @@ -58,8 +58,8 @@ define void @combine_to_mad_f32_0(float ; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]] ; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] ; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]] @@ -100,7 +100,7 @@ define void @combine_to_mad_f32_0_2use(f ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] +; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] ; 
SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]] @@ -131,7 +131,7 @@ define void @combine_to_mad_f32_1(float ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]] +; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] ; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]] @@ -164,8 +164,8 @@ define void @combine_to_mad_fsub_0_f32(f ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]] ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] ; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]] @@ -203,7 +203,7 @@ define void @combine_to_mad_fsub_0_f32_2 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]] +; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] ; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]] @@ -235,8 +235,8 @@ define void @combine_to_mad_fsub_1_f32(f ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] -; 
SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] ; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]] @@ -275,7 +275,7 @@ define void @combine_to_mad_fsub_1_f32_2 ; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]] +; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]] ; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]] @@ -309,8 +309,8 @@ define void @combine_to_mad_fsub_2_f32(f ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], -[[B]], -[[C]] ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], -[[B]], -[[D]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]] ; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]] @@ -352,8 +352,8 @@ define void @combine_to_mad_fsub_2_f32_2 ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] -; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] +; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] ; 
SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]] @@ -399,12 +399,9 @@ define void @combine_to_mad_fsub_2_f32_2 ; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]] ; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]] -; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]] - -; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] -; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]] -; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]] +; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]] +; SI-DENORM: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]] ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { @@ -444,12 +441,9 @@ define void @aggressive_combine_to_mad_f ; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]] ; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]] -; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]] - -; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] -; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]] -; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]] +; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]] +; SI-DENORM: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]] ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm @@ -485,19 +479,23 @@ define void @aggressive_combine_to_mad_f ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, 
s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} ; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} -; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]] -; SI-STD: v_mac_f32_e32 [[TMP]], [[B]], [[A]] - -; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]] +; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[A]] +; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP0]] + +; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], [[D]], [[E]], -[[C]] +; SI-STD-UNSAFE: v_mac_f32_e32 [[RESULT]], [[B]], [[A]] + +; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]] +; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]] ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]] ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]] ; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]] -; SI-DENORM: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -532,11 +530,16 @@ define void @aggressive_combine_to_mad_f ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} ; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 
offset:16{{$}} -; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]] -; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]] - -; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]] -; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]] +; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[C]], [[B]] +; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP0]], [[A]] + +; SI-STD-UNSAFE: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]] +; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]] + +; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]] +; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]] ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] ; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]] Modified: vendor/llvm/dist/test/CodeGen/NVPTX/fma-assoc.ll ============================================================================== --- vendor/llvm/dist/test/CodeGen/NVPTX/fma-assoc.ll Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/test/CodeGen/NVPTX/fma-assoc.ll Sun Feb 5 19:37:31 2017 (r313289) @@ -1,9 +1,10 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE define ptx_device float @t1_f32(float %x, float %y, float %z, float %u, float %v) { -; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, 
%f{{[0-9]+}}; ; CHECK: ret; %a = fmul float %x, %y %b = fmul float %u, %v @@ -14,8 +15,8 @@ define ptx_device float @t1_f32(float %x define ptx_device double @t1_f64(double %x, double %y, double %z, double %u, double %v) { -; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; ; CHECK: ret; %a = fmul double %x, %y %b = fmul double %u, %v Modified: vendor/llvm/dist/test/CodeGen/PowerPC/fma-assoc.ll ============================================================================== --- vendor/llvm/dist/test/CodeGen/PowerPC/fma-assoc.ll Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/test/CodeGen/PowerPC/fma-assoc.ll Sun Feb 5 19:37:31 2017 (r313289) @@ -1,5 +1,7 @@ -; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SAFE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX -check-prefix=CHECK-VSX-SAFE %s +; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -enable-unsafe-fp-math -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK -check-prefix=CHECK-UNSAFE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast 
-enable-unsafe-fp-math -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX -check-prefix=CHECK-UNSAFE-VSX %s define double @test_FMADD_ASSOC1(double %A, double %B, double %C, double %D, double %E) { @@ -8,16 +10,28 @@ define double @test_FMADD_ASSOC1(double %H = fadd double %F, %G ; [#uses=1] %I = fadd double %H, %E ; [#uses=1] ret double %I -; CHECK-LABEL: test_FMADD_ASSOC1: -; CHECK: fmadd -; CHECK-NEXT: fmadd -; CHECK-NEXT: blr - -; CHECK-VSX-LABEL: test_FMADD_ASSOC1: -; CHECK-VSX: xsmaddmdp -; CHECK-VSX-NEXT: xsmaddadp -; CHECK-VSX-NEXT: fmr -; CHECK-VSX-NEXT: blr +; CHECK-SAFE-LABEL: test_FMADD_ASSOC1: +; CHECK-SAFE: fmul +; CHECK-SAFE-NEXT: fmadd +; CHECK-SAFE-NEXT: fadd +; CHECK-SAFE-NEXT: blr + +; CHECK-UNSAFE-LABEL: test_FMADD_ASSOC1: +; CHECK-UNSAFE: fmadd +; CHECK-UNSAFE-NEXT: fmadd +; CHECK-UNSAFE-NEXT: blr + +; CHECK-VSX-SAFE-LABEL: test_FMADD_ASSOC1: +; CHECK-VSX-SAFE: xsmuldp +; CHECK-VSX-SAFE-NEXT: xsmaddadp +; CHECK-VSX-SAFE-NEXT: xsadddp +; CHECK-VSX-SAFE-NEXT: blr + +; CHECK-VSX-UNSAFE-LABEL: test_FMADD_ASSOC1: +; CHECK-VSX-UNSAFE: xsmaddmdp +; CHECK-VSX-UNSAFE-NEXT: xsmaddadp +; CHECK-VSX-UNSAFE-NEXT: fmr +; CHECK-VSX-UNSAFE-NEXT: blr } define double @test_FMADD_ASSOC2(double %A, double %B, double %C, @@ -27,16 +41,28 @@ define double @test_FMADD_ASSOC2(double %H = fadd double %F, %G ; [#uses=1] %I = fadd double %E, %H ; [#uses=1] ret double %I -; CHECK-LABEL: test_FMADD_ASSOC2: -; CHECK: fmadd -; CHECK-NEXT: fmadd -; CHECK-NEXT: blr - -; CHECK-VSX-LABEL: test_FMADD_ASSOC2: -; CHECK-VSX: xsmaddmdp -; CHECK-VSX-NEXT: xsmaddadp -; CHECK-VSX-NEXT: fmr -; CHECK-VSX-NEXT: blr +; CHECK-SAFE-LABEL: test_FMADD_ASSOC2: +; CHECK-SAFE: fmul +; CHECK-SAFE-NEXT: fmadd +; CHECK-SAFE-NEXT: fadd +; CHECK-SAFE-NEXT: blr + +; CHECK-UNSAFE-LABEL: test_FMADD_ASSOC2: +; CHECK-UNSAFE: fmadd +; CHECK-UNSAFE-NEXT: fmadd +; CHECK-UNSAFE-NEXT: blr + +; CHECK-VSX-SAFE-LABEL: test_FMADD_ASSOC2: +; CHECK-VSX-SAFE: xsmuldp +; 
CHECK-VSX-SAFE-NEXT: xsmaddadp +; CHECK-VSX-SAFE-NEXT: xsadddp +; CHECK-VSX-SAFE-NEXT: blr + +; CHECK-VSX-UNSAFE-LABEL: test_FMADD_ASSOC2: +; CHECK-VSX-UNSAFE: xsmaddmdp +; CHECK-VSX-UNSAFE-NEXT: xsmaddadp +; CHECK-VSX-UNSAFE-NEXT: fmr +; CHECK-VSX-UNSAFE-NEXT: blr } define double @test_FMSUB_ASSOC1(double %A, double %B, double %C, @@ -46,16 +72,28 @@ define double @test_FMSUB_ASSOC1(double %H = fadd double %F, %G ; [#uses=1] %I = fsub double %H, %E ; [#uses=1] ret double %I -; CHECK-LABEL: test_FMSUB_ASSOC1: -; CHECK: fmsub -; CHECK-NEXT: fmadd -; CHECK-NEXT: blr - -; CHECK-VSX-LABEL: test_FMSUB_ASSOC1: -; CHECK-VSX: xsmsubmdp -; CHECK-VSX-NEXT: xsmaddadp -; CHECK-VSX-NEXT: fmr -; CHECK-VSX-NEXT: blr +; CHECK-SAFE-LABEL: test_FMSUB_ASSOC1: +; CHECK-SAFE: fmul +; CHECK-SAFE-NEXT: fmadd +; CHECK-SAFE-NEXT: fsub +; CHECK-SAFE-NEXT: blr + +; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC1: +; CHECK-UNSAFE: fmsub +; CHECK-UNSAFE-NEXT: fmadd +; CHECK-UNSAFE-NEXT: blr + +; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC1: +; CHECK-SAFE-VSX: xsmuldp +; CHECK-SAFE-VSX-NEXT: xsmaddadp +; CHECK-SAFE-VSX-NEXT: xssubdp +; CHECK-SAFE-VSX-NEXT: blr + +; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC1: +; CHECK-UNSAFE-VSX: xsmsubmdp +; CHECK-UNSAFE-VSX-NEXT: xsmaddadp +; CHECK-UNSAFE-VSX-NEXT: fmr +; CHECK-UNSAFE-VSX-NEXT: blr } define double @test_FMSUB_ASSOC2(double %A, double %B, double %C, @@ -65,16 +103,28 @@ define double @test_FMSUB_ASSOC2(double %H = fadd double %F, %G ; [#uses=1] %I = fsub double %E, %H ; [#uses=1] ret double %I -; CHECK-LABEL: test_FMSUB_ASSOC2: -; CHECK: fnmsub -; CHECK-NEXT: fnmsub -; CHECK-NEXT: blr - -; CHECK-VSX-LABEL: test_FMSUB_ASSOC2: -; CHECK-VSX: xsnmsubmdp -; CHECK-VSX-NEXT: xsnmsubadp -; CHECK-VSX-NEXT: fmr -; CHECK-VSX-NEXT: blr +; CHECK-SAFE-LABEL: test_FMSUB_ASSOC2: +; CHECK-SAFE: fmul +; CHECK-SAFE-NEXT: fmadd +; CHECK-SAFE-NEXT: fsub +; CHECK-SAFE-NEXT: blr + +; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC2: +; CHECK-UNSAFE: fnmsub +; CHECK-UNSAFE-NEXT: fnmsub +; 
CHECK-UNSAFE-NEXT: blr + +; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC2: +; CHECK-SAFE-VSX: xsmuldp +; CHECK-SAFE-VSX-NEXT: xsmaddadp +; CHECK-SAFE-VSX-NEXT: xssubdp +; CHECK-SAFE-VSX-NEXT: blr + +; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC2: +; CHECK-UNSAFE-VSX: xsnmsubmdp +; CHECK-UNSAFE-VSX-NEXT: xsnmsubadp +; CHECK-UNSAFE-VSX-NEXT: fmr +; CHECK-UNSAFE-VSX-NEXT: blr } define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C, Added: vendor/llvm/dist/test/Object/Inputs/phdr-note.elf-x86-64 ============================================================================== Binary file. No diff available. Added: vendor/llvm/dist/test/Object/Inputs/phdrs.elf-x86-64 ============================================================================== Binary file. No diff available. Modified: vendor/llvm/dist/test/Transforms/InstCombine/minmax-fold.ll ============================================================================== --- vendor/llvm/dist/test/Transforms/InstCombine/minmax-fold.ll Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/test/Transforms/InstCombine/minmax-fold.ll Sun Feb 5 19:37:31 2017 (r313289) @@ -339,3 +339,84 @@ define i32 @test75(i32 %x) { ret i32 %retval } +; The next 3 min tests should canonicalize to the same form...and not infinite loop. 
+ +define double @PR31751_umin1(i32 %x) { +; CHECK-LABEL: @PR31751_umin1( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %x, 2147483647 +; CHECK-NEXT: [[CONV1:%.*]] = select i1 [[TMP1]], i32 %x, i32 2147483647 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[CONV1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %cmp = icmp slt i32 %x, 0 + %sel = select i1 %cmp, i32 2147483647, i32 %x + %conv = sitofp i32 %sel to double + ret double %conv +} + +define double @PR31751_umin2(i32 %x) { +; CHECK-LABEL: @PR31751_umin2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %x, 2147483647 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 2147483647 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: ret double [[CONV]] +; + %cmp = icmp ult i32 %x, 2147483647 + %sel = select i1 %cmp, i32 %x, i32 2147483647 + %conv = sitofp i32 %sel to double + ret double %conv +} + +define double @PR31751_umin3(i32 %x) { +; CHECK-LABEL: @PR31751_umin3( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %x, 2147483647 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 %x, i32 2147483647 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: ret double [[CONV]] +; + %cmp = icmp ugt i32 %x, 2147483647 + %sel = select i1 %cmp, i32 2147483647, i32 %x + %conv = sitofp i32 %sel to double + ret double %conv +} + +; The next 3 max tests should canonicalize to the same form...and not infinite loop. 
+ +define double @PR31751_umax1(i32 %x) { +; CHECK-LABEL: @PR31751_umax1( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 %x, -2147483648 +; CHECK-NEXT: [[CONV1:%.*]] = select i1 [[TMP1]], i32 %x, i32 -2147483648 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[CONV1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %cmp = icmp sgt i32 %x, -1 + %sel = select i1 %cmp, i32 2147483648, i32 %x + %conv = sitofp i32 %sel to double + ret double %conv +} + +define double @PR31751_umax2(i32 %x) { +; CHECK-LABEL: @PR31751_umax2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %x, -2147483648 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 -2147483648 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: ret double [[CONV]] +; + %cmp = icmp ugt i32 %x, 2147483648 + %sel = select i1 %cmp, i32 %x, i32 2147483648 + %conv = sitofp i32 %sel to double + ret double %conv +} + +define double @PR31751_umax3(i32 %x) { +; CHECK-LABEL: @PR31751_umax3( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 %x, -2147483648 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 %x, i32 -2147483648 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: ret double [[CONV]] +; + %cmp = icmp ult i32 %x, 2147483648 + %sel = select i1 %cmp, i32 2147483648, i32 %x + %conv = sitofp i32 %sel to double + ret double %conv +} Added: vendor/llvm/dist/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ vendor/llvm/dist/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll Sun Feb 5 19:37:31 2017 (r313289) @@ -0,0 +1,54 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -loop-reduce %s | FileCheck %s + +; Test for assert resulting from inconsistent isLegalAddressingMode +; answers when the address space was dropped from the query. 
+ +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +%0 = type { i32, double, i32, float } + +; CHECK-LABEL: @lsr_crash_preserve_addrspace_unknown_type( +; CHECK: %tmp4 = bitcast %0 addrspace(3)* %tmp to double addrspace(3)* +; CHECK: %scevgep5 = getelementptr double, double addrspace(3)* %tmp4, i32 1 +; CHECK: load double, double addrspace(3)* %scevgep5 + +; CHECK: %scevgep = getelementptr i32, i32 addrspace(3)* %tmp1, i32 4 +; CHECK:%tmp14 = load i32, i32 addrspace(3)* %scevgep +define void @lsr_crash_preserve_addrspace_unknown_type() #0 { +bb: + br label %bb1 + +bb1: ; preds = %bb17, %bb + %tmp = phi %0 addrspace(3)* [ undef, %bb ], [ %tmp18, %bb17 ] + %tmp2 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 1 + %tmp3 = load double, double addrspace(3)* %tmp2, align 8 + br label %bb4 + +bb4: ; preds = %bb1 + br i1 undef, label %bb8, label %bb5 + +bb5: ; preds = %bb4 + unreachable + +bb8: ; preds = %bb4 + %tmp9 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 0 + %tmp10 = load i32, i32 addrspace(3)* %tmp9, align 4 + %tmp11 = icmp eq i32 0, %tmp10 + br i1 %tmp11, label %bb12, label %bb17 + +bb12: ; preds = %bb8 + %tmp13 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 2 + %tmp14 = load i32, i32 addrspace(3)* %tmp13, align 4 + %tmp15 = icmp eq i32 0, %tmp14 + br i1 %tmp15, label %bb16, label %bb17 + +bb16: ; preds = %bb12 + unreachable + +bb17: ; preds = %bb12, %bb8 + %tmp18 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 2 + br label %bb1 +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } Added: vendor/llvm/dist/test/tools/llvm-objdump/X86/openbsd-headers.test ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ 
vendor/llvm/dist/test/tools/llvm-objdump/X86/openbsd-headers.test Sun Feb 5 19:37:31 2017 (r313289) @@ -0,0 +1,20 @@ +## openbsd-phdrs.elf-x86-64 was generated using GNU ld (GNU Binutils for Ubuntu) 2.26.1. +## llvm-mc -filetype=obj -triple=x86_64-pc-linux test.s -o main.o +## ld -script linker.script main.o -o openbsd-phdrs.elf-x86-64 +## +## test.s is an empty file. +## linker.script: +## PHDRS { text PT_LOAD FILEHDR PHDRS; foo 0x65a3dbe6; bar 0x65a3dbe7; zed 0x65a41be6; } +## Where 0x65a3dbe6 is the value of PT_OPENBSD_RANDOMIZE, +## 0x65a3dbe7 is the value of PT_OPENBSD_WXNEEDED, +## 0x65a41be6 is the value of PT_OPENBSD_BOOTDATA +## SECTIONS { . = SIZEOF_HEADERS; .all : { *(.*) } : text } +RUN: llvm-objdump -p %p/../../../Object/Inputs/openbsd-phdrs.elf-x86-64 \ +RUN: | FileCheck %s + +CHECK: OPENBSD_RANDOMIZE off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3 +CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- +CHECK-NEXT: OPENBSD_WXNEEDED off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3 +CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- +CHECK-NEXT: OPENBSD_BOOTDATA off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3 +CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- Added: vendor/llvm/dist/test/tools/llvm-objdump/X86/phdrs.test ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ vendor/llvm/dist/test/tools/llvm-objdump/X86/phdrs.test Sun Feb 5 19:37:31 2017 (r313289) @@ -0,0 +1,32 @@ +## phdrs.elf-x86-64 was generated using lld (3.9). 
+## llvm-mc -filetype=obj -triple=x86_64-unknown-linux test.s -o test.o +## lld test.o -o phdrs.elf-x86-64 +## +## test.s: +## .global _start +## _start: +## +## .global d +## .section .foo,"awT",@progbits +## d: +## .long 2 +## +RUN: llvm-objdump -p %p/../../../Object/Inputs/phdrs.elf-x86-64 \ +RUN: | FileCheck %s + +CHECK: RELRO off 0x0000000000001000 vaddr 0x0000000000201000 paddr 0x0000000000201000 align 2**0 +CHECK-NEXT: filesz 0x0000000000000004 memsz 0x0000000000001000 flags r-- + +## phdr-note.elf-x86-64 was generated using lld (3.9). +## llvm-mc -filetype=obj -triple=x86_64-pc-linux test.s -o test.o +## lld test.o -o phdr-note.elf-x86-64 -shared +## +## test.s: +## .section .note.test,"a",@note +## .quad 42 + +RUN: llvm-objdump -p %p/../../../Object/Inputs/phdr-note.elf-x86-64 \ +RUN: | FileCheck %s --check-prefix=NOTE + +NOTE: NOTE off 0x0000000000000200 vaddr 0x0000000000000200 paddr 0x0000000000000200 align 2**0 +NOTE-NEXT: filesz 0x0000000000000008 memsz 0x0000000000000008 flags r-- Modified: vendor/llvm/dist/tools/llvm-objdump/ELFDump.cpp ============================================================================== --- vendor/llvm/dist/tools/llvm-objdump/ELFDump.cpp Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/tools/llvm-objdump/ELFDump.cpp Sun Feb 5 19:37:31 2017 (r313289) @@ -36,6 +36,9 @@ template void printProgramH case ELF::PT_GNU_EH_FRAME: outs() << "EH_FRAME "; break; + case ELF::PT_GNU_RELRO: + outs() << " RELRO "; + break; case ELF::PT_GNU_STACK: outs() << " STACK "; break; @@ -45,6 +48,18 @@ template void printProgramH case ELF::PT_LOAD: outs() << " LOAD "; break; + case ELF::PT_NOTE: + outs() << " NOTE "; + break; + case ELF::PT_OPENBSD_BOOTDATA: + outs() << " OPENBSD_BOOTDATA "; + break; + case ELF::PT_OPENBSD_RANDOMIZE: + outs() << " OPENBSD_RANDOMIZE "; + break; + case ELF::PT_OPENBSD_WXNEEDED: + outs() << " OPENBSD_WXNEEDED "; + break; case ELF::PT_PHDR: outs() << " PHDR "; break; Modified: 
vendor/llvm/dist/utils/release/test-release.sh ============================================================================== --- vendor/llvm/dist/utils/release/test-release.sh Sun Feb 5 15:46:05 2017 (r313288) +++ vendor/llvm/dist/utils/release/test-release.sh Sun Feb 5 19:37:31 2017 (r313289) @@ -36,6 +36,7 @@ do_libs="yes" do_libunwind="yes" do_test_suite="yes" do_openmp="yes" +do_lld="yes" do_lldb="no" do_polly="no" BuildDir="`pwd`" @@ -64,6 +65,7 @@ function usage() { echo " -no-libunwind Disable check-out & build libunwind" echo " -no-test-suite Disable check-out & build test-suite" echo " -no-openmp Disable check-out & build libomp" + echo " -no-lld Disable check-out & build lld" echo " -lldb Enable check-out & build lldb" echo " -no-lldb Disable check-out & build lldb (default)" echo " -polly Enable check-out & build Polly" @@ -143,6 +145,9 @@ while [ $# -gt 0 ]; do -no-openmp ) do_openmp="no" ;; + -no-lld ) + do_lld="no" + ;; -lldb ) do_lldb="yes" ;; @@ -225,6 +230,9 @@ esac if [ $do_openmp = "yes" ]; then projects="$projects openmp" fi +if [ $do_lld = "yes" ]; then + projects="$projects lld" +fi if [ $do_lldb = "yes" ]; then projects="$projects lldb" fi @@ -297,7 +305,7 @@ function export_sources() { cfe) projsrc=llvm.src/tools/clang ;; - lldb|polly) + lld|lldb|polly) projsrc=llvm.src/tools/$proj ;; clang-tools-extra)