Date: Sat, 13 Feb 2016 14:57:10 +0000 (UTC) From: Dimitry Andric <dim@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r295590 - in vendor/llvm/dist: cmake/modules docs include/llvm/IR lib/Analysis lib/CodeGen/AsmPrinter lib/IR lib/Target/AArch64 lib/Target/AMDGPU lib/Target/AMDGPU/Utils lib/Target/ARM ... Message-ID: <201602131457.u1DEvAWO050264@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: dim Date: Sat Feb 13 14:57:10 2016 New Revision: 295590 URL: https://svnweb.freebsd.org/changeset/base/295590 Log: Vendor import of llvm release_38 branch r260756: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260756 Added: vendor/llvm/dist/lib/Target/AArch64/AArch64SchedM1.td vendor/llvm/dist/test/CodeGen/AMDGPU/spill-scavenge-offset.ll vendor/llvm/dist/test/CodeGen/PowerPC/inline-asm-s-modifier.ll vendor/llvm/dist/test/CodeGen/PowerPC/pr26193.ll vendor/llvm/dist/test/CodeGen/PowerPC/pr26356.ll vendor/llvm/dist/test/CodeGen/PowerPC/pr26381.ll vendor/llvm/dist/test/CodeGen/SystemZ/int-cmp-53.ll vendor/llvm/dist/test/DebugInfo/X86/PR26148.ll Modified: vendor/llvm/dist/cmake/modules/AddLLVM.cmake vendor/llvm/dist/cmake/modules/LLVM-Config.cmake vendor/llvm/dist/docs/ReleaseNotes.rst vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td vendor/llvm/dist/include/llvm/IR/Value.h vendor/llvm/dist/lib/Analysis/DemandedBits.cpp vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp vendor/llvm/dist/lib/IR/Value.cpp vendor/llvm/dist/lib/Target/AArch64/AArch64.td vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h vendor/llvm/dist/lib/Target/AMDGPU/Processors.td vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td vendor/llvm/dist/lib/Target/SystemZ/SystemZISelLowering.cpp vendor/llvm/dist/lib/Target/X86/X86ISelLowering.cpp vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineCompares.cpp vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineVectorOps.cpp 
vendor/llvm/dist/lib/Transforms/Utils/SimplifyCFG.cpp vendor/llvm/dist/test/Analysis/DemandedBits/basic.ll vendor/llvm/dist/test/CodeGen/AArch64/fp16-v4-instructions.ll vendor/llvm/dist/test/CodeGen/AArch64/fp16-v8-instructions.ll vendor/llvm/dist/test/CodeGen/AMDGPU/hsa-note-no-func.ll vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll vendor/llvm/dist/test/CodeGen/ARM/shifter_operand.ll vendor/llvm/dist/test/CodeGen/PowerPC/fast-isel-ret.ll vendor/llvm/dist/test/CodeGen/X86/avx512-gather-scatter-intrin.ll vendor/llvm/dist/test/CodeGen/X86/setcc-lowering.ll vendor/llvm/dist/test/Transforms/InstCombine/icmp.ll vendor/llvm/dist/test/Transforms/InstCombine/insert-extract-shuffle.ll vendor/llvm/dist/test/Transforms/InstCombine/unpack-fca.ll vendor/llvm/dist/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll vendor/llvm/dist/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll vendor/llvm/dist/tools/CMakeLists.txt vendor/llvm/dist/utils/release/test-release.sh vendor/llvm/dist/utils/unittest/CMakeLists.txt vendor/llvm/dist/utils/unittest/UnitTestMain/CMakeLists.txt Modified: vendor/llvm/dist/cmake/modules/AddLLVM.cmake ============================================================================== --- vendor/llvm/dist/cmake/modules/AddLLVM.cmake Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/cmake/modules/AddLLVM.cmake Sat Feb 13 14:57:10 2016 (r295590) @@ -468,20 +468,23 @@ function(llvm_add_library name) endif() endif() - # Add the explicit dependency information for this library. - # - # It would be nice to verify that we have the dependencies for this library - # name, but using get_property(... SET) doesn't suffice to determine if a - # property has been set to an empty value. 
- get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name}) - - if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_STATIC AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) - set(llvm_libs LLVM) - else() - llvm_map_components_to_libnames(llvm_libs - ${ARG_LINK_COMPONENTS} - ${LLVM_LINK_COMPONENTS} - ) + if (DEFINED LLVM_LINK_COMPONENTS OR DEFINED ARG_LINK_COMPONENTS) + if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) + set(llvm_libs LLVM) + else() + llvm_map_components_to_libnames(llvm_libs + ${ARG_LINK_COMPONENTS} + ${LLVM_LINK_COMPONENTS} + ) + endif() + else() + # Components have not been defined explicitly in CMake, so add the + # dependency information for this library as defined by LLVMBuild. + # + # It would be nice to verify that we have the dependencies for this library + # name, but using get_property(... SET) doesn't suffice to determine if a + # property has been set to an empty value. + get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name}) endif() if(CMAKE_VERSION VERSION_LESS 2.8.12) @@ -882,14 +885,11 @@ function(add_unittest test_suite test_na set(LLVM_REQUIRES_RTTI OFF) + list(APPEND LLVM_LINK_COMPONENTS Support) # gtest needs it for raw_ostream add_llvm_executable(${test_name} IGNORE_EXTERNALIZE_DEBUGINFO ${ARGN}) set(outdir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}) set_output_directory(${test_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir}) - target_link_libraries(${test_name} - gtest - gtest_main - LLVMSupport # gtest needs it for raw_ostream. 
- ) + target_link_libraries(${test_name} gtest_main gtest) add_dependencies(${test_suite} ${test_name}) get_target_property(test_suite_folder ${test_suite} FOLDER) Modified: vendor/llvm/dist/cmake/modules/LLVM-Config.cmake ============================================================================== --- vendor/llvm/dist/cmake/modules/LLVM-Config.cmake Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/cmake/modules/LLVM-Config.cmake Sat Feb 13 14:57:10 2016 (r295590) @@ -40,10 +40,19 @@ macro(llvm_config executable) # done in case libLLVM does not contain all of the components # the target requires. # - # TODO strip LLVM_DYLIB_COMPONENTS out of link_components. + # Strip LLVM_DYLIB_COMPONENTS out of link_components. # To do this, we need special handling for "all", since that # may imply linking to libraries that are not included in # libLLVM. + + if (DEFINED link_components AND DEFINED LLVM_DYLIB_COMPONENTS) + if("${LLVM_DYLIB_COMPONENTS}" STREQUAL "all") + set(link_components "") + else() + list(REMOVE_ITEM link_components ${LLVM_DYLIB_COMPONENTS}) + endif() + endif() + target_link_libraries(${executable} LLVM) endif() Modified: vendor/llvm/dist/docs/ReleaseNotes.rst ============================================================================== --- vendor/llvm/dist/docs/ReleaseNotes.rst Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/docs/ReleaseNotes.rst Sat Feb 13 14:57:10 2016 (r295590) @@ -5,11 +5,6 @@ LLVM 3.8 Release Notes .. contents:: :local: -.. warning:: - These are in-progress notes for the upcoming LLVM 3.8 release. You may - prefer the `LLVM 3.7 Release Notes <http://llvm.org/releases/3.7.0/docs - /ReleaseNotes.html>`_. - Introduction ============ @@ -26,11 +21,6 @@ have questions or comments, the `LLVM De <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send them. 
-Note that if you are reading this file from a Subversion checkout or the main -LLVM web page, this document applies to the *next* release, not the current -one. To see the release notes for a specific release, please see the `releases -page <http://llvm.org/releases/>`_. - Non-comprehensive list of changes in this release ================================================= * With this release, the minimum Windows version required for running LLVM is @@ -79,6 +69,26 @@ Non-comprehensive list of changes in thi * Support for dematerializing has been dropped. +* RegisterScheduler::setDefault was removed. Targets that used to call into the + command line parser to set the DAGScheduler, and that don't have enough + control with setSchedulingPreference, should look into overriding the + SubTargetHook "getDAGScheduler()". + +* ``ilist_iterator<T>`` no longer has implicit conversions to and from ``T*``, + since ``ilist_iterator<T>`` may be pointing at the sentinel (which is usually + not of type ``T`` at all). To convert from an iterator ``I`` to a pointer, + use ``&*I``; to convert from a pointer ``P`` to an iterator, use + ``P->getIterator()``. Alternatively, explicit conversions via + ``static_cast<T>(U)`` are still available. + +* ``ilist_node<T>::getNextNode()`` and ``ilist_node<T>::getPrevNode()`` now + fail at compile time when the node cannot access its parent list. + Previously, when the sentinel was an ``ilist_half_node<T>``, this API + could return the sentinel instead of ``nullptr``. Frustrated callers should + be updated to use ``iplist<T>::getNextNode(T*)`` instead. Alternatively, if + the node ``N`` is guaranteed not to be the last in the list, it is safe to + call ``&*++N->getIterator()`` directly. + .. NOTE For small 1-3 sentence descriptions, just add an entry at the end of this list. If your description won't fit comfortably in one bullet @@ -98,17 +108,97 @@ Non-comprehensive list of changes in thi Makes programs 10x faster by doing Special New Thing. 
-Changes to the ARM Backend --------------------------- - During this release ... +Changes to the ARM Backends +--------------------------- + +During this release the AArch64 target has: + +* Added support for more sanitizers (MSAN, TSAN) and made them compatible with + all VMA kernel configurations (currently tested on 39 and 42 bits). +* Gained initial LLD support in the new ELF back-end +* Extended the Load/Store optimiser and cleaned up some of the bad decisions + made earlier. +* Expanded LLDB support, including watchpoints, native building, Renderscript, + LLDB-server, debugging 32-bit applications. +* Added support for the ``Exynos M1`` chip. + +During this release the ARM target has: + +* Gained massive performance improvements on embedded benchmarks due to finally + running the stride vectorizer in full form, incrementing the performance gains + that we already had in the previous releases with limited stride vectorization. +* Expanded LLDB support, including watchpoints, unwind tables +* Extended the Load/Store optimiser and cleaned up some of the bad decisions + made earlier. +* Simplified code generation for global variable addresses in ELF, resulting in + a significant (4% in Chromium) reduction in code size. +* Gained some additional code size improvements, though there's still a long road + ahead, especially for older cores. +* Added some EABI floating point comparison functions to Compiler-RT +* Added support for Windows+GNU triple, +features in -mcpu/-march options. Changes to the MIPS Target -------------------------- - During this release ... +During this release the MIPS target has: + +* Significantly extended support for the Integrated Assembler. See below for + more information +* Added support for the ``P5600`` processor. +* Added support for the ``interrupt`` attribute for MIPS32R2 and later. This + attribute will generate a function which can be used as an interrupt handler + on bare metal MIPS targets using the static relocation model. 
+* Added support for the ``ERETNC`` instruction found in MIPS32R5 and later. +* Added support for OpenCL. See http://portablecl.org/. + + * Address spaces 1 to 255 are now reserved for software use and conversions + between them are no-op casts. + +* Removed the ``mips16`` value for the -mcpu option since it is an :abbr:`ASE + (Application Specific Extension)` and not a processor. If you were using this, + please specify another CPU and use ``-mips16`` to enable MIPS16. +* Removed ``copy_u.w`` from 32-bit MSA and ``copy_u.d`` from 64-bit MSA since + they have been removed from the MSA specification due to forward compatibility + issues. For example, 32-bit MSA code containing ``copy_u.w`` would behave + differently on a 64-bit processor supporting MSA. The corresponding intrinsics + are still available and may expand to ``copy_s.[wd]`` where this is + appropriate for forward compatibility purposes. +* Relaxed the ``-mnan`` option to allow ``-mnan=2008`` on MIPS32R2/MIPS64R2 for + compatibility with GCC. +* Made MIPS64R6 the default CPU for 64-bit Android triples. + +The MIPS target has also fixed various bugs including the following notable +fixes: + +* Fixed reversed operands on ``mthi``/``mtlo`` in the DSP :abbr:`ASE + (Application Specific Extension)`. +* The code generator no longer uses ``jal`` for calls to absolute immediate + addresses. +* Disabled fast instruction selection on MIPS32R6 and MIPS64R6 since this is not + yet supported. +* Corrected addend for ``R_MIPS_HI16`` and ``R_MIPS_PCHI16`` in MCJIT +* The code generator no longer crashes when handling subregisters of a 64-bit + FPU register with undefined value. +* The code generator no longer attempts to use ``$zero`` for operands that do + not permit ``$zero``. +* Corrected the opcode used for ``ll``/``sc`` when using MIPS32R6/MIPS64R6 and + the Integrated Assembler. +* Added support for atomic load and atomic store. +* Corrected debug info when dynamically re-aligning the stack. 
+ +Integrated Assembler +^^^^^^^^^^^^^^^^^^^^ +We have made a large number of improvements to the integrated assembler for +MIPS. In this release, the integrated assembler isn't quite production-ready +since there are a few known issues related to bare-metal support, checking +immediates on instructions, and the N32/N64 ABI's. However, the current support +should be sufficient for many users of the O32 ABI, particularly those targeting +MIPS32 on Linux or bare-metal MIPS32. +If you would like to try the integrated assembler, please use +``-fintegrated-as``. Changes to the PowerPC Target ----------------------------- @@ -123,6 +213,20 @@ Changes to the X86 Target * TLS is enabled for Cygwin as emutls. +* Smaller code for materializing 32-bit 1 and -1 constants at ``-Os``. + +* More efficient code for wide integer compares. (E.g. 64-bit compares + on 32-bit targets.) + +* Tail call support for ``thiscall``, ``stdcall``, ``vectorcall``, and + ``fastcall`` functions. + +Changes to the AVR Target +------------------------- + +Slightly less than half of the AVR backend has been merged in at this point. It is still +missing a number of large parts which cause it to be unusable, but is well on the +road to being completely merged and workable. Changes to the OCaml bindings ----------------------------- @@ -140,7 +244,19 @@ An exciting aspect of LLVM is that it is a lot of other language and tools projects. This section lists some of the projects that have already been updated to work with LLVM 3.8. -* A project +LDC - the LLVM-based D compiler +------------------------------- + +`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It +pragmatically combines efficiency, control, and modeling power, with safety and +programmer productivity. D supports powerful concepts like Compile-Time Function +Execution (CTFE) and Template Meta-Programming, provides an innovative approach +to concurrency and offers many classical paradigms. 
+ +`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler +combined with LLVM as backend to produce efficient native code. LDC targets +x86/x86_64 systems like Linux, OS X and Windows and also PowerPC (32/64 bit) +and ARM. Ports to other architectures like AArch64 and MIPS64 are underway. Additional Information Modified: vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td ============================================================================== --- vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td Sat Feb 13 14:57:10 2016 (r295590) @@ -484,7 +484,7 @@ let TargetPrefix = "ppc" in { // All PP Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">, - Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], Modified: vendor/llvm/dist/include/llvm/IR/Value.h ============================================================================== --- vendor/llvm/dist/include/llvm/IR/Value.h Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/include/llvm/IR/Value.h Sat Feb 13 14:57:10 2016 (r295590) @@ -280,11 +280,7 @@ public: // when using them since you might not get all uses. // The methods that don't start with materialized_ assert that modules is // fully materialized. 
-#ifdef NDEBUG - void assertModuleIsMaterialized() const {} -#else void assertModuleIsMaterialized() const; -#endif bool use_empty() const { assertModuleIsMaterialized(); Modified: vendor/llvm/dist/lib/Analysis/DemandedBits.cpp ============================================================================== --- vendor/llvm/dist/lib/Analysis/DemandedBits.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Analysis/DemandedBits.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -242,13 +242,6 @@ void DemandedBits::determineLiveOperandB if (OperandNo != 0) AB = AOut; break; - case Instruction::ICmp: - // Count the number of leading zeroes in each operand. - ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); - auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes); - break; } } Modified: vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp ============================================================================== --- vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -555,6 +555,11 @@ bool AsmPrinter::PrintAsmOperand(const M return true; O << -MO.getImm(); return false; + case 's': // The GCC deprecated s modifier + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << ((32 - MO.getImm()) & 31); + return false; } } return true; Modified: vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp ============================================================================== --- vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -793,16 +793,27 @@ static DebugLocEntry::Value getDebugLocV llvm_unreachable("Unexpected 4-operand DBG_VALUE 
instruction!"); } -/// Determine whether two variable pieces overlap. -static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) { - if (!P1->isBitPiece() || !P2->isBitPiece()) - return true; +// Determine the relative position of the pieces described by P1 and P2. +// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, +// 1 if P1 is entirely after P2. +static int pieceCmp(const DIExpression *P1, const DIExpression *P2) { unsigned l1 = P1->getBitPieceOffset(); unsigned l2 = P2->getBitPieceOffset(); unsigned r1 = l1 + P1->getBitPieceSize(); unsigned r2 = l2 + P2->getBitPieceSize(); - // True where [l1,r1[ and [r1,r2[ overlap. - return (l1 < r2) && (l2 < r1); + if (r1 <= l2) + return -1; + else if (r2 <= l1) + return 1; + else + return 0; +} + +/// Determine whether two variable pieces overlap. +static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) { + if (!P1->isBitPiece() || !P2->isBitPiece()) + return true; + return pieceCmp(P1, P2) == 0; } /// \brief If this and Next are describing different pieces of the same @@ -811,14 +822,32 @@ static bool piecesOverlap(const DIExpres /// Return true if the merge was successful. bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { - auto *Expr = cast_or_null<DIExpression>(Values[0].Expression); - auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression); - if (Expr->isBitPiece() && NextExpr->isBitPiece() && - !piecesOverlap(Expr, NextExpr)) { - addValues(Next.Values); - End = Next.End; - return true; + auto *FirstExpr = cast<DIExpression>(Values[0].Expression); + auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression); + if (!FirstExpr->isBitPiece() || !FirstNextExpr->isBitPiece()) + return false; + + // We can only merge entries if none of the pieces overlap any others. + // In doing so, we can take advantage of the fact that both lists are + // sorted. 
+ for (unsigned i = 0, j = 0; i < Values.size(); ++i) { + for (; j < Next.Values.size(); ++j) { + int res = pieceCmp(cast<DIExpression>(Values[i].Expression), + cast<DIExpression>(Next.Values[j].Expression)); + if (res == 0) // The two expressions overlap, we can't merge. + return false; + // Values[i] is entirely before Next.Values[j], + // so go back to the next entry of Values. + else if (res == -1) + break; + // Next.Values[j] is entirely before Values[i], so go on to the + // next entry of Next.Values. + } } + + addValues(Next.Values); + End = Next.End; + return true; } return false; } Modified: vendor/llvm/dist/lib/IR/Value.cpp ============================================================================== --- vendor/llvm/dist/lib/IR/Value.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/IR/Value.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -313,8 +313,8 @@ void Value::takeName(Value *V) { ST->reinsertValue(this); } -#ifndef NDEBUG void Value::assertModuleIsMaterialized() const { +#ifndef NDEBUG const GlobalValue *GV = dyn_cast<GlobalValue>(this); if (!GV) return; @@ -322,8 +322,10 @@ void Value::assertModuleIsMaterialized() if (!M) return; assert(M->isMaterialized()); +#endif } +#ifndef NDEBUG static bool contains(SmallPtrSetImpl<ConstantExpr *> &Cache, ConstantExpr *Expr, Constant *C) { if (!Cache.insert(Expr).second) Modified: vendor/llvm/dist/lib/Target/AArch64/AArch64.td ============================================================================== --- vendor/llvm/dist/lib/Target/AArch64/AArch64.td Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/AArch64/AArch64.td Sat Feb 13 14:57:10 2016 (r295590) @@ -90,6 +90,7 @@ def AArch64InstrInfo : InstrInfo; include "AArch64SchedA53.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" +include "AArch64SchedM1.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", @@ -144,8 +145,7 @@ def : ProcessorModel<"cortex-a57", Corte // 
FIXME: Cortex-A72 is currently modelled as an Cortex-A57. def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; -// FIXME: Exynos-M1 is currently modelled without a specific SchedModel. -def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>; +def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>; //===----------------------------------------------------------------------===// // Assembly parser Modified: vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp ============================================================================== --- vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -6689,6 +6689,9 @@ SDValue AArch64TargetLowering::LowerVSET return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType()); } + if (LHS.getValueType().getVectorElementType() == MVT::f16) + return SDValue(); + assert(LHS.getValueType().getVectorElementType() == MVT::f32 || LHS.getValueType().getVectorElementType() == MVT::f64); Added: vendor/llvm/dist/lib/Target/AArch64/AArch64SchedM1.td ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ vendor/llvm/dist/lib/Target/AArch64/AArch64SchedM1.td Sat Feb 13 14:57:10 2016 (r295590) @@ -0,0 +1,359 @@ +//=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Samsung Exynos-M1 to support +// instruction scheduling and other instruction cost heuristics. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The Exynos-M1 is a traditional superscalar microprocessor with a +// 4-wide in-order stage for decode and dispatch and a wider issue stage. +// The execution units and loads and stores are out-of-order. + +def ExynosM1Model : SchedMachineModel { + let IssueWidth = 4; // Up to 4 uops per cycle. + let MinLatency = 0; // OoO. + let MicroOpBufferSize = 96; // ROB size. + let LoopMicroOpBufferSize = 32; // Instruction queue size. + let LoadLatency = 4; // Optimistic load cases. + let MispredictPenalty = 14; // Minimum branch misprediction penalty. + let CompleteModel = 0; // Use the default model otherwise. +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on the Exynos-M1, +// which has 9 pipelines, each with its own queue with out-of-order dispatch. + +def M1UnitA : ProcResource<2>; // Simple integer +def M1UnitC : ProcResource<1>; // Simple and complex integer +def M1UnitB : ProcResource<2>; // Branch +def M1UnitL : ProcResource<1>; // Load +def M1UnitS : ProcResource<1>; // Store +def M1PipeF0 : ProcResource<1>; // FP #0 +def M1PipeF1 : ProcResource<1>; // FP #1 + +let Super = M1PipeF0 in { + def M1UnitFMAC : ProcResource<1>; // FP multiplication + def M1UnitFCVT : ProcResource<1>; // FP conversion + def M1UnitNAL0 : ProcResource<1>; // Simple vector. + def M1UnitNMISC : ProcResource<1>; // Miscellanea + def M1UnitNCRYPT : ProcResource<1>; // Cryptographic +} + +let Super = M1PipeF1 in { + def M1UnitFADD : ProcResource<1>; // Simple FP + let BufferSize = 1 in + def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized) + def M1UnitNAL1 : ProcResource<1>; // Simple vector. 
+ def M1UnitFST : ProcResource<1>; // FP store +} + +let SchedModel = ExynosM1Model in { + def M1UnitALU : ProcResGroup<[M1UnitA, + M1UnitC]>; // All simple integer. + def M1UnitNALU : ProcResGroup<[M1UnitNAL0, + M1UnitNAL1]>; // All simple vector. +} + +let SchedModel = ExynosM1Model in { + +//===----------------------------------------------------------------------===// +// Coarse scheduling model for the Exynos-M1. + +// Branch instructions. +// TODO: Non-conditional direct branches take zero cycles and units. +def : WriteRes<WriteBr, [M1UnitB]> { let Latency = 1; } +def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; } +// TODO: Branch and link is much different. + +// Arithmetic and logical integer instructions. +def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; } +// TODO: Shift over 3 and some extensions take 2 cycles. +def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; } +def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; } +def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; } + +// Move instructions. +def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; } + +// Divide and multiply instructions. +// TODO: Division blocks the divider inside C. +def : WriteRes<WriteID32, [M1UnitC]> { let Latency = 13; } +def : WriteRes<WriteID64, [M1UnitC]> { let Latency = 21; } +// TODO: Long multiplication take 5 cycles and also the ALU. +// TODO: Multiplication with accumulation can be advanced. +def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; } +// TODO: 64-bit multiplication has a throughput of 1/2. +def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; } + +// Miscellaneous instructions. +def : WriteRes<WriteExtr, [M1UnitALU, + M1UnitALU]> { let Latency = 2; } + +// TODO: The latency for the post or pre register is 1 cycle. +def : WriteRes<WriteAdr, []> { let Latency = 0; } + +// Load instructions. +def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; } +// TODO: Extended address requires also the ALU. 
+def : WriteRes<WriteLDIdx, [M1UnitL]> { let Latency = 5; } +def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; } + +// Store instructions. +def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; } +// TODO: Extended address requires also the ALU. +def : WriteRes<WriteSTIdx, [M1UnitS]> { let Latency = 1; } +def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; } +def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; } + +// FP data instructions. +def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; } +// TODO: FCCMP is much different. +def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; } +// TODO: DP takes longer. +def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15; } +// TODO: MACC takes longer. +def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; } + +// FP miscellaneous instructions. +// TODO: Conversion between register files is much different. +def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; } +def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; } +// TODO: Copy from FPR to GPR is much different. +def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; } + +// FP load instructions. +// TODO: ASIMD loads are much different. +def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; } + +// FP store instructions. +// TODO: ASIMD stores are much different. +def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; } + +// ASIMD FP instructions. +// TODO: Other operations are much different. +def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; } + +// Other miscellaneous instructions. +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Fast forwarding. + +// TODO: Add FP register forwarding rules. 
+ +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +// Integer multiply-accumulate. +// TODO: The forwarding for WriteIM64 saves actually 3 cycles. +def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +//===----------------------------------------------------------------------===// +// Finer scheduling model for the Exynos-M1. + +def M1WriteNEONA : SchedWriteRes<[M1UnitNALU, + M1UnitNALU, + M1UnitFADD]> { let Latency = 9; } +def M1WriteNEONB : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 5; } +def M1WriteNEONC : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 6; } +def M1WriteNEOND : SchedWriteRes<[M1UnitNALU, + M1UnitFST, + M1UnitL]> { let Latency = 10; } +def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT, + M1UnitFST]> { let Latency = 8; } +def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT, + M1UnitFST, + M1UnitL]> { let Latency = 13; } +def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC, + M1UnitFST]> { let Latency = 6; } +def M1WriteNEONH : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 3; } +def M1WriteNEONI : SchedWriteRes<[M1UnitFST, + M1UnitL]> { let Latency = 9; } +def M1WriteALU1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; } +def M1WriteB : SchedWriteRes<[M1UnitB]> { let Latency = 1; } +// FIXME: This is the worst case, conditional branch and link. +def M1WriteBL : SchedWriteRes<[M1UnitB, + M1UnitALU]> { let Latency = 1; } +// FIXME: This is the worst case, when using LR. 
+def M1WriteBLR : SchedWriteRes<[M1UnitB, + M1UnitALU, + M1UnitALU]> { let Latency = 2; } +def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; } +def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; } +def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; } +def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; } +def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; } +def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; } +def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; } +def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15; } +def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23; } +def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; } +def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; } +def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; } +def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; } +def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; } +def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } +def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; } +def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; } +def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; } +def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; } +def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; } +def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; } +def M1WriteTB : SchedWriteRes<[M1UnitC, + M1UnitALU]> { let Latency = 2; } + +// Branch instructions +def : InstRW<[M1WriteB ], (instrs Bcc)>; +def : InstRW<[M1WriteBL], (instrs BL)>; +def : InstRW<[M1WriteBLR], (instrs BLR)>; +def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>; +def : InstRW<[M1WriteTB], (instregex "^TBN?Z[WX]")>; + +// Arithmetic and logical integer instructions. 
+def : InstRW<[M1WriteALU1], (instrs COPY)>; + +// Divide and multiply instructions. + +// Miscellaneous instructions. + +// Load instructions. + +// Store instructions. + +// FP data instructions. +def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>; +def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>; +def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>; +def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>; +def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>; +def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>; +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>; +def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>; +def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>; +def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>; +def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>; +def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>; + +// FP miscellaneous instructions. +def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>; +def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; +def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>; +def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>; +def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>; +def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>; + +// FP load instructions. + +// FP store instructions. + +// ASIMD instructions. 
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>; +def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>; +def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>; +def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; +def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>; +def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>; +def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>; +def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>; +def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>; + +// ASIMD FP instructions. 
+def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>; +def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>; +def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; +def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>; +def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>; +def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>; +def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>; +def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>; +def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>; +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; +def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v")>; +def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v")>; +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>; + +// ASIMD miscellaneous instructions. +def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>; +def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>; +def : InstRW<[M1WriteNALU1], (instregex "^CPY")>; +def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>; +def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>; +def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>; +def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev")>; +def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>; +def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>; +def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>; +def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 2>], + (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 3>], + (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 4>], + (instregex "^TB[LX]v8i8Four")>; +def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 2>], + 
(instregex "^TB[LX]v16i8Two")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 3>], + (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 4>], + (instregex "^TB[LX]v16i8Four")>; +def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>; +def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>; +def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)(1|2)(v8i8|v4i16|v2i32)")>; +def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[M1WriteNALU1], (instregex "^ZIP(1|2)v")>; + +// ASIMD load instructions. + +// ASIMD store instructions. + +// Cryptography instructions. +def : InstRW<[M1WriteNCRYPT1], (instregex "^AES")>; +def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>; +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>; +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>; +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>; +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>; + +// CRC instructions. 
+def : InstRW<[M1WriteC2], (instregex "^CRC32")>; + +} // SchedModel = ExynosM1Model Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td ============================================================================== --- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td Sat Feb 13 14:57:10 2016 (r295590) @@ -183,6 +183,7 @@ def FeatureISAVersion7_0_0 : SubtargetFe def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>; def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>; def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>; +def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>; class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< "localmemorysize"#Value, @@ -252,7 +253,7 @@ def FeatureSeaIslands : SubtargetFeature def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, - FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>; + FeatureGCN3Encoding, FeatureCIInsts]>; //===----------------------------------------------------------------------===// Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h ============================================================================== --- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h Sat Feb 13 14:57:10 2016 (r295590) @@ -53,7 +53,8 @@ public: ISAVersion7_0_0, ISAVersion7_0_1, ISAVersion8_0_0, - ISAVersion8_0_1 + ISAVersion8_0_1, + ISAVersion8_0_3 }; private: Modified: vendor/llvm/dist/lib/Target/AMDGPU/Processors.td ============================================================================== --- vendor/llvm/dist/lib/Target/AMDGPU/Processors.td Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/AMDGPU/Processors.td 
Sat Feb 13 14:57:10 2016 (r295590) @@ -128,21 +128,23 @@ def : ProcessorModel<"mullins", SIQua //===----------------------------------------------------------------------===// def : ProcessorModel<"tonga", SIQuarterSpeedModel, - [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0] + [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0, + FeatureLDSBankCount32] >; def : ProcessorModel<"iceland", SIQuarterSpeedModel, - [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0] + [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0, + FeatureLDSBankCount32] >; def : ProcessorModel<"carrizo", SIQuarterSpeedModel, - [FeatureVolcanicIslands, FeatureISAVersion8_0_1] + [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount32] >; def : ProcessorModel<"fiji", SIQuarterSpeedModel, - [FeatureVolcanicIslands, FeatureISAVersion8_0_1] + [FeatureVolcanicIslands, FeatureISAVersion8_0_3, FeatureLDSBankCount32] >; def : ProcessorModel<"stoney", SIQuarterSpeedModel, - [FeatureVolcanicIslands, FeatureISAVersion8_0_1] + [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount16] >; Modified: vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp ============================================================================== --- vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -234,6 +234,7 @@ void SIRegisterInfo::buildScratchLoadSto bool IsLoad = TII->get(LoadStoreOp).mayLoad(); bool RanOutOfSGPRs = false; + bool Scavenged = false; unsigned SOffset = ScratchOffset; unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); @@ -244,6 +245,8 @@ void SIRegisterInfo::buildScratchLoadSto if (SOffset == AMDGPU::NoRegister) { RanOutOfSGPRs = true; SOffset = AMDGPU::SGPR0; + } else { + Scavenged = true; } BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset) 
.addReg(ScratchOffset) @@ -259,10 +262,14 @@ void SIRegisterInfo::buildScratchLoadSto getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) : Value; + unsigned SOffsetRegState = 0; + if (i + 1 == e && Scavenged) + SOffsetRegState |= RegState::Kill; + BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) .addReg(SubReg, getDefRegState(IsLoad)) .addReg(ScratchRsrcReg) - .addReg(SOffset) + .addReg(SOffset, SOffsetRegState) .addImm(Offset) .addImm(0) // glc .addImm(0) // slc Modified: vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp ============================================================================== --- vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -41,6 +41,9 @@ IsaVersion getIsaVersion(const FeatureBi if (Features.test(FeatureISAVersion8_0_1)) return {8, 0, 1}; + if (Features.test(FeatureISAVersion8_0_3)) + return {8, 0, 3}; + return {0, 0, 0}; } Modified: vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp ============================================================================== --- vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -747,7 +747,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SD // If Offset is a multiply-by-constant and it's profitable to extract a shift // and use it in a shifted operand do so. - if (Offset.getOpcode() == ISD::MUL) { + if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { @@ -1422,7 +1422,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSo // If OffReg is a multiply-by-constant and it's profitable to extract a shift // and use it in a shifted operand do so. 
- if (OffReg.getOpcode() == ISD::MUL) { + if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { Modified: vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp ============================================================================== --- vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp Sat Feb 13 14:57:10 2016 (r295590) @@ -1615,7 +1615,7 @@ bool PPCFastISel::SelectRet(const Instru // extension rather than sign extension. Make sure we pass the return // value extension property to integer materialization. unsigned SrcReg = - PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt); + PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); @@ -2091,25 +2091,21 @@ unsigned PPCFastISel::PPCMaterializeInt( const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); + int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue(); // If the constant is in range, use a load-immediate. - if (UseSExt && isInt<16>(CI->getSExtValue())) { + // Since LI will sign extend the constant we need to make sure that for + // our zeroext constants that the sign extended constant fits into 16-bits - + // a range of 0..0x7fff. + if (isInt<16>(Imm)) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) - .addImm(CI->getSExtValue()); - return ImmReg; - } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) { - unsigned Opc = (VT == MVT::i64) ? 
PPC::LI8 : PPC::LI; - unsigned ImmReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) - .addImm(CI->getZExtValue()); + .addImm(Imm); return ImmReg; } // Construct the constant piecewise. - int64_t Imm = CI->getZExtValue(); - if (VT == MVT::i64) return PPCMaterialize64BitInt(Imm, RC); else if (VT == MVT::i32) Modified: vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td ============================================================================== --- vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td Sat Feb 13 14:57:04 2016 (r295589) +++ vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td Sat Feb 13 14:57:10 2016 (r295590) @@ -736,7 +736,7 @@ def VPKSHSS : VX1_Int_Ty2<398, "vpkshss" *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201602131457.u1DEvAWO050264>