Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 30 Mar 2015 20:01:42 +0000 (UTC)
From:      Ed Maste <emaste@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r280865 - in head/contrib/llvm/lib/Target/AArch64: . Utils
Message-ID:  <201503302001.t2UK1gse088144@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: emaste
Date: Mon Mar 30 20:01:41 2015
New Revision: 280865
URL: https://svnweb.freebsd.org/changeset/base/280865

Log:
  llvm: Backport upstream r229195 to fix arm64 TLS relocations
  
    As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the GNU
    linkers ld.bfd and ld.gold currently only support a subset of the
    whole range of AArch64 ELF TLS relocations. Furthermore, they assume
    that some of the code sequences to access thread-local variables are
    produced in a very specific sequence.  When the sequence is not as the
    linker expects, it can silently mis-relaxe/mis-optimize the
    instructions.
    Even if that wouldn't be the case, it's good to produce the exact
    sequence, as that ensures that linkers can perform optimizing
    relaxations.
  
    This patch:
  
    * implements support for 16MiB TLS area size instead of 4GiB TLS area
      size. Ideally clang would grow an -mtls-size option to allow support
      for both, but that's not part of this patch.
    * by default doesn't produce local dynamic access patterns, as even
      modern ld.bfd and ld.gold linkers do not support the associated
      relocations. An option (-aarch64-elf-ldtls-generation) is added to
      enable generation of local dynamic code sequence, but is off by
      default.
    * makes sure that the exact expected code sequence for local dynamic
      and general dynamic accesses is produced, by making use of a new
      pseudo instruction. The patch also removes two
      (AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing
      AArch64-specific pseudo SDNode instructions that are superseded by
      the new one (TLSDESC_CALLSEQ).
  
  Submitted by:	Kristof Beyls
  Differential Revision:	https://reviews.freebsd.org/D2175

Modified:
  head/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
  head/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
  head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
  head/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
  head/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
  head/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

Modified: head/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
==============================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp	Mon Mar 30 19:36:33 2015	(r280864)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp	Mon Mar 30 20:01:41 2015	(r280865)
@@ -12,6 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
 #include "AArch64.h"
 #include "AArch64MCInstLower.h"
 #include "AArch64MachineFunctionInfo.h"
@@ -494,24 +496,57 @@ void AArch64AsmPrinter::EmitInstruction(
     EmitToStreamer(OutStreamer, TmpInst);
     return;
   }
-  case AArch64::TLSDESC_BLR: {
-    MCOperand Callee, Sym;
-    MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
-    MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
+  case AArch64::TLSDESC_CALLSEQ: {
+    /// lower this to:
+    ///    adrp  x0, :tlsdesc:var
+    ///    ldr   x1, [x0, #:tlsdesc_lo12:var]
+    ///    add   x0, x0, #:tlsdesc_lo12:var
+    ///    .tlsdesccall var
+    ///    blr   x1
+    ///    (TPIDR_EL0 offset now in x0)
+    const MachineOperand &MO_Sym = MI->getOperand(0);
+    MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+    MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+    MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+                                   AArch64II::MO_NC);
+    MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+    MCInstLowering.lowerOperand(MO_Sym, Sym);
+    MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+    MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
+
+    MCInst Adrp;
+    Adrp.setOpcode(AArch64::ADRP);
+    Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Adrp.addOperand(SymTLSDesc);
+    EmitToStreamer(OutStreamer, Adrp);
+
+    MCInst Ldr;
+    Ldr.setOpcode(AArch64::LDRXui);
+    Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+    Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Ldr.addOperand(SymTLSDescLo12);
+    Ldr.addOperand(MCOperand::CreateImm(0));
+    EmitToStreamer(OutStreamer, Ldr);
+
+    MCInst Add;
+    Add.setOpcode(AArch64::ADDXri);
+    Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Add.addOperand(SymTLSDescLo12);
+    Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+    EmitToStreamer(OutStreamer, Add);
 
-    // First emit a relocation-annotation. This expands to no code, but requests
+    // Emit a relocation-annotation. This expands to no code, but requests
     // the following instruction gets an R_AARCH64_TLSDESC_CALL.
     MCInst TLSDescCall;
     TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
     TLSDescCall.addOperand(Sym);
     EmitToStreamer(OutStreamer, TLSDescCall);
 
-    // Other than that it's just a normal indirect call to the function loaded
-    // from the descriptor.
-    MCInst BLR;
-    BLR.setOpcode(AArch64::BLR);
-    BLR.addOperand(Callee);
-    EmitToStreamer(OutStreamer, BLR);
+    MCInst Blr;
+    Blr.setOpcode(AArch64::BLR);
+    Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+    EmitToStreamer(OutStreamer, Blr);
 
     return;
   }

Modified: head/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
==============================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp	Mon Mar 30 19:36:33 2015	(r280864)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp	Mon Mar 30 20:01:41 2015	(r280865)
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunc
     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
          ++I) {
       switch (I->getOpcode()) {
-      case AArch64::TLSDESC_BLR:
+      case AArch64::TLSDESC_CALLSEQ:
         // Make sure it's a local dynamic access.
-        if (!I->getOperand(1).isSymbol() ||
-            strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+        if (!I->getOperand(0).isSymbol() ||
+            strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
           break;
 
         if (TLSBaseAddrReg)

Modified: head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
==============================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp	Mon Mar 30 19:36:33 2015	(r280864)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp	Mon Mar 30 20:01:41 2015	(r280865)
@@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-ext
 
 static cl::opt<bool>
 EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
-                         cl::desc("Allow AArch64 SLI/SRI formation"),
-                         cl::init(false));
+                           cl::desc("Allow AArch64 SLI/SRI formation"),
+                           cl::init(false));
+
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+    "aarch64-elf-ldtls-generation", cl::Hidden,
+    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+    cl::init(false));
 
 
 AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
@@ -760,7 +768,7 @@ const char *AArch64TargetLowering::getTa
   case AArch64ISD::CSNEG:             return "AArch64ISD::CSNEG";
   case AArch64ISD::CSINC:             return "AArch64ISD::CSINC";
   case AArch64ISD::THREAD_POINTER:    return "AArch64ISD::THREAD_POINTER";
-  case AArch64ISD::TLSDESC_CALL:      return "AArch64ISD::TLSDESC_CALL";
+  case AArch64ISD::TLSDESC_CALLSEQ:   return "AArch64ISD::TLSDESC_CALLSEQ";
   case AArch64ISD::ADC:               return "AArch64ISD::ADC";
   case AArch64ISD::SBC:               return "AArch64ISD::SBC";
   case AArch64ISD::ADDS:              return "AArch64ISD::ADDS";
@@ -3049,61 +3057,34 @@ AArch64TargetLowering::LowerDarwinGlobal
 /// When accessing thread-local variables under either the general-dynamic or
 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-///     adrp x0, :tlsdesc:thread_var
-///     ldr x8, [x0, :tlsdesc_lo12:thread_var]
-///     add x0, x0, :tlsdesc_lo12:thread_var
-///     .tlsdesccall thread_var
-///     blr x8
-///     (TPIDR_EL0 offset now in x0).
+/// is a function pointer to carry out the resolution.
 ///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
+/// The sequence is:
+///    adrp  x0, :tlsdesc:var
+///    ldr   x1, [x0, #:tlsdesc_lo12:var]
+///    add   x0, x0, #:tlsdesc_lo12:var
+///    .tlsdesccall var
+///    blr   x1
+///    (TPIDR_EL0 offset now in x0)
 ///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
-                                                   SDValue DescAddr, SDLoc DL,
-                                                   SelectionDAG &DAG) const {
+///  The above sequence must be produced unscheduled, to enable the linker to
+///  optimize/relax this sequence.
+///  Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+///  above sequence, and expanded really late in the compilation flow, to ensure
+///  the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+                                                      SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy();
 
-  // The function we need to call is simply the first entry in the GOT for this
-  // descriptor, load it in preparation.
-  SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
-
-  // TLS calls preserve all registers except those that absolutely must be
-  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
-  // silly).
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
-  const AArch64RegisterInfo *ARI =
-      static_cast<const AArch64RegisterInfo *>(TRI);
-  const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
-  // The function takes only one argument: the address of the descriptor itself
-  // in X0.
-  SDValue Glue, Chain;
-  Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
-  Glue = Chain.getValue(1);
+  SDValue Chain = DAG.getEntryNode();
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 
-  // We're now ready to populate the argument list, as with a normal call:
-  SmallVector<SDValue, 6> Ops;
+  SmallVector<SDValue, 2> Ops;
   Ops.push_back(Chain);
-  Ops.push_back(Func);
   Ops.push_back(SymAddr);
-  Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
-  Ops.push_back(DAG.getRegisterMask(Mask));
-  Ops.push_back(Glue);
 
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
-  Glue = Chain.getValue(1);
+  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+  SDValue Glue = Chain.getValue(1);
 
   return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
 }
@@ -3114,9 +3095,18 @@ AArch64TargetLowering::LowerELFGlobalTLS
   assert(Subtarget->isTargetELF() && "This function expects an ELF target");
   assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
          "ELF TLS only supported in small memory model");
+  // Different choices can be made for the maximum size of the TLS area for a
+  // module. For the small address model, the default TLS size is 16MiB and the
+  // maximum TLS size is 4GiB.
+  // FIXME: add -mtls-size command line option and make it control the 16MiB
+  // vs. 4GiB code sequence generation.
   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+    if (Model == TLSModel::LocalDynamic)
+      Model = TLSModel::GeneralDynamic;
+  }
 
   SDValue TPOff;
   EVT PtrVT = getPointerTy();
@@ -3127,17 +3117,20 @@ AArch64TargetLowering::LowerELFGlobalTLS
 
   if (Model == TLSModel::LocalExec) {
     SDValue HiVar = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
     SDValue LoVar = DAG.getTargetGlobalAddress(
         GV, DL, PtrVT, 0,
-        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
-                                       DAG.getTargetConstant(16, MVT::i32)),
-                    0);
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
-                                       DAG.getTargetConstant(0, MVT::i32)),
-                    0);
+    SDValue TPWithOff_lo =
+        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+                                   HiVar, DAG.getTargetConstant(0, MVT::i32)),
+                0);
+    SDValue TPWithOff =
+        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+                                   LoVar, DAG.getTargetConstant(0, MVT::i32)),
+                0);
+    return TPWithOff;
   } else if (Model == TLSModel::InitialExec) {
     TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
     TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3152,19 +3145,6 @@ AArch64TargetLowering::LowerELFGlobalTLS
         DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
     MFI->incNumLocalDynamicTLSAccesses();
 
-    // Accesses used in this sequence go via the TLS descriptor which lives in
-    // the GOT. Prepare an address we can use to handle this.
-    SDValue HiDesc = DAG.getTargetExternalSymbol(
-        "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
-    SDValue LoDesc = DAG.getTargetExternalSymbol(
-        "_TLS_MODULE_BASE_", PtrVT,
-        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
-    // First argument to the descriptor call is the address of the descriptor
-    // itself.
-    SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
-    DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
     // The call needs a relocation too for linker relaxation. It doesn't make
     // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
     // the address.
@@ -3173,40 +3153,23 @@ AArch64TargetLowering::LowerELFGlobalTLS
 
     // Now we can calculate the offset from TPIDR_EL0 to this module's
     // thread-local area.
-    TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
 
     // Now use :dtprel_whatever: operations to calculate this variable's offset
     // in its thread-storage area.
     SDValue HiVar = DAG.getTargetGlobalAddress(
-        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
     SDValue LoVar = DAG.getTargetGlobalAddress(
         GV, DL, MVT::i64, 0,
-        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
-    SDValue DTPOff =
-        SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
-                                   DAG.getTargetConstant(16, MVT::i32)),
-                0);
-    DTPOff =
-        SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
-                                   DAG.getTargetConstant(0, MVT::i32)),
-                0);
-
-    TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
-  } else if (Model == TLSModel::GeneralDynamic) {
-    // Accesses used in this sequence go via the TLS descriptor which lives in
-    // the GOT. Prepare an address we can use to handle this.
-    SDValue HiDesc = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
-    SDValue LoDesc = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0,
         AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 
-    // First argument to the descriptor call is the address of the descriptor
-    // itself.
-    SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
-    DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+                                       DAG.getTargetConstant(0, MVT::i32)),
+                    0);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+                                       DAG.getTargetConstant(0, MVT::i32)),
+                    0);
+  } else if (Model == TLSModel::GeneralDynamic) {
     // The call needs a relocation too for linker relaxation. It doesn't make
     // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
     // the address.
@@ -3214,7 +3177,7 @@ AArch64TargetLowering::LowerELFGlobalTLS
         DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
 
     // Finally we can make a call to calculate the offset from tpidr_el0.
-    TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
   } else
     llvm_unreachable("Unsupported ELF TLS access model");
 

Modified: head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
==============================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h	Mon Mar 30 19:36:33 2015	(r280864)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h	Mon Mar 30 20:01:41 2015	(r280865)
@@ -29,9 +29,9 @@ enum {
   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
   CALL,         // Function call.
 
-  // Almost the same as a normal call node, except that a TLSDesc relocation is
-  // needed so the linker can relax it correctly if possible.
-  TLSDESC_CALL,
+  // Produces the full sequence of instructions for getting the thread pointer
+  // offset of a variable into X0, using the TLSDesc model.
+  TLSDESC_CALLSEQ,
   ADRP,     // Page address of a TargetGlobalAddress operand.
   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
   LOADgot,  // Load from automatically generated descriptor (e.g. Global
@@ -399,8 +399,8 @@ private:
   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
-                              SelectionDAG &DAG) const;
+  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+                                 SelectionDAG &DAG) const;
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;

Modified: head/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
==============================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td	Mon Mar 30 19:36:33 2015	(r280864)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td	Mon Mar 30 20:01:41 2015	(r280865)
@@ -96,6 +96,19 @@ def SDT_AArch64ITOF  : SDTypeProfile<1, 
 
 def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                  SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+//  adrp  x0, :tlsdesc:var
+//  ldr   x1, [x0, #:tlsdesc_lo12:var]
+//  add   x0, x0, #:tlsdesc_lo12:var
+//  .tlsdesccall var
+//  blr   x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// number of operands (the variable)
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+                                          [SDTCisPtrTy<0>]>;
+
 def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                         [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                          SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,10 +242,11 @@ def AArch64Prefetch        : SDNode<"AAr
 def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
 def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
 
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
-                                 SDT_AArch64TLSDescCall,
-                                 [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
-                                  SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+                                    SDT_AArch64TLSDescCallSeq,
+                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+                                     SDNPVariadic]>;
+
 
 def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                  SDT_AArch64WrapperLarge>;
@@ -1049,15 +1063,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i6
   let AsmString = ".tlsdesccall $sym";
 }
 
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
-    : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
-             [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+    isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+    : Pseudo<(outs), (ins i64imm:$sym),
+             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+          (TLSDESC_CALLSEQ texternalsym:$sym)>;
 
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
-          (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
 //===----------------------------------------------------------------------===//
 // Conditional branch (immediate) instruction.
 //===----------------------------------------------------------------------===//

Modified: head/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
==============================================================================
--- head/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp	Mon Mar 30 19:36:33 2015	(r280864)
+++ head/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp	Mon Mar 30 20:01:41 2015	(r280865)
@@ -22,9 +22,12 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
+extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
+
 AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
     : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
 
@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbo
     if (MO.isGlobal()) {
       const GlobalValue *GV = MO.getGlobal();
       Model = Printer.TM.getTLSModel(GV);
+      if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+          Model == TLSModel::LocalDynamic)
+        Model = TLSModel::GeneralDynamic;
+
     } else {
       assert(MO.isSymbol() &&
              StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
              "unexpected external TLS symbol");
+      // The general dynamic access sequence is used to get the
+      // address of _TLS_MODULE_BASE_.
       Model = TLSModel::GeneralDynamic;
     }
     switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbo
     RefFlags |= AArch64MCExpr::VK_G1;
   else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
     RefFlags |= AArch64MCExpr::VK_G0;
+  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+    RefFlags |= AArch64MCExpr::VK_HI12;
 
   if (MO.getTargetFlags() & AArch64II::MO_NC)
     RefFlags |= AArch64MCExpr::VK_NC;

Modified: head/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
==============================================================================
--- head/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h	Mon Mar 30 19:36:33 2015	(r280864)
+++ head/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h	Mon Mar 30 20:01:41 2015	(r280865)
@@ -1229,7 +1229,7 @@ namespace AArch64II {
 
     MO_NO_FLAG,
 
-    MO_FRAGMENT = 0x7,
+    MO_FRAGMENT = 0xf,
 
     /// MO_PAGE - A symbol operand with this flag represents the pc-relative
     /// offset of the 4K page containing the symbol.  This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
     /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
     MO_G0 = 6,
 
+    /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+    /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left-
+    /// by-12-bits instruction.
+    MO_HI12 = 7,
+
     /// MO_GOT - This flag indicates that a symbol operand represents the
     /// address of the GOT entry for the symbol, rather than the address of
     /// the symbol itself.
-    MO_GOT = 8,
+    MO_GOT = 0x10,
 
     /// MO_NC - Indicates whether the linker is expected to check the symbol
     /// reference for overflow. For example in an ADRP/ADD pair of relocations
     /// the ADRP usually does check, but not the ADD.
-    MO_NC = 0x10,
+    MO_NC = 0x20,
 
     /// MO_TLS - Indicates that the operand being accessed is some kind of
     /// thread-local symbol. On Darwin, only one type of thread-local access
     /// exists (pre linker-relaxation), but on ELF the TLSModel used for the
     /// referee will affect interpretation.
-    MO_TLS = 0x20,
+    MO_TLS = 0x40,
 
     /// MO_CONSTPOOL - This flag indicates that a symbol operand represents
     /// the address of a constant pool entry for the symbol, rather than the
     /// address of the symbol itself.
-    MO_CONSTPOOL = 0x40
+    MO_CONSTPOOL = 0x80
   };
 } // end namespace AArch64II
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201503302001.t2UK1gse088144>