Date: Tue, 2 May 2017 18:31:09 +0000 (UTC) From: Dimitry Andric <dim@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r317690 - in vendor/lld/dist: COFF ELF docs test/COFF test/COFF/Inputs test/ELF test/ELF/Inputs test/ELF/linkerscript Message-ID: <201705021831.v42IV9Qi001131@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: dim Date: Tue May 2 18:31:09 2017 New Revision: 317690 URL: https://svnweb.freebsd.org/changeset/base/317690 Log: Vendor import of lld trunk r301939: https://llvm.org/svn/llvm-project/lld/trunk@301939 Added: vendor/lld/dist/test/COFF/Inputs/constant-import.s (contents, props changed) vendor/lld/dist/test/COFF/constant.test vendor/lld/dist/test/COFF/icf-data.test vendor/lld/dist/test/ELF/Inputs/eh-frame-end.s (contents, props changed) vendor/lld/dist/test/ELF/debug-gnu-pubnames.s (contents, props changed) vendor/lld/dist/test/ELF/eh-frame-begin-end.s (contents, props changed) vendor/lld/dist/test/ELF/linkerscript/section-metadata.s (contents, props changed) Modified: vendor/lld/dist/COFF/Chunks.cpp vendor/lld/dist/COFF/Error.cpp vendor/lld/dist/COFF/ICF.cpp vendor/lld/dist/COFF/InputFiles.cpp vendor/lld/dist/COFF/InputFiles.h vendor/lld/dist/COFF/MapFile.cpp vendor/lld/dist/ELF/Config.h vendor/lld/dist/ELF/Driver.cpp vendor/lld/dist/ELF/Driver.h vendor/lld/dist/ELF/Error.cpp vendor/lld/dist/ELF/InputFiles.cpp vendor/lld/dist/ELF/InputFiles.h vendor/lld/dist/ELF/InputSection.cpp vendor/lld/dist/ELF/LTO.cpp vendor/lld/dist/ELF/LinkerScript.cpp vendor/lld/dist/ELF/LinkerScript.h vendor/lld/dist/ELF/MapFile.cpp vendor/lld/dist/ELF/Options.td vendor/lld/dist/ELF/OutputSections.cpp vendor/lld/dist/ELF/OutputSections.h vendor/lld/dist/ELF/Strings.h vendor/lld/dist/ELF/SymbolTable.cpp vendor/lld/dist/ELF/SyntheticSections.cpp vendor/lld/dist/ELF/Writer.cpp vendor/lld/dist/docs/index.rst vendor/lld/dist/test/COFF/lldmap.test vendor/lld/dist/test/ELF/abs-conflict.s vendor/lld/dist/test/ELF/driver-access.test vendor/lld/dist/test/ELF/gdb-index.s vendor/lld/dist/test/ELF/linkerscript/sections.s vendor/lld/dist/test/ELF/map-file.s vendor/lld/dist/test/ELF/relocation-in-merge.s Modified: vendor/lld/dist/COFF/Chunks.cpp ============================================================================== --- vendor/lld/dist/COFF/Chunks.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/COFF/Chunks.cpp Tue May 2 18:31:09 2017 (r317690) @@ -326,41 +326,38 @@ void SEHTableChunk::writeTo(uint8_t *Buf // usually loaded to that address. However, if there's already another // DLL that overlaps, the loader has to relocate it. To do that, DLLs // contain .reloc sections which contain offsets that need to be fixed -// up at runtime. If the loader find that a DLL cannot be loaded to its +// up at runtime. If the loader finds that a DLL cannot be loaded to its // desired base address, it loads it to somewhere else, and add <actual // base address> - <desired base address> to each offset that is -// specified by .reloc section. +// specified by the .reloc section. In ELF terms, .reloc sections +// contain relative relocations in REL format (as opposed to RELA.) // -// In ELF terms, .reloc sections contain arrays of relocation offsets. -// All these offsets in the section are implicitly R_*_RELATIVE, and -// addends are read from section contents (so it is REL as opposed to -// RELA). +// This already significantly reduces the size of relocations compared +// to ELF .rel.dyn, but Windows does more to reduce it (probably because +// it was invented for PCs in the late '80s or early '90s.) Offsets in +// .reloc are grouped by page where the page size is 12 bits, and +// offsets sharing the same page address are stored consecutively to +// represent them with less space. This is very similar to the page +// table which is grouped by (multiple stages of) pages. // -// This already reduce the size of relocations to 1/3 compared to ELF -// .dynrel, but Windows does more to reduce it (probably because it was -// invented for PCs in the late '80s or early '90s.) Offsets in .reloc -// are grouped by page where page size is 16 bits, and offsets sharing -// the same page address are stored consecutively to represent them with -// less space. This is a very similar to the page table which is grouped -// by (multiple stages of) pages. -// -// For example, let's say we have 0x00030, 0x00500, 0x01000, 0x01100, -// 0x20004, and 0x20008 in a .reloc section. In the section, they are -// represented like this: +// For example, let's say we have 0x00030, 0x00500, 0x00700, 0x00A00, +// 0x20004, and 0x20008 in a .reloc section for x64. The uppermost 4 +// bits have a type IMAGE_REL_BASED_DIR64 or 0xA. In the section, they +// are represented like this: // // 0x00000 -- page address (4 bytes) // 16 -- size of this block (4 bytes) -// 0x0030 -- entries (2 bytes each) -// 0x0500 -// 0x1000 -// 0x1100 +// 0xA030 -- entries (2 bytes each) +// 0xA500 +// 0xA700 +// 0xAA00 // 0x20000 -- page address (4 bytes) // 12 -- size of this block (4 bytes) -// 0x0004 -- entries (2 bytes each) -// 0x0008 +// 0xA004 -- entries (2 bytes each) +// 0xA008 // -// Usually we have a lot of relocatinos for each page, so the number of -// bytes for one .reloc entry is close to 2 bytes. +// Usually we have a lot of relocations for each page, so the number of +// bytes for one .reloc entry is close to 2 bytes on average. BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) { // Block header consists of 4 byte page RVA and 4 byte block size. // Each entry is 2 byte. Last entry may be padding. Modified: vendor/lld/dist/COFF/Error.cpp ============================================================================== --- vendor/lld/dist/COFF/Error.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/COFF/Error.cpp Tue May 2 18:31:09 2017 (r317690) @@ -59,6 +59,7 @@ void log(const Twine &Msg) { if (Config->Verbose) { std::lock_guard<std::mutex> Lock(Mu); outs() << Argv0 << ": " << Msg << "\n"; + outs().flush(); } } Modified: vendor/lld/dist/COFF/ICF.cpp ============================================================================== --- vendor/lld/dist/COFF/ICF.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/COFF/ICF.cpp Tue May 2 18:31:09 2017 (r317690) @@ -71,10 +71,18 @@ uint32_t ICF::getHash(SectionChunk *C) { } // Returns true if section S is subject of ICF. +// +// Microsoft's documentation +// (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April +// 2017) says that /opt:icf folds both functions and read-only data. +// Despite that, the MSVC linker folds only functions. We found +// a few instances of programs that are not safe for data merging. +// Therefore, we merge only functions just like the MSVC tool. bool ICF::isEligible(SectionChunk *C) { bool Global = C->Sym && C->Sym->isExternal(); + bool Executable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE; bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; - return C->isCOMDAT() && C->isLive() && Global && !Writable; + return C->isCOMDAT() && C->isLive() && Global && Executable && !Writable; } // Split a range into smaller ranges by recoloring sections Modified: vendor/lld/dist/COFF/InputFiles.cpp ============================================================================== --- vendor/lld/dist/COFF/InputFiles.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/COFF/InputFiles.cpp Tue May 2 18:31:09 2017 (r317690) @@ -327,6 +327,9 @@ void ImportFile::parse() { ImpSym = cast<DefinedImportData>( Symtab->addImportData(ImpName, this)->body()); + if (Hdr->getType() == llvm::COFF::IMPORT_CONST) + ConstSym = + cast<DefinedImportData>(Symtab->addImportData(Name, this)->body()); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call Modified: vendor/lld/dist/COFF/InputFiles.h ============================================================================== --- vendor/lld/dist/COFF/InputFiles.h Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/COFF/InputFiles.h Tue May 2 18:31:09 2017 (r317690) @@ -167,6 +167,7 @@ public: static bool classof(const InputFile *F) { return F->kind() == ImportKind; } DefinedImportData *ImpSym = nullptr; + DefinedImportData *ConstSym = nullptr; DefinedImportThunk *ThunkSym = nullptr; std::string DLLName; Modified: vendor/lld/dist/COFF/MapFile.cpp ============================================================================== --- vendor/lld/dist/COFF/MapFile.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/COFF/MapFile.cpp Tue May 2 18:31:09 2017 (r317690) @@ -11,21 +11,21 @@ // hierarchically the output sections, input sections, input files and // symbol: // -// Address Size Align Out In File Symbol -// ================================================================= -// 00201000 00000015 4 .text -// 00201000 0000000e 4 .text -// 00201000 0000000e 4 test.o -// 0020100e 00000000 0 local -// 00201005 00000000 0 f(int) +// Address Size Align Out File Symbol +// 00201000 00000015 4 .text +// 00201000 0000000e 4 test.o:(.text) +// 0020100e 00000000 0 local +// 00201005 00000000 0 f(int) // //===----------------------------------------------------------------------===// #include "MapFile.h" #include "Error.h" +#include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" +#include "lld/Core/Parallel.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -34,72 +34,58 @@ using namespace llvm::object; using namespace lld; using namespace lld::coff; -static void writeOutSecLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - uint64_t Align, StringRef Name) { - OS << format("%08llx %08llx %5lld ", Address, Size, Align) - << left_justify(Name, 7); -} - -static void writeInSecLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - uint64_t Align, StringRef Name) { - // Pass an empty name to align the text to the correct column. - writeOutSecLine(OS, Address, Size, Align, ""); - OS << ' ' << left_justify(Name, 7); -} - -static void writeFileLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - uint64_t Align, StringRef Name) { - // Pass an empty name to align the text to the correct column. - writeInSecLine(OS, Address, Size, Align, ""); - OS << ' ' << left_justify(Name, 7); -} +typedef DenseMap<const SectionChunk *, SmallVector<DefinedRegular *, 4>> + SymbolMapTy; -static void writeSymbolLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - StringRef Name) { - // Pass an empty name to align the text to the correct column. - writeFileLine(OS, Address, Size, 0, ""); - OS << ' ' << left_justify(Name, 7); -} - -static void writeSectionChunk(raw_fd_ostream &OS, const SectionChunk *SC, - StringRef &PrevName) { - StringRef Name = SC->getSectionName(); - if (Name != PrevName) { - writeInSecLine(OS, SC->getRVA(), SC->getSize(), SC->getAlign(), Name); - OS << '\n'; - PrevName = Name; - } - coff::ObjectFile *File = SC->File; - if (!File) - return; - writeFileLine(OS, SC->getRVA(), SC->getSize(), SC->getAlign(), - toString(File)); - OS << '\n'; - ArrayRef<SymbolBody *> Syms = File->getSymbols(); - for (SymbolBody *Sym : Syms) { - auto *DR = dyn_cast<DefinedRegular>(Sym); - if (!DR || DR->getChunk() != SC || - DR->getCOFFSymbol().isSectionDefinition()) - continue; - writeSymbolLine(OS, DR->getRVA(), SC->getSize(), toString(*Sym)); - OS << '\n'; +// Print out the first three columns of a line. +static void writeHeader(raw_ostream &OS, uint64_t Addr, uint64_t Size, + uint64_t Align) { + OS << format("%08llx %08llx %5lld ", Addr, Size, Align); +} + +static std::string indent(int Depth) { return std::string(Depth * 8, ' '); } + +// Returns a list of all symbols that we want to print out. +static std::vector<DefinedRegular *> getSymbols() { + std::vector<DefinedRegular *> V; + for (coff::ObjectFile *File : Symtab->ObjectFiles) + for (SymbolBody *B : File->getSymbols()) + if (auto *Sym = dyn_cast<DefinedRegular>(B)) + if (Sym && !Sym->getCOFFSymbol().isSectionDefinition()) + V.push_back(Sym); + return V; +} + +// Returns a map from sections to their symbols. +static SymbolMapTy getSectionSyms(ArrayRef<DefinedRegular *> Syms) { + SymbolMapTy Ret; + for (DefinedRegular *S : Syms) + Ret[S->getChunk()].push_back(S); + + // Sort symbols by address. + for (auto &It : Ret) { + SmallVectorImpl<DefinedRegular *> &V = It.second; + std::sort(V.begin(), V.end(), [](DefinedRegular *A, DefinedRegular *B) { + return A->getRVA() < B->getRVA(); + }); } + return Ret; } -static void writeMapFile2(raw_fd_ostream &OS, - ArrayRef<OutputSection *> OutputSections) { - OS << "Address Size Align Out In File Symbol\n"; - - for (OutputSection *Sec : OutputSections) { - uint32_t VA = Sec->getRVA(); - writeOutSecLine(OS, VA, Sec->getVirtualSize(), /*Align=*/PageSize, - Sec->getName()); - OS << '\n'; - StringRef PrevName = ""; - for (Chunk *C : Sec->getChunks()) - if (const auto *SC = dyn_cast<SectionChunk>(C)) - writeSectionChunk(OS, SC, PrevName); - } +// Construct a map from symbols to their stringified representations. +static DenseMap<DefinedRegular *, std::string> +getSymbolStrings(ArrayRef<DefinedRegular *> Syms) { + std::vector<std::string> Str(Syms.size()); + parallel_for((size_t)0, Syms.size(), [&](size_t I) { + raw_string_ostream OS(Str[I]); + writeHeader(OS, Syms[I]->getRVA(), 0, 0); + OS << indent(2) << toString(*Syms[I]); + }); + + DenseMap<DefinedRegular *, std::string> Ret; + for (size_t I = 0, E = Syms.size(); I < E; ++I) + Ret[Syms[I]] = std::move(Str[I]); + return Ret; } void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) { @@ -110,5 +96,30 @@ void coff::writeMapFile(ArrayRef<OutputS raw_fd_ostream OS(Config->MapFile, EC, sys::fs::F_None); if (EC) fatal("cannot open " + Config->MapFile + ": " + EC.message()); - writeMapFile2(OS, OutputSections); + + // Collect symbol info that we want to print out. + std::vector<DefinedRegular *> Syms = getSymbols(); + SymbolMapTy SectionSyms = getSectionSyms(Syms); + DenseMap<DefinedRegular *, std::string> SymStr = getSymbolStrings(Syms); + + // Print out the header line. + OS << "Address Size Align Out In Symbol\n"; + + // Print out file contents. + for (OutputSection *Sec : OutputSections) { + writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize); + OS << Sec->getName() << '\n'; + + for (Chunk *C : Sec->getChunks()) { + auto *SC = dyn_cast<SectionChunk>(C); + if (!SC) + continue; + + writeHeader(OS, SC->getRVA(), SC->getSize(), SC->getAlign()); + OS << indent(1) << SC->File->getName() << ":(" << SC->getSectionName() + << ")\n"; + for (DefinedRegular *Sym : SectionSyms[SC]) + OS << SymStr[Sym] << '\n'; + } + } } Modified: vendor/lld/dist/ELF/Config.h ============================================================================== --- vendor/lld/dist/ELF/Config.h Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/Config.h Tue May 2 18:31:09 2017 (r317690) @@ -89,7 +89,7 @@ struct Configuration { llvm::StringRef SoName; llvm::StringRef Sysroot; llvm::StringRef ThinLTOCacheDir; - std::string RPath; + std::string Rpath; std::vector<VersionDefinition> VersionDefinitions; std::vector<llvm::StringRef> AuxiliaryList; std::vector<llvm::StringRef> SearchPaths; Modified: vendor/lld/dist/ELF/Driver.cpp ============================================================================== --- vendor/lld/dist/ELF/Driver.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/Driver.cpp Tue May 2 18:31:09 2017 (r317690) @@ -124,7 +124,7 @@ static std::tuple<ELFKind, uint16_t, uin // Returns slices of MB by parsing MB as an archive file. // Each slice consists of a member file in the archive. std::vector<MemoryBufferRef> -LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { +static getArchiveMembers(MemoryBufferRef MB) { std::unique_ptr<Archive> File = check(Archive::create(MB), MB.getBufferIdentifier() + ": failed to parse archive"); @@ -242,6 +242,9 @@ static void checkOptions(opt::InputArgLi if (Config->Pie && Config->Shared) error("-shared and -pie may not be used together"); + if (!Config->Shared && !Config->AuxiliaryList.empty()) + error("-f may not be used without -shared"); + if (Config->Relocatable) { if (Config->Shared) error("-r and -shared may not be used together"); @@ -396,7 +399,7 @@ static std::vector<StringRef> getArgs(op return V; } -static std::string getRPath(opt::InputArgList &Args) { +static std::string getRpath(opt::InputArgList &Args) { std::vector<StringRef> V = getArgs(Args, OPT_rpath); return llvm::join(V.begin(), V.end(), ":"); } @@ -444,16 +447,14 @@ static UnresolvedPolicy getUnresolvedSym } static Target2Policy getTarget2(opt::InputArgList &Args) { - if (auto *Arg = Args.getLastArg(OPT_target2)) { - StringRef S = Arg->getValue(); - if (S == "rel") - return Target2Policy::Rel; - if (S == "abs") - return Target2Policy::Abs; - if (S == "got-rel") - return Target2Policy::GotRel; - error("unknown --target2 option: " + S); - } + StringRef S = getString(Args, OPT_target2, "got-rel"); + if (S == "rel") + return Target2Policy::Rel; + if (S == "abs") + return Target2Policy::Abs; + if (S == "got-rel") + return Target2Policy::GotRel; + error("unknown --target2 option: " + S); return Target2Policy::GotRel; } @@ -550,6 +551,29 @@ static std::pair<bool, bool> getHashStyl return {true, true}; } +// Parse --build-id or --build-id=<style>. We handle "tree" as a +// synonym for "sha1" because all our hash functions including +// -build-id=sha1 are actually tree hashes for performance reasons. +static std::pair<BuildIdKind, std::vector<uint8_t>> +getBuildId(opt::InputArgList &Args) { + if (Args.hasArg(OPT_build_id)) + return {BuildIdKind::Fast, {}}; + + StringRef S = getString(Args, OPT_build_id_eq, "none"); + if (S == "md5") + return {BuildIdKind::Md5, {}}; + if (S == "sha1" || S == "tree") + return {BuildIdKind::Sha1, {}}; + if (S == "uuid") + return {BuildIdKind::Uuid, {}}; + if (S.startswith("0x")) + return {BuildIdKind::Hexstring, parseHex(S.substr(2))}; + + if (S != "none") + error("unknown --build-id style: " + S); + return {BuildIdKind::None, {}}; +} + static std::vector<StringRef> getLines(MemoryBufferRef MB) { SmallVector<StringRef, 0> Arr; MB.getBuffer().split(Arr, '\n'); @@ -564,14 +588,14 @@ static std::vector<StringRef> getLines(M } static bool getCompressDebugSections(opt::InputArgList &Args) { - if (auto *Arg = Args.getLastArg(OPT_compress_debug_sections)) { - StringRef S = Arg->getValue(); - if (S == "zlib") - return zlib::isAvailable(); - if (S != "none") - error("unknown --compress-debug-sections value: " + S); - } - return false; + StringRef S = getString(Args, OPT_compress_debug_sections, "none"); + if (S == "none") + return false; + if (S != "zlib") + error("unknown --compress-debug-sections value: " + S); + if (!zlib::isAvailable()) + error("--compress-debug-sections: zlib is not available"); + return true; } // Initializes Config members by the command line options. @@ -616,7 +640,7 @@ void LinkerDriver::readConfigs(opt::Inpu Config->OutputFile = getString(Args, OPT_o); Config->Pie = getArg(Args, OPT_pie, OPT_nopie, false); Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); - Config->RPath = getRPath(Args); + Config->Rpath = getRpath(Args); Config->Relocatable = Args.hasArg(OPT_relocatable); Config->SaveTemps = Args.hasArg(OPT_save_temps); Config->SearchPaths = getArgs(Args, OPT_L); @@ -679,32 +703,7 @@ void LinkerDriver::readConfigs(opt::Inpu Config->ZRelro = false; std::tie(Config->SysvHash, Config->GnuHash) = getHashStyle(Args); - - // Parse --build-id or --build-id=<style>. We handle "tree" as a - // synonym for "sha1" because all of our hash functions including - // -build-id=sha1 are tree hashes for performance reasons. - if (Args.hasArg(OPT_build_id)) - Config->BuildId = BuildIdKind::Fast; - if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) { - StringRef S = Arg->getValue(); - if (S == "md5") { - Config->BuildId = BuildIdKind::Md5; - } else if (S == "sha1" || S == "tree") { - Config->BuildId = BuildIdKind::Sha1; - } else if (S == "uuid") { - Config->BuildId = BuildIdKind::Uuid; - } else if (S == "none") { - Config->BuildId = BuildIdKind::None; - } else if (S.startswith("0x")) { - Config->BuildId = BuildIdKind::Hexstring; - Config->BuildIdVector = parseHex(S.substr(2)); - } else { - error("unknown --build-id style: " + S); - } - } - - if (!Config->Shared && !Config->AuxiliaryList.empty()) - error("-f may not be used without -shared"); + std::tie(Config->BuildId, Config->BuildIdVector) = getBuildId(Args); if (auto *Arg = Args.getLastArg(OPT_symbol_ordering_file)) if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) Modified: vendor/lld/dist/ELF/Driver.h ============================================================================== --- vendor/lld/dist/ELF/Driver.h Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/Driver.h Tue May 2 18:31:09 2017 (r317690) @@ -31,7 +31,6 @@ public: void addLibrary(StringRef Name); private: - std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB); void readConfigs(llvm::opt::InputArgList &Args); void createFiles(llvm::opt::InputArgList &Args); void inferMachineType(); Modified: vendor/lld/dist/ELF/Error.cpp ============================================================================== --- vendor/lld/dist/ELF/Error.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/Error.cpp Tue May 2 18:31:09 2017 (r317690) @@ -60,6 +60,7 @@ void elf::log(const Twine &Msg) { if (Config->Verbose) { std::lock_guard<std::mutex> Lock(Mu); outs() << Argv0 << ": " << Msg << "\n"; + outs().flush(); } } Modified: vendor/lld/dist/ELF/InputFiles.cpp ============================================================================== --- vendor/lld/dist/ELF/InputFiles.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/InputFiles.cpp Tue May 2 18:31:09 2017 (r317690) @@ -123,10 +123,10 @@ std::string elf::ObjectFile<ELFT>::getLi return ""; } -// Returns "(internal)", "foo.a(bar.o)" or "baz.o". +// Returns "<internal>", "foo.a(bar.o)" or "baz.o". std::string lld::toString(const InputFile *F) { if (!F) - return "(internal)"; + return "<internal>"; if (F->ToStringCache.empty()) { if (F->ArchiveName.empty()) @@ -137,15 +137,13 @@ std::string lld::toString(const InputFil return F->ToStringCache; } -template <class ELFT> static ELFKind getELFKind() { - if (ELFT::TargetEndianness == support::little) - return ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; - return ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind; -} - template <class ELFT> ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) { - EKind = getELFKind<ELFT>(); + if (ELFT::TargetEndianness == support::little) + EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; + else + EKind = ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind; + EMachine = getObj().getHeader()->e_machine; OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; } @@ -174,8 +172,10 @@ void ELFFileBase<ELFT>::initSymtab(Array } template <class ELFT> -elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) - : ELFFileBase<ELFT>(Base::ObjectKind, M) {} +elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M, StringRef ArchiveName) + : ELFFileBase<ELFT>(Base::ObjectKind, M) { + this->ArchiveName = ArchiveName; +} template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { @@ -361,6 +361,15 @@ InputSectionBase *elf::ObjectFile<ELFT>: return Target; } +// Create a regular InputSection class that has the same contents +// as a given section. +InputSectionBase *toRegularSection(MergeInputSection *Sec) { + auto *Ret = make<InputSection>(Sec->Flags, Sec->Type, Sec->Alignment, + Sec->Data, Sec->Name); + Ret->File = Sec->File; + return Ret; +} + template <class ELFT> InputSectionBase * elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, @@ -398,9 +407,18 @@ elf::ObjectFile<ELFT>::createInputSectio if (Target->FirstRelocation) fatal(toString(this) + ": multiple relocation sections to one section are not supported"); - if (isa<MergeInputSection>(Target)) - fatal(toString(this) + - ": relocations pointing to SHF_MERGE are not supported"); + + // Mergeable sections with relocations are tricky because relocations + // need to be taken into account when comparing section contents for + // merging. It doesn't worth supporting such mergeable sections because + // they are rare and it'd complicates the internal design (we usually + // have to determine if two sections are mergeable early in the link + // process much before applying relocations). We simply handle mergeable + // sections with relocations as non-mergeable. + if (auto *MS = dyn_cast<MergeInputSection>(Target)) { + Target = toRegularSection(MS); + this->Sections[Sec.sh_info] = Target; + } size_t NumRelocations; if (Sec.sh_type == SHT_RELA) { @@ -461,6 +479,15 @@ elf::ObjectFile<ELFT>::createInputSectio if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) return &InputSection::Discarded; + // If -gdb-index is given, LLD creates .gdb_index section, and that + // section serves the same purpose as .debug_gnu_pub{names,types} sections. + // If that's the case, we want to eliminate .debug_gnu_pub{names,types} + // because they are redundant and can waste large amount of disk space + // (for example, they are about 400 MiB in total for a clang debug build.) + if (Config->GdbIndex && + (Name == ".debug_gnu_pubnames" || Name == ".debug_gnu_pubtypes")) + return &InputSection::Discarded; + // The linkonce feature is a sort of proto-comdat. Some glibc i386 object // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce // sections. Drop those sections to avoid duplicate symbol errors. @@ -665,7 +692,7 @@ template <class ELFT> void SharedFile<EL uint64_t Val = Dyn.getVal(); if (Val >= this->StringTable.size()) fatal(toString(this) + ": invalid DT_SONAME entry"); - SoName = StringRef(this->StringTable.data() + Val); + SoName = this->StringTable.data() + Val; return; } } @@ -748,7 +775,7 @@ template <class ELFT> void SharedFile<EL // with explicit versions. if (V) { StringRef VerName = this->StringTable.data() + V->getAux()->vda_name; - Name = Saver.save(Twine(Name) + "@" + VerName); + Name = Saver.save(Name + "@" + VerName); elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); } } @@ -862,76 +889,50 @@ void BitcodeFile::parse(DenseSet<CachedH Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this)); } -// Small bit of template meta programming to handle the SharedFile constructor -// being the only one with a DefaultSoName parameter. -template <template <class> class T, class E> -typename std::enable_if<std::is_same<T<E>, SharedFile<E>>::value, - InputFile *>::type -createELFAux(MemoryBufferRef MB, StringRef DefaultSoName) { - return make<T<E>>(MB, DefaultSoName); -} -template <template <class> class T, class E> -typename std::enable_if<!std::is_same<T<E>, SharedFile<E>>::value, - InputFile *>::type -createELFAux(MemoryBufferRef MB, StringRef DefaultSoName) { - return make<T<E>>(MB); -} - -template <template <class> class T> -static InputFile *createELFFile(MemoryBufferRef MB, StringRef DefaultSoName) { +static ELFKind getELFKind(MemoryBufferRef MB) { unsigned char Size; unsigned char Endian; std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); + if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) fatal(MB.getBufferIdentifier() + ": invalid data encoding"); + if (Size != ELFCLASS32 && Size != ELFCLASS64) + fatal(MB.getBufferIdentifier() + ": invalid file class"); size_t BufSize = MB.getBuffer().size(); if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) fatal(MB.getBufferIdentifier() + ": file is too short"); - InputFile *Obj; - if (Size == ELFCLASS32 && Endian == ELFDATA2LSB) - Obj = createELFAux<T, ELF32LE>(MB, DefaultSoName); - else if (Size == ELFCLASS32 && Endian == ELFDATA2MSB) - Obj = createELFAux<T, ELF32BE>(MB, DefaultSoName); - else if (Size == ELFCLASS64 && Endian == ELFDATA2LSB) - Obj = createELFAux<T, ELF64LE>(MB, DefaultSoName); - else if (Size == ELFCLASS64 && Endian == ELFDATA2MSB) - Obj = createELFAux<T, ELF64BE>(MB, DefaultSoName); - else - fatal(MB.getBufferIdentifier() + ": invalid file class"); - - if (!Config->FirstElf) - Config->FirstElf = Obj; - return Obj; + if (Size == ELFCLASS32) + return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; + return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; } template <class ELFT> void BinaryFile::parse() { - StringRef Buf = MB.getBuffer(); - ArrayRef<uint8_t> Data = - makeArrayRef<uint8_t>((const uint8_t *)Buf.data(), Buf.size()); - - std::string Filename = MB.getBufferIdentifier(); - std::transform(Filename.begin(), Filename.end(), Filename.begin(), - [](char C) { return isalnum(C) ? C : '_'; }); - Filename = "_binary_" + Filename; - StringRef StartName = Saver.save(Twine(Filename) + "_start"); - StringRef EndName = Saver.save(Twine(Filename) + "_end"); - StringRef SizeName = Saver.save(Twine(Filename) + "_size"); - + ArrayRef<uint8_t> Data = toArrayRef(MB.getBuffer()); auto *Section = make<InputSection>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, Data, ".data"); Sections.push_back(Section); - elf::Symtab<ELFT>::X->addRegular(StartName, STV_DEFAULT, STT_OBJECT, 0, 0, - STB_GLOBAL, Section, nullptr); - elf::Symtab<ELFT>::X->addRegular(EndName, STV_DEFAULT, STT_OBJECT, - Data.size(), 0, STB_GLOBAL, Section, - nullptr); - elf::Symtab<ELFT>::X->addRegular(SizeName, STV_DEFAULT, STT_OBJECT, - Data.size(), 0, STB_GLOBAL, nullptr, + // For each input file foo that is embedded to a result as a binary + // blob, we define _binary_foo_{start,end,size} symbols, so that + // user programs can access blobs by name. Non-alphanumeric + // characters in a filename are replaced with underscore. + std::string S = "_binary_" + MB.getBufferIdentifier().str(); + for (size_t I = 0; I < S.size(); ++I) + if (!isalnum(S[I])) + S[I] = '_'; + + elf::Symtab<ELFT>::X->addRegular(Saver.save(S + "_start"), STV_DEFAULT, + STT_OBJECT, 0, 0, STB_GLOBAL, Section, nullptr); + elf::Symtab<ELFT>::X->addRegular(Saver.save(S + "_end"), STV_DEFAULT, + STT_OBJECT, Data.size(), 0, STB_GLOBAL, + Section, nullptr); + elf::Symtab<ELFT>::X->addRegular(Saver.save(S + "_size"), STV_DEFAULT, + STT_OBJECT, Data.size(), 0, STB_GLOBAL, + nullptr, nullptr); } static bool isBitcode(MemoryBufferRef MB) { @@ -941,15 +942,36 @@ static bool isBitcode(MemoryBufferRef MB InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, uint64_t OffsetInArchive) { - InputFile *F = isBitcode(MB) - ? make<BitcodeFile>(MB, ArchiveName, OffsetInArchive) - : createELFFile<ObjectFile>(MB, ""); - F->ArchiveName = ArchiveName; - return F; + if (isBitcode(MB)) + return make<BitcodeFile>(MB, ArchiveName, OffsetInArchive); + + switch (getELFKind(MB)) { + case ELF32LEKind: + return make<ObjectFile<ELF32LE>>(MB, ArchiveName); + case ELF32BEKind: + return make<ObjectFile<ELF32BE>>(MB, ArchiveName); + case ELF64LEKind: + return make<ObjectFile<ELF64LE>>(MB, ArchiveName); + case ELF64BEKind: + return make<ObjectFile<ELF64BE>>(MB, ArchiveName); + default: + llvm_unreachable("getELFKind"); + } } InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) { - return createELFFile<SharedFile>(MB, DefaultSoName); + switch (getELFKind(MB)) { + case ELF32LEKind: + return make<SharedFile<ELF32LE>>(MB, DefaultSoName); + case ELF32BEKind: + return make<SharedFile<ELF32BE>>(MB, DefaultSoName); + case ELF64LEKind: + return make<SharedFile<ELF64LE>>(MB, DefaultSoName); + case ELF64BEKind: + return make<SharedFile<ELF64BE>>(MB, DefaultSoName); + default: + llvm_unreachable("getELFKind"); + } } MemoryBufferRef LazyObjectFile::getBuffer() { @@ -1004,17 +1026,18 @@ std::vector<StringRef> LazyObjectFile::g if (isBitcode(this->MB)) return getBitcodeSymbols(); - unsigned char Size; - unsigned char Endian; - std::tie(Size, Endian) = getElfArchType(this->MB.getBuffer()); - if (Size == ELFCLASS32) { - if (Endian == ELFDATA2LSB) - return getElfSymbols<ELF32LE>(); + switch (getELFKind(this->MB)) { + case ELF32LEKind: + return getElfSymbols<ELF32LE>(); + case ELF32BEKind: return getElfSymbols<ELF32BE>(); - } - if (Endian == ELFDATA2LSB) + case ELF64LEKind: return getElfSymbols<ELF64LE>(); - return getElfSymbols<ELF64BE>(); + case ELF64BEKind: + return getElfSymbols<ELF64BE>(); + default: + llvm_unreachable("getELFKind"); + } } template void ArchiveFile::parse<ELF32LE>(); Modified: vendor/lld/dist/ELF/InputFiles.h ============================================================================== --- vendor/lld/dist/ELF/InputFiles.h Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/InputFiles.h Tue May 2 18:31:09 2017 (r317690) @@ -156,7 +156,7 @@ public: ArrayRef<SymbolBody *> getSymbols(); ArrayRef<SymbolBody *> getLocalSymbols(); - explicit ObjectFile(MemoryBufferRef M); + ObjectFile(MemoryBufferRef M, StringRef ArchiveName); void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); InputSectionBase *getSection(const Elf_Sym &Sym) const; Modified: vendor/lld/dist/ELF/InputSection.cpp ============================================================================== --- vendor/lld/dist/ELF/InputSection.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/InputSection.cpp Tue May 2 18:31:09 2017 (r317690) @@ -39,9 +39,7 @@ std::vector<InputSectionBase *> elf::Inp // Returns a string to construct an error message. std::string lld::toString(const InputSectionBase *Sec) { - // File can be absent if section is synthetic. - std::string FileName = Sec->File ? Sec->File->getName() : "<internal>"; - return (FileName + ":(" + Sec->Name + ")").str(); + return (toString(Sec->File) + ":(" + Sec->Name + ")").str(); } template <class ELFT> Modified: vendor/lld/dist/ELF/LTO.cpp ============================================================================== --- vendor/lld/dist/ELF/LTO.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/LTO.cpp Tue May 2 18:31:09 2017 (r317690) @@ -105,6 +105,11 @@ BitcodeCompiler::~BitcodeCompiler() = de static void undefine(Symbol *S) { replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false, STV_DEFAULT, S->body()->Type, nullptr); + // It shouldn't normally matter what the binding is, but if a bug in the LTO + // implementation causes it to fail to provide a definition for a symbol, + // setting the binding to STB_GLOBAL will cause the linker to report an + // undefined symbol error, even if the definition was weak. + S->Binding = STB_GLOBAL; } void BitcodeCompiler::add(BitcodeFile &F) { Modified: vendor/lld/dist/ELF/LinkerScript.cpp ============================================================================== --- vendor/lld/dist/ELF/LinkerScript.cpp Tue May 2 18:31:05 2017 (r317689) +++ vendor/lld/dist/ELF/LinkerScript.cpp Tue May 2 18:31:09 2017 (r317690) @@ -406,8 +406,15 @@ void LinkerScript::processCommands(Outpu } // Add input sections to an output section. - for (InputSectionBase *S : V) - Factory.addInputSec(S, Cmd->Name); + unsigned Pos = 0; + for (InputSectionBase *S : V) { + // The actual offset will be computed during + // assignAddresses. For now, use the index as a very crude + // approximation so that it is at least easy for other code to + // know the section order. + cast<InputSection>(S)->OutSecOff = Pos++; + Factory.addInputSec(S, Cmd->Name, Cmd->Sec); + } } } CurOutSec = nullptr; @@ -465,9 +472,26 @@ void LinkerScript::fabricateDefaultComma // Add sections that didn't match any sections command. void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) { - for (InputSectionBase *S : InputSections) - if (S->Live && !S->OutSec) - Factory.addInputSec(S, getOutputSectionName(S->Name)); + for (InputSectionBase *S : InputSections) { + if (!S->Live || S->OutSec) + continue; + StringRef Name = getOutputSectionName(S->Name); + auto I = std::find_if( + Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) { + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) + return Cmd->Name == Name; + return false; + }); + if (I == Opt.Commands.end()) { + Factory.addInputSec(S, Name); + } else { + auto *Cmd = cast<OutputSectionCommand>(*I); + Factory.addInputSec(S, Name, Cmd->Sec); + auto *ISD = make<InputSectionDescription>(""); + ISD->Sections.push_back(S); + Cmd->Commands.push_back(ISD); + } + } } static bool isTbss(OutputSection *Sec) { @@ -475,8 +499,6 @@ static bool isTbss(OutputSection *Sec) { } void LinkerScript::output(InputSection *S) { - if (!AlreadyOutputIS.insert(S).second) - return; bool IsTbss = isTbss(CurOutSec); uint64_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; @@ -508,19 +530,9 @@ void LinkerScript::output(InputSection * Dot = Pos; } -void LinkerScript::flush() { - assert(CurOutSec); - if (!AlreadyOutputOS.insert(CurOutSec).second) - return; - for (InputSection *I : CurOutSec->Sections) - output(I); -} - void LinkerScript::switchTo(OutputSection *Sec) { if (CurOutSec == Sec) return; - if (AlreadyOutputOS.count(Sec)) - return; CurOutSec = Sec; @@ -571,19 +583,11 @@ void LinkerScript::process(BaseCommand & if (!Sec->Live) continue; - assert(CurOutSec == Sec->OutSec || AlreadyOutputOS.count(Sec->OutSec)); + assert(CurOutSec == Sec->OutSec); output(cast<InputSection>(Sec)); } } -static OutputSection * -findSection(StringRef Name, const std::vector<OutputSection *> &Sections) { - for (OutputSection *Sec : Sections) - if (Sec->Name == Name) - return Sec; - return nullptr; -} - // This function searches for a memory region to place the given output // section in. If found, a pointer to the appropriate memory region is // returned. Otherwise, a nullptr is returned. @@ -638,19 +642,8 @@ void LinkerScript::assignOffsets(OutputS Dot = CurMemRegion->Offset; switchTo(Sec); - // flush() may add orphan sections, so the order of flush() and - // symbol assignments is important. We want to call flush() first so - // that symbols pointing the end of the current section points to - // the location after orphan sections. - auto Mid = - std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), - [](BaseCommand *Cmd) { return !isa<SymbolAssignment>(Cmd); }) - .base(); - for (auto I = Cmd->Commands.begin(); I != Mid; ++I) - process(**I); - flush(); - for (auto I = Mid, E = Cmd->Commands.end(); I != E; ++I) - process(**I); + for (BaseCommand *C : Cmd->Commands) + process(*C); } void LinkerScript::removeEmptyCommands() { @@ -663,7 +656,8 @@ void LinkerScript::removeEmptyCommands() auto Pos = std::remove_if( Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) { if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) - return !Cmd->Sec; + return std::find(OutputSections->begin(), OutputSections->end(), + Cmd->Sec) == OutputSections->end(); return false; }); Opt.Commands.erase(Pos, Opt.Commands.end()); @@ -687,8 +681,7 @@ void LinkerScript::adjustSectionsBeforeS auto *Cmd = dyn_cast<OutputSectionCommand>(Base); if (!Cmd) continue; - if (OutputSection *Sec = findSection(Cmd->Name, *OutputSections)) { - Cmd->Sec = Sec; + if (OutputSection *Sec = Cmd->Sec) { Flags = Sec->Flags; Type = Sec->Type; continue; @@ -820,15 +813,24 @@ void LinkerScript::placeOrphanSections() ++CmdIndex; } + // If there is no command corresponding to this output section, + // create one and put a InputSectionDescription in it so that both + // representations agree on which input sections to use. auto Pos = std::find_if(CmdIter, E, [&](BaseCommand *Base) { auto *Cmd = dyn_cast<OutputSectionCommand>(Base); return Cmd && Cmd->Name == Name; }); if (Pos == E) { auto *Cmd = make<OutputSectionCommand>(Name); - Cmd->Sec = Sec; Opt.Commands.insert(CmdIter, Cmd); ++CmdIndex; + + Cmd->Sec = Sec; + auto *ISD = make<InputSectionDescription>(""); + for (InputSection *IS : Sec->Sections) + ISD->Sections.push_back(IS); + Cmd->Commands.push_back(ISD); + continue; } @@ -846,6 +848,51 @@ void LinkerScript::processNonSectionComm } } +// Do a last effort at synchronizing the linker script "AST" and the section +// list. This is needed to account for last minute changes, like adding a +// .ARM.exidx terminator and sorting SHF_LINK_ORDER sections. +// +// FIXME: We should instead create the "AST" earlier and the above changes would +// be done directly in the "AST". +// +// This can only handle new sections being added and sections being reordered. +void LinkerScript::synchronize() { + for (BaseCommand *Base : Opt.Commands) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201705021831.v42IV9Qi001131>