Date: Wed, 2 Jul 2025 18:09:08 GMT From: Dimitry Andric <dim@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: 257b9d0755b2 - stable/14 - contrib/llvm-project: re-add clang/tools/clang-scan-deps Message-ID: <202507021809.562I98r0020627@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/14 has been updated by dim: URL: https://cgit.FreeBSD.org/src/commit/?id=257b9d0755b29fc06d00900efb47f5f90f7fbd61 commit 257b9d0755b29fc06d00900efb47f5f90f7fbd61 Author: Dimitry Andric <dim@FreeBSD.org> AuthorDate: 2025-06-25 17:19:51 +0000 Commit: Dimitry Andric <dim@FreeBSD.org> CommitDate: 2025-07-02 18:08:46 +0000 contrib/llvm-project: re-add clang/tools/clang-scan-deps This is in preparation for adding it as an optional tool in base. MFC after: 1 week (cherry picked from commit 7ed1628066eaf55b86f35af86efe804508201cc8) --- contrib/llvm-project/FREEBSD-Xlist | 3 +- .../clang/tools/clang-scan-deps/ClangScanDeps.cpp | 1055 ++++++++++++++++++++ .../clang/tools/clang-scan-deps/Opts.td | 42 + 3 files changed, 1098 insertions(+), 2 deletions(-) diff --git a/contrib/llvm-project/FREEBSD-Xlist b/contrib/llvm-project/FREEBSD-Xlist index 45cd2c1d6f46..43fa40070f5d 100644 --- a/contrib/llvm-project/FREEBSD-Xlist +++ b/contrib/llvm-project/FREEBSD-Xlist @@ -129,7 +129,7 @@ clang/tools/clang-offload-packager/ clang/tools/clang-refactor/ clang/tools/clang-rename/ clang/tools/clang-repl/ -clang/tools/clang-scan-deps/ +clang/tools/clang-scan-deps/CMakeLists.txt clang/tools/clang-shlib/ clang/tools/diag-build/ clang/tools/diagtool/ @@ -805,7 +805,6 @@ llvm/tools/llvm-bcanalyzer/CMakeLists.txt llvm/tools/llvm-c-test/ llvm/tools/llvm-cat/ llvm/tools/llvm-cfi-verify/ -llvm/tools/llvm-cgdata/ llvm/tools/llvm-config/ llvm/tools/llvm-cov/CMakeLists.txt llvm/tools/llvm-cvtres/ diff --git a/contrib/llvm-project/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/contrib/llvm-project/clang/tools/clang-scan-deps/ClangScanDeps.cpp new file mode 100644 index 000000000000..867df19c863f --- /dev/null +++ b/contrib/llvm-project/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -0,0 +1,1055 @@ +//===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" +#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" +#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" +#include "clang/Tooling/JSONCompilationDatabase.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/LLVMDriver.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/Timer.h" +#include "llvm/TargetParser/Host.h" +#include <mutex> +#include <optional> +#include <thread> + +#include "Opts.inc" + +using namespace clang; +using namespace tooling::dependencies; + +namespace { + +using namespace llvm::opt; +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), +#include "Opts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) \ + constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ + constexpr llvm::ArrayRef<llvm::StringLiteral> NAME( \ + NAME##_init, std::size(NAME##_init) - 1); +#include "Opts.inc" +#undef PREFIX + +const llvm::opt::OptTable::Info InfoTable[] = { +#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), +#include "Opts.inc" +#undef OPTION +}; + +class ScanDepsOptTable : public llvm::opt::GenericOptTable { +public: + ScanDepsOptTable() : GenericOptTable(InfoTable) { + setGroupedShortOptions(true); + } +}; + +enum ResourceDirRecipeKind { + RDRK_ModifyCompilerPath, + RDRK_InvokeCompiler, +}; + +static std::string OutputFileName = "-"; +static ScanningMode ScanMode = ScanningMode::DependencyDirectivesScan; +static ScanningOutputFormat Format = ScanningOutputFormat::Make; +static ScanningOptimizations OptimizeArgs; +static std::string ModuleFilesDir; +static bool EagerLoadModules; +static unsigned NumThreads = 0; +static std::string CompilationDB; +static std::string ModuleName; +static std::vector<std::string> ModuleDepTargets; +static bool DeprecatedDriverCommand; +static ResourceDirRecipeKind ResourceDirRecipe; +static bool Verbose; +static bool PrintTiming; +static llvm::BumpPtrAllocator Alloc; +static llvm::StringSaver Saver{Alloc}; +static std::vector<const char *> CommandLine; + +#ifndef NDEBUG +static constexpr bool DoRoundTripDefault = true; +#else +static constexpr bool DoRoundTripDefault = false; +#endif + +static bool RoundTripArgs = DoRoundTripDefault; + +static void ParseArgs(int argc, char **argv) { + ScanDepsOptTable Tbl; + llvm::StringRef ToolName = argv[0]; + llvm::opt::InputArgList Args = + Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { + llvm::errs() << Msg << '\n'; + std::exit(1); + }); + + if (Args.hasArg(OPT_help)) { + Tbl.printHelp(llvm::outs(), "clang-scan-deps [options]", "clang-scan-deps"); + std::exit(0); + } + if (Args.hasArg(OPT_version)) { + llvm::outs() << ToolName << '\n'; + llvm::cl::PrintVersionMessage(); + std::exit(0); + } + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_mode_EQ)) { + auto ModeType = + llvm::StringSwitch<std::optional<ScanningMode>>(A->getValue()) + .Case("preprocess-dependency-directives", + ScanningMode::DependencyDirectivesScan) + .Case("preprocess", ScanningMode::CanonicalPreprocessing) + .Default(std::nullopt); + if (!ModeType) { + llvm::errs() << ToolName + << ": for the --mode option: Cannot find option named '" + << A->getValue() << "'\n"; + std::exit(1); + } + ScanMode = *ModeType; + } + + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_format_EQ)) { + auto FormatType = + llvm::StringSwitch<std::optional<ScanningOutputFormat>>(A->getValue()) + .Case("make", ScanningOutputFormat::Make) + .Case("p1689", ScanningOutputFormat::P1689) + .Case("experimental-full", ScanningOutputFormat::Full) + .Default(std::nullopt); + if (!FormatType) { + llvm::errs() << ToolName + << ": for the --format option: Cannot find option named '" + << A->getValue() << "'\n"; + std::exit(1); + } + Format = *FormatType; + } + + std::vector<std::string> OptimizationFlags = + Args.getAllArgValues(OPT_optimize_args_EQ); + OptimizeArgs = ScanningOptimizations::None; + for (const auto &Arg : OptimizationFlags) { + auto Optimization = + llvm::StringSwitch<std::optional<ScanningOptimizations>>(Arg) + .Case("none", ScanningOptimizations::None) + .Case("header-search", ScanningOptimizations::HeaderSearch) + .Case("system-warnings", ScanningOptimizations::SystemWarnings) + .Case("vfs", ScanningOptimizations::VFS) + .Case("canonicalize-macros", ScanningOptimizations::Macros) + .Case("all", ScanningOptimizations::All) + .Default(std::nullopt); + if (!Optimization) { + llvm::errs() + << ToolName + << ": for the --optimize-args option: Cannot find option named '" + << Arg << "'\n"; + std::exit(1); + } + OptimizeArgs |= *Optimization; + } + if (OptimizationFlags.empty()) + OptimizeArgs = ScanningOptimizations::Default; + + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_files_dir_EQ)) + ModuleFilesDir = A->getValue(); + + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_o)) + OutputFileName = A->getValue(); + + EagerLoadModules = Args.hasArg(OPT_eager_load_pcm); + + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_j)) { + StringRef S{A->getValue()}; + if (!llvm::to_integer(S, NumThreads, 0)) { + llvm::errs() << ToolName << ": for the -j option: '" << S + << "' value invalid for uint argument!\n"; + std::exit(1); + } + } + + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_compilation_database_EQ)) + CompilationDB = A->getValue(); + + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_name_EQ)) + ModuleName = A->getValue(); + + for (const llvm::opt::Arg *A : Args.filtered(OPT_dependency_target_EQ)) + ModuleDepTargets.emplace_back(A->getValue()); + + DeprecatedDriverCommand = Args.hasArg(OPT_deprecated_driver_command); + + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_resource_dir_recipe_EQ)) { + auto Kind = + llvm::StringSwitch<std::optional<ResourceDirRecipeKind>>(A->getValue()) + .Case("modify-compiler-path", RDRK_ModifyCompilerPath) + .Case("invoke-compiler", RDRK_InvokeCompiler) + .Default(std::nullopt); + if (!Kind) { + llvm::errs() << ToolName + << ": for the --resource-dir-recipe option: Cannot find " + "option named '" + << A->getValue() << "'\n"; + std::exit(1); + } + ResourceDirRecipe = *Kind; + } + + PrintTiming = Args.hasArg(OPT_print_timing); + + Verbose = Args.hasArg(OPT_verbose); + + RoundTripArgs = Args.hasArg(OPT_round_trip_args); + + if (const llvm::opt::Arg *A = Args.getLastArgNoClaim(OPT_DASH_DASH)) + CommandLine.assign(A->getValues().begin(), A->getValues().end()); +} + +class SharedStream { +public: + SharedStream(raw_ostream &OS) : OS(OS) {} + void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) { + std::unique_lock<std::mutex> LockGuard(Lock); + Fn(OS); + OS.flush(); + } + +private: + std::mutex Lock; + raw_ostream &OS; +}; + +class ResourceDirectoryCache { +public: + /// findResourceDir finds the resource directory relative to the clang + /// compiler being used in Args, by running it with "-print-resource-dir" + /// option and cache the results for reuse. \returns resource directory path + /// associated with the given invocation command or empty string if the + /// compiler path is NOT an absolute path. + StringRef findResourceDir(const tooling::CommandLineArguments &Args, + bool ClangCLMode) { + if (Args.size() < 1) + return ""; + + const std::string &ClangBinaryPath = Args[0]; + if (!llvm::sys::path::is_absolute(ClangBinaryPath)) + return ""; + + const std::string &ClangBinaryName = + std::string(llvm::sys::path::filename(ClangBinaryPath)); + + std::unique_lock<std::mutex> LockGuard(CacheLock); + const auto &CachedResourceDir = Cache.find(ClangBinaryPath); + if (CachedResourceDir != Cache.end()) + return CachedResourceDir->second; + + std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName}; + if (ClangCLMode) + PrintResourceDirArgs.push_back("/clang:-print-resource-dir"); + else + PrintResourceDirArgs.push_back("-print-resource-dir"); + + llvm::SmallString<64> OutputFile, ErrorFile; + llvm::sys::fs::createTemporaryFile("print-resource-dir-output", + "" /*no-suffix*/, OutputFile); + llvm::sys::fs::createTemporaryFile("print-resource-dir-error", + "" /*no-suffix*/, ErrorFile); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + llvm::FileRemover ErrorRemover(ErrorFile.c_str()); + std::optional<StringRef> Redirects[] = { + {""}, // Stdin + OutputFile.str(), + ErrorFile.str(), + }; + if (llvm::sys::ExecuteAndWait(ClangBinaryPath, PrintResourceDirArgs, {}, + Redirects)) { + auto ErrorBuf = llvm::MemoryBuffer::getFile(ErrorFile.c_str()); + llvm::errs() << ErrorBuf.get()->getBuffer(); + return ""; + } + + auto OutputBuf = llvm::MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return ""; + StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n'); + + Cache[ClangBinaryPath] = Output.str(); + return Cache[ClangBinaryPath]; + } + +private: + std::map<std::string, std::string> Cache; + std::mutex CacheLock; +}; + +} // end anonymous namespace + +/// Takes the result of a dependency scan and prints error / dependency files +/// based on the result. +/// +/// \returns True on error. +static bool +handleMakeDependencyToolResult(const std::string &Input, + llvm::Expected<std::string> &MaybeFile, + SharedStream &OS, SharedStream &Errs) { + if (!MaybeFile) { + llvm::handleAllErrors( + MaybeFile.takeError(), [&Input, &Errs](llvm::StringError &Err) { + Errs.applyLocked([&](raw_ostream &OS) { + OS << "Error while scanning dependencies for " << Input << ":\n"; + OS << Err.getMessage(); + }); + }); + return true; + } + OS.applyLocked([&](raw_ostream &OS) { OS << *MaybeFile; }); + return false; +} + +static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) { + std::vector<llvm::StringRef> Strings; + for (auto &&I : Set) + Strings.push_back(I.getKey()); + llvm::sort(Strings); + return llvm::json::Array(Strings); +} + +// Technically, we don't need to sort the dependency list to get determinism. +// Leaving these be will simply preserve the import order. +static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) { + llvm::sort(V); + + llvm::json::Array Ret; + for (const ModuleID &MID : V) + Ret.push_back(llvm::json::Object( + {{"module-name", MID.ModuleName}, {"context-hash", MID.ContextHash}})); + return Ret; +} + +static llvm::json::Array +toJSONSorted(llvm::SmallVector<Module::LinkLibrary, 2> &LinkLibs) { + llvm::sort(LinkLibs, [](const Module::LinkLibrary &lhs, + const Module::LinkLibrary &rhs) { + return lhs.Library < rhs.Library; + }); + + llvm::json::Array Ret; + for (const Module::LinkLibrary &LL : LinkLibs) + Ret.push_back(llvm::json::Object( + {{"link-name", LL.Library}, {"isFramework", LL.IsFramework}})); + return Ret; +} + +// Thread safe. +class FullDeps { +public: + FullDeps(size_t NumInputs) : Inputs(NumInputs) {} + + void mergeDeps(StringRef Input, TranslationUnitDeps TUDeps, + size_t InputIndex) { + mergeDeps(std::move(TUDeps.ModuleGraph), InputIndex); + + InputDeps ID; + ID.FileName = std::string(Input); + ID.ContextHash = std::move(TUDeps.ID.ContextHash); + ID.FileDeps = std::move(TUDeps.FileDeps); + ID.ModuleDeps = std::move(TUDeps.ClangModuleDeps); + ID.DriverCommandLine = std::move(TUDeps.DriverCommandLine); + ID.Commands = std::move(TUDeps.Commands); + + assert(InputIndex < Inputs.size() && "Input index out of bounds"); + assert(Inputs[InputIndex].FileName.empty() && "Result already populated"); + Inputs[InputIndex] = std::move(ID); + } + + void mergeDeps(ModuleDepsGraph Graph, size_t InputIndex) { + std::vector<ModuleDeps *> NewMDs; + { + std::unique_lock<std::mutex> ul(Lock); + for (const ModuleDeps &MD : Graph) { + auto I = Modules.find({MD.ID, 0}); + if (I != Modules.end()) { + I->first.InputIndex = std::min(I->first.InputIndex, InputIndex); + continue; + } + auto Res = Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)}); + NewMDs.push_back(&Res->second); + } + // First call to \c getBuildArguments is somewhat expensive. Let's call it + // on the current thread (instead of the main one), and outside the + // critical section. + for (ModuleDeps *MD : NewMDs) + (void)MD->getBuildArguments(); + } + } + + bool roundTripCommand(ArrayRef<std::string> ArgStrs, + DiagnosticsEngine &Diags) { + if (ArgStrs.empty() || ArgStrs[0] != "-cc1") + return false; + SmallVector<const char *> Args; + for (const std::string &Arg : ArgStrs) + Args.push_back(Arg.c_str()); + return !CompilerInvocation::checkCC1RoundTrip(Args, Diags); + } + + // Returns \c true if any command lines fail to round-trip. We expect + // commands already be canonical when output by the scanner. + bool roundTripCommands(raw_ostream &ErrOS) { + IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions{}; + TextDiagnosticPrinter DiagConsumer(ErrOS, &*DiagOpts); + IntrusiveRefCntPtr<DiagnosticsEngine> Diags = + CompilerInstance::createDiagnostics(&*DiagOpts, &DiagConsumer, + /*ShouldOwnClient=*/false); + + for (auto &&M : Modules) + if (roundTripCommand(M.second.getBuildArguments(), *Diags)) + return true; + + for (auto &&I : Inputs) + for (const auto &Cmd : I.Commands) + if (roundTripCommand(Cmd.Arguments, *Diags)) + return true; + + return false; + } + + void printFullOutput(raw_ostream &OS) { + // Skip sorting modules and constructing the JSON object if the output + // cannot be observed anyway. This makes timings less noisy. + if (&OS == &llvm::nulls()) + return; + + // Sort the modules by name to get a deterministic order. + std::vector<IndexedModuleID> ModuleIDs; + for (auto &&M : Modules) + ModuleIDs.push_back(M.first); + llvm::sort(ModuleIDs); + + using namespace llvm::json; + + Array OutModules; + for (auto &&ModID : ModuleIDs) { + auto &MD = Modules[ModID]; + Object O{{"name", MD.ID.ModuleName}, + {"context-hash", MD.ID.ContextHash}, + {"file-deps", toJSONSorted(MD.FileDeps)}, + {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)}, + {"clang-modulemap-file", MD.ClangModuleMapFile}, + {"command-line", MD.getBuildArguments()}, + {"link-libraries", toJSONSorted(MD.LinkLibraries)}}; + OutModules.push_back(std::move(O)); + } + + Array TUs; + for (auto &&I : Inputs) { + Array Commands; + if (I.DriverCommandLine.empty()) { + for (const auto &Cmd : I.Commands) { + Object O{ + {"input-file", I.FileName}, + {"clang-context-hash", I.ContextHash}, + {"file-deps", I.FileDeps}, + {"clang-module-deps", toJSONSorted(I.ModuleDeps)}, + {"executable", Cmd.Executable}, + {"command-line", Cmd.Arguments}, + }; + Commands.push_back(std::move(O)); + } + } else { + Object O{ + {"input-file", I.FileName}, + {"clang-context-hash", I.ContextHash}, + {"file-deps", I.FileDeps}, + {"clang-module-deps", toJSONSorted(I.ModuleDeps)}, + {"executable", "clang"}, + {"command-line", I.DriverCommandLine}, + }; + Commands.push_back(std::move(O)); + } + TUs.push_back(Object{ + {"commands", std::move(Commands)}, + }); + } + + Object Output{ + {"modules", std::move(OutModules)}, + {"translation-units", std::move(TUs)}, + }; + + OS << llvm::formatv("{0:2}\n", Value(std::move(Output))); + } + +private: + struct IndexedModuleID { + ModuleID ID; + + // FIXME: This is mutable so that it can still be updated after insertion + // into an unordered associative container. This is "fine", since this + // field doesn't contribute to the hash, but it's a brittle hack. + mutable size_t InputIndex; + + bool operator==(const IndexedModuleID &Other) const { + return ID == Other.ID; + } + + bool operator<(const IndexedModuleID &Other) const { + /// We need the output of clang-scan-deps to be deterministic. However, + /// the dependency graph may contain two modules with the same name. How + /// do we decide which one to print first? If we made that decision based + /// on the context hash, the ordering would be deterministic, but + /// different across machines. This can happen for example when the inputs + /// or the SDKs (which both contribute to the "context" hash) live in + /// different absolute locations. We solve that by tracking the index of + /// the first input TU that (transitively) imports the dependency, which + /// is always the same for the same input, resulting in deterministic + /// sorting that's also reproducible across machines. + return std::tie(ID.ModuleName, InputIndex) < + std::tie(Other.ID.ModuleName, Other.InputIndex); + } + + struct Hasher { + std::size_t operator()(const IndexedModuleID &IMID) const { + return llvm::hash_value(IMID.ID); + } + }; + }; + + struct InputDeps { + std::string FileName; + std::string ContextHash; + std::vector<std::string> FileDeps; + std::vector<ModuleID> ModuleDeps; + std::vector<std::string> DriverCommandLine; + std::vector<Command> Commands; + }; + + std::mutex Lock; + std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleID::Hasher> + Modules; + std::vector<InputDeps> Inputs; +}; + +static bool handleTranslationUnitResult( + StringRef Input, llvm::Expected<TranslationUnitDeps> &MaybeTUDeps, + FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) { + if (!MaybeTUDeps) { + llvm::handleAllErrors( + MaybeTUDeps.takeError(), [&Input, &Errs](llvm::StringError &Err) { + Errs.applyLocked([&](raw_ostream &OS) { + OS << "Error while scanning dependencies for " << Input << ":\n"; + OS << Err.getMessage(); + }); + }); + return true; + } + FD.mergeDeps(Input, std::move(*MaybeTUDeps), InputIndex); + return false; +} + +static bool handleModuleResult( + StringRef ModuleName, llvm::Expected<ModuleDepsGraph> &MaybeModuleGraph, + FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) { + if (!MaybeModuleGraph) { + llvm::handleAllErrors(MaybeModuleGraph.takeError(), + [&ModuleName, &Errs](llvm::StringError &Err) { + Errs.applyLocked([&](raw_ostream &OS) { + OS << "Error while scanning dependencies for " + << ModuleName << ":\n"; + OS << Err.getMessage(); + }); + }); + return true; + } + FD.mergeDeps(std::move(*MaybeModuleGraph), InputIndex); + return false; +} + +class P1689Deps { +public: + void printDependencies(raw_ostream &OS) { + addSourcePathsToRequires(); + // Sort the modules by name to get a deterministic order. + llvm::sort(Rules, [](const P1689Rule &A, const P1689Rule &B) { + return A.PrimaryOutput < B.PrimaryOutput; + }); + + using namespace llvm::json; + Array OutputRules; + for (const P1689Rule &R : Rules) { + Object O{{"primary-output", R.PrimaryOutput}}; + + if (R.Provides) { + Array Provides; + Object Provided{{"logical-name", R.Provides->ModuleName}, + {"source-path", R.Provides->SourcePath}, + {"is-interface", R.Provides->IsStdCXXModuleInterface}}; + Provides.push_back(std::move(Provided)); + O.insert({"provides", std::move(Provides)}); + } + + Array Requires; + for (const P1689ModuleInfo &Info : R.Requires) { + Object RequiredInfo{{"logical-name", Info.ModuleName}}; + if (!Info.SourcePath.empty()) + RequiredInfo.insert({"source-path", Info.SourcePath}); + Requires.push_back(std::move(RequiredInfo)); + } + + if (!Requires.empty()) + O.insert({"requires", std::move(Requires)}); + + OutputRules.push_back(std::move(O)); + } + + Object Output{ + {"version", 1}, {"revision", 0}, {"rules", std::move(OutputRules)}}; + + OS << llvm::formatv("{0:2}\n", Value(std::move(Output))); + } + + void addRules(P1689Rule &Rule) { + std::unique_lock<std::mutex> LockGuard(Lock); + Rules.push_back(Rule); + } + +private: + void addSourcePathsToRequires() { + llvm::DenseMap<StringRef, StringRef> ModuleSourceMapper; + for (const P1689Rule &R : Rules) + if (R.Provides && !R.Provides->SourcePath.empty()) + ModuleSourceMapper[R.Provides->ModuleName] = R.Provides->SourcePath; + + for (P1689Rule &R : Rules) { + for (P1689ModuleInfo &Info : R.Requires) { + auto Iter = ModuleSourceMapper.find(Info.ModuleName); + if (Iter != ModuleSourceMapper.end()) + Info.SourcePath = Iter->second; + } + } + } + + std::mutex Lock; + std::vector<P1689Rule> Rules; +}; + +static bool +handleP1689DependencyToolResult(const std::string &Input, + llvm::Expected<P1689Rule> &MaybeRule, + P1689Deps &PD, SharedStream &Errs) { + if (!MaybeRule) { + llvm::handleAllErrors( + MaybeRule.takeError(), [&Input, &Errs](llvm::StringError &Err) { + Errs.applyLocked([&](raw_ostream &OS) { + OS << "Error while scanning dependencies for " << Input << ":\n"; + OS << Err.getMessage(); + }); + }); + return true; + } + PD.addRules(*MaybeRule); + return false; +} + +/// Construct a path for the explicitly built PCM. +static std::string constructPCMPath(ModuleID MID, StringRef OutputDir) { + SmallString<256> ExplicitPCMPath(OutputDir); + llvm::sys::path::append(ExplicitPCMPath, MID.ContextHash, + MID.ModuleName + "-" + MID.ContextHash + ".pcm"); + return std::string(ExplicitPCMPath); +} + +static std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK, + StringRef OutputDir) { + std::string PCMPath = constructPCMPath(MID, OutputDir); + switch (MOK) { + case ModuleOutputKind::ModuleFile: + return PCMPath; + case ModuleOutputKind::DependencyFile: + return PCMPath + ".d"; + case ModuleOutputKind::DependencyTargets: + // Null-separate the list of targets. + return join(ModuleDepTargets, StringRef("\0", 1)); + case ModuleOutputKind::DiagnosticSerializationFile: + return PCMPath + ".diag"; + } + llvm_unreachable("Fully covered switch above!"); +} + +static std::string getModuleCachePath(ArrayRef<std::string> Args) { + for (StringRef Arg : llvm::reverse(Args)) { + Arg.consume_front("/clang:"); + if (Arg.consume_front("-fmodules-cache-path=")) + return std::string(Arg); + } + SmallString<128> Path; + driver::Driver::getDefaultModuleCachePath(Path); + return std::string(Path); +} + +/// Attempts to construct the compilation database from '-compilation-database' +/// or from the arguments following the positional '--'. +static std::unique_ptr<tooling::CompilationDatabase> +getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) { + ParseArgs(argc, argv); + + if (!(CommandLine.empty() ^ CompilationDB.empty())) { + llvm::errs() << "The compilation command line must be provided either via " + "'-compilation-database' or after '--'."; + return nullptr; + } + + if (!CompilationDB.empty()) + return tooling::JSONCompilationDatabase::loadFromFile( + CompilationDB, ErrorMessage, + tooling::JSONCommandLineSyntax::AutoDetect); + + llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = + CompilerInstance::createDiagnostics(new DiagnosticOptions); + driver::Driver TheDriver(CommandLine[0], llvm::sys::getDefaultTargetTriple(), + *Diags); + TheDriver.setCheckInputsExist(false); + std::unique_ptr<driver::Compilation> C( + TheDriver.BuildCompilation(CommandLine)); + if (!C || C->getJobs().empty()) + return nullptr; + + auto Cmd = C->getJobs().begin(); + auto CI = std::make_unique<CompilerInvocation>(); + CompilerInvocation::CreateFromArgs(*CI, Cmd->getArguments(), *Diags, + CommandLine[0]); + if (!CI) + return nullptr; + + FrontendOptions &FEOpts = CI->getFrontendOpts(); + if (FEOpts.Inputs.size() != 1) { + llvm::errs() + << "Exactly one input file is required in the per-file mode ('--').\n"; + return nullptr; + } + + // There might be multiple jobs for a compilation. Extract the specified + // output filename from the last job. + auto LastCmd = C->getJobs().end(); + LastCmd--; + if (LastCmd->getOutputFilenames().size() != 1) { + llvm::errs() + << "Exactly one output file is required in the per-file mode ('--').\n"; + return nullptr; + } + StringRef OutputFile = LastCmd->getOutputFilenames().front(); + + class InplaceCompilationDatabase : public tooling::CompilationDatabase { + public: + InplaceCompilationDatabase(StringRef InputFile, StringRef OutputFile, + ArrayRef<const char *> CommandLine) + : Command(".", InputFile, {}, OutputFile) { + for (auto *C : CommandLine) + Command.CommandLine.push_back(C); + } + + std::vector<tooling::CompileCommand> + getCompileCommands(StringRef FilePath) const override { + if (FilePath != Command.Filename) + return {}; + return {Command}; + } + + std::vector<std::string> getAllFiles() const override { + return {Command.Filename}; + } + + std::vector<tooling::CompileCommand> + getAllCompileCommands() const override { + return {Command}; + } + + private: + tooling::CompileCommand Command; + }; + + return std::make_unique<InplaceCompilationDatabase>( + FEOpts.Inputs[0].getFile(), OutputFile, CommandLine); +} + +int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { + llvm::InitializeAllTargetInfos(); + std::string ErrorMessage; + std::unique_ptr<tooling::CompilationDatabase> Compilations = + getCompilationDatabase(argc, argv, ErrorMessage); + if (!Compilations) { + llvm::errs() << ErrorMessage << "\n"; + return 1; + } + + llvm::cl::PrintOptionValues(); + + // Expand response files in advance, so that we can "see" all the arguments + // when adjusting below. + Compilations = expandResponseFiles(std::move(Compilations), + llvm::vfs::getRealFileSystem()); + + Compilations = inferTargetAndDriverMode(std::move(Compilations)); + + Compilations = inferToolLocation(std::move(Compilations)); + + // The command options are rewritten to run Clang in preprocessor only mode. + auto AdjustingCompilations = + std::make_unique<tooling::ArgumentsAdjustingCompilations>( + std::move(Compilations)); + ResourceDirectoryCache ResourceDirCache; + + AdjustingCompilations->appendArgumentsAdjuster( + [&ResourceDirCache](const tooling::CommandLineArguments &Args, + StringRef FileName) { + std::string LastO; + bool HasResourceDir = false; + bool ClangCLMode = false; + auto FlagsEnd = llvm::find(Args, "--"); + if (FlagsEnd != Args.begin()) { + ClangCLMode = + llvm::sys::path::stem(Args[0]).contains_insensitive("clang-cl") || + llvm::is_contained(Args, "--driver-mode=cl"); + + // Reverse scan, starting at the end or at the element before "--". + auto R = std::make_reverse_iterator(FlagsEnd); + auto E = Args.rend(); + // Don't include Args[0] in the iteration; that's the executable, not + // an option. + if (E != R) + E--; + for (auto I = R; I != E; ++I) { + StringRef Arg = *I; + if (ClangCLMode) { + // Ignore arguments that are preceded by "-Xclang". + if ((I + 1) != E && I[1] == "-Xclang") + continue; + if (LastO.empty()) { + // With clang-cl, the output obj file can be specified with + // "/opath", "/o path", "/Fopath", and the dash counterparts. + // Also, clang-cl adds ".obj" extension if none is found. + if ((Arg == "-o" || Arg == "/o") && I != R) + LastO = I[-1]; // Next argument (reverse iterator) + else if (Arg.starts_with("/Fo") || Arg.starts_with("-Fo")) + LastO = Arg.drop_front(3).str(); + else if (Arg.starts_with("/o") || Arg.starts_with("-o")) + LastO = Arg.drop_front(2).str(); + + if (!LastO.empty() && !llvm::sys::path::has_extension(LastO)) + LastO.append(".obj"); + } + } + if (Arg == "-resource-dir") + HasResourceDir = true; + } + } + tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd); + // The clang-cl driver passes "-o -" to the frontend. Inject the real + // file here to ensure "-MT" can be deduced if need be. + if (ClangCLMode && !LastO.empty()) { + AdjustedArgs.push_back("/clang:-o"); + AdjustedArgs.push_back("/clang:" + LastO); + } + + if (!HasResourceDir && ResourceDirRecipe == RDRK_InvokeCompiler) { + StringRef ResourceDir = + ResourceDirCache.findResourceDir(Args, ClangCLMode); + if (!ResourceDir.empty()) { + AdjustedArgs.push_back("-resource-dir"); + AdjustedArgs.push_back(std::string(ResourceDir)); + } + } + AdjustedArgs.insert(AdjustedArgs.end(), FlagsEnd, Args.end()); + return AdjustedArgs; + }); + + SharedStream Errs(llvm::errs()); + + std::optional<llvm::raw_fd_ostream> FileOS; + llvm::raw_ostream &ThreadUnsafeDependencyOS = [&]() -> llvm::raw_ostream & { + if (OutputFileName == "-") + return llvm::outs(); + + if (OutputFileName == "/dev/null") + return llvm::nulls(); + + std::error_code EC; + FileOS.emplace(OutputFileName, EC); + if (EC) { + llvm::errs() << "Failed to open output file '" << OutputFileName + << "': " << llvm::errorCodeToError(EC) << '\n'; + std::exit(1); + } + return *FileOS; + }(); + SharedStream DependencyOS(ThreadUnsafeDependencyOS); + + std::vector<tooling::CompileCommand> Inputs = + AdjustingCompilations->getAllCompileCommands(); + + std::atomic<bool> HadErrors(false); + std::optional<FullDeps> FD; + P1689Deps PD; + + std::mutex Lock; + size_t Index = 0; + auto GetNextInputIndex = [&]() -> std::optional<size_t> { + std::unique_lock<std::mutex> LockGuard(Lock); + if (Index < Inputs.size()) + return Index++; + return {}; + }; + + if (Format == ScanningOutputFormat::Full) + FD.emplace(ModuleName.empty() ? Inputs.size() : 0); + + auto ScanningTask = [&](DependencyScanningService &Service) { + DependencyScanningTool WorkerTool(Service); + + llvm::DenseSet<ModuleID> AlreadySeenModules; + while (auto MaybeInputIndex = GetNextInputIndex()) { + size_t LocalIndex = *MaybeInputIndex; + const tooling::CompileCommand *Input = &Inputs[LocalIndex]; *** 167 LINES SKIPPED ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202507021809.562I98r0020627>