From 10323c3b326f8752d9210584a3b3be758f7ee3b0 Mon Sep 17 00:00:00 2001 From: John Parent Date: Fri, 30 Jan 2026 19:20:57 -0500 Subject: [PATCH 1/3] Toolchain: reduce CLI processed Reduce the degree to which we decompose an incoming command line in the toolchain. The odering and context of CLI arguments is highly fragile and important to the contents and naming of build artifacts, in particular w.r.t. maintaining user expectations and logic regarding name reasoning. Instead toolchain is now a thin wrapper to pass the toolchain commands through the compiler wrapper itself, preserving inputs and order and injecting Spack libs, includes, etc safely. We leave command line parsing to specialized tools designed for the linker/compiler/etc. Signed-off-by: John Parent --- src/toolchain.cxx | 46 +++++++--------------------------------------- src/toolchain.h | 7 ++----- 2 files changed, 9 insertions(+), 44 deletions(-) diff --git a/src/toolchain.cxx b/src/toolchain.cxx index a1c103f..1d3318e 100644 --- a/src/toolchain.cxx +++ b/src/toolchain.cxx @@ -15,7 +15,7 @@ ToolChainInvocation::ToolChainInvocation(std::string command, char const* const* cli) - : command(std::move(std::move(command))) { + : command(std::move(command)) { this->ParseCommandArgs(cli); } @@ -23,10 +23,10 @@ void ToolChainInvocation::InterpolateSpackEnv(SpackEnvState& spackenv) { // inject Spack includes before the default includes for (auto& include : spackenv.SpackIncludeDirs) { auto inc_arg = ToolChainInvocation::ComposeIncludeArg(include); - this->include_args.insert(this->include_args.begin(), inc_arg); + this->inputs.push_back(inc_arg); } for (auto& lib : spackenv.SpackLdLibs) { - this->lib_args.push_back(lib); + this->inputs.push_back(lib); } this->AddExtraLibPaths(spackenv.SpackLinkDirs); this->AddExtraLibPaths(spackenv.SpackRPathDirs); @@ -36,10 +36,8 @@ void ToolChainInvocation::InterpolateSpackEnv(SpackEnvState& spackenv) { } DWORD ToolChainInvocation::InvokeToolchain() { - StrList const command_line(ToolChainInvocation::ComposeCommandLists( - {this->command_args, this->include_args, this->lib_args, - this->lib_dir_args, this->obj_args})); - this->executor = ExecuteCommand(this->command, command_line); + quoteList(this->inputs); + this->executor = ExecuteCommand(this->command, this->inputs); debug("Setting up executor for " + std::string(typeid(*this).name()) + "toolchain"); debug("Toolchain: " + this->command); @@ -53,38 +51,9 @@ DWORD ToolChainInvocation::InvokeToolchain() { } void ToolChainInvocation::ParseCommandArgs(char const* const* cli) { - // Collect include args as we need to ensure Spack - // Includes come first for (char const* const* co = cli; *co; co++) { - std::string norm_arg = std::string(*co); const std::string arg = std::string(*co); - lower(norm_arg); - if (isCommandArg(norm_arg, "i")) { - // We have an include arg - // can have an optional space - // check if there are characters after - // "/I" and if not we consider the next - // argument to be the include - if (arg.size() > 2) - this->include_args.push_back(arg); - else { - this->include_args.push_back(arg); - this->include_args.emplace_back(*(++co)); - } - } else if (endswith(norm_arg, ".lib") && - (norm_arg.find("implib:") == std::string::npos)) - // Lib args are just libraries - // provided like system32.lib on the - // command line. - // lib specification order does not matter - // on MSVC but this is useful for filtering system libs - // and adding all libs - this->lib_args.push_back(arg); - else if (endswith(norm_arg, ".obj")) - this->obj_args.push_back(arg); - else { - this->command_args.push_back(arg); - } + this->inputs.push_back(arg); } } @@ -98,8 +67,7 @@ std::string ToolChainInvocation::ComposeLibPathArg(std::string& libPath) { void ToolChainInvocation::AddExtraLibPaths(StrList paths) { for (auto& lib_dir : paths) { - this->lib_dir_args.push_back( - ToolChainInvocation::ComposeLibPathArg(lib_dir)); + this->inputs.push_back(ToolChainInvocation::ComposeLibPathArg(lib_dir)); } } diff --git a/src/toolchain.h b/src/toolchain.h index ec80e2b..c702420 100644 --- a/src/toolchain.h +++ b/src/toolchain.h @@ -32,10 +32,7 @@ class ToolChainInvocation { std::string command; std::string lang; - StrList command_args; - StrList include_args; - StrList lib_dir_args; - StrList lib_args; - StrList obj_args; + StrList inputs; + ExecuteCommand executor; }; From 970b440ad9cbb9ec9091cdfd4a6f122104f411d1 Mon Sep 17 00:00:00 2001 From: John Parent Date: Fri, 30 Jan 2026 19:34:35 -0500 Subject: [PATCH 2/3] Linker Parsing: Overhaul linker CLI processing * Stop disabiguating between obj, lib, and lo files, they're treated the same by the linker and we don't need extra logic for each, and we need to be able to process them in order. * Carefully parses the command line and .def file to determine the intended output name and ensures the internal modeling of that name in the compiler wrapper is consistent * Adds behavior to allow for extremely long command lines (Paraviews are 75kb) by composing arguments into an RSP file * Adds logic to expand the contents of an RSP file so we can inspect the full command line to better reason about naming and eventual length * Small bug fixes of QOL improvements Signed-off-by: John Parent --- src/linker_invocation.cxx | 176 +++++++++++++++++++++++++++++++------- src/linker_invocation.h | 28 +++--- 2 files changed, 161 insertions(+), 43 deletions(-) diff --git a/src/linker_invocation.cxx b/src/linker_invocation.cxx index 4ed82be..1933591 100644 --- a/src/linker_invocation.cxx +++ b/src/linker_invocation.cxx @@ -3,14 +3,19 @@ * * SPDX-License-Identifier: (Apache-2.0 OR MIT) */ +#include #include #include #include #include #include +#include #include "linker_invocation.h" +#include #include "utils.h" +enum { MaxProcessCommandLength = 32767 }; + /** * Parses the command line of a given linker invocation and stores information * about that command line and its associated behavior @@ -46,21 +51,31 @@ void LinkerInvocation::Parse() { // len 2 StrList implib_line = split(*token, ":"); this->implibname_ = implib_line[1]; - } else if (endswith(normal_token, ".lib")) { - this->libs_.push_back(*token); } else if (normal_token == "dll") { this->is_exe_ = false; } else if (startswith(normal_token, "out")) { this->output_ = split(*token, ":")[1]; - } else if (endswith(normal_token, ".obj")) { - this->objs_.push_back(*token); - } else if (startswith(normal_token, "@") && - endswith(normal_token, ".rsp")) { + } else if (endswith(normal_token, ".obj") || + endswith(normal_token, ".lib") || + endswith(normal_token, ".lo")) { + this->input_files_.push_back(*token); + } else if (startswith(normal_token, "@")) { // RSP files are used to describe object files, libraries, other CLI // Switches relevant to the tool the rsp file is being passed to // Primarily utilized by CMake and MSBuild projects to bypass // Command line length limits - this->rsp_file_ = *token; + this->rsp_files_.push_back(*token); + // Since rsp files are essentially expanded in place on the command line + // i.e objA rspA objC + // where rspA defines objB the cli would then be + // objA objB objC + // so we also need to track them in binary_files_ since the order + // of their expansion has implications for naming, i.e + // if rspA was the first input file, the dll/imp name would be objB + this->input_files_.push_back(*token); + } else if (endswith(normal_token, ".res")) { + this->rc_files_.push_back(*token); + this->input_files_.push_back(*token); } else if (startswith(normal_token, "def")) { this->def_file_ = strip(split(*token, ":", 1)[1], "\""); } else if (this->piped_args_.find(normal_token) != @@ -68,29 +83,109 @@ void LinkerInvocation::Parse() { this->piped_args_.at(normal_token).emplace_back(*token); } } - // If we have a def file and no name, attempt to - // scrape the def file for a name to be sure - // we respect the intended project name - // vs overriding via the CLI - if (!this->def_file_.empty() && this->output_.empty()) { - this->processDefFile(); - } + + // Note for the below: name will never be specified so we only have + // /out, .def files, and input files + // To determine internal dll name + // If a def file was not specified: + // /name is used + // if no /name /out is used + // if no /out or /name use first input file + + // If a def file was specified: + // LIBRARY + // otherwise fallback to previous + + // To determine output name + // /OUT is always overriding + // If not /OUT and .def file: + // LIBRARY + // if no def or no LIBRARY + // /NAME + // if no /NAME + // first input file (post rc expanion) + + this->processDefFile(); + this->processInputFiles(); std::string const ext = this->is_exe_ ? ".exe" : ".dll"; - // If output wasn't defined on the command line - // or the def file - // compute it based on the same logic as the linker - // i.e. first obj file name if (this->output_.empty()) { // with no "out" argument, the linker // will place the file in the CWD - std::string const name_obj = this->objs_.front(); - std::string const filename = split(name_obj, "\\").back(); - this->output_ = join({GetCWD(), strip(filename, ".obj")}, "\\") + ext; + std::string const name_component = this->input_files_.front(); + std::string const filename = split(name_component, "\\").back(); + this->output_ = join({GetCWD(), stripLastExt(filename)}, "\\") + ext; } if (this->implibname_.empty()) { std::string const name = strip(this->output_, ext); this->implibname_ = name + ".lib"; } + this->makeRsp(); +} + +void LinkerInvocation::processInputFiles() { + StrList new_input_files; + for (auto input = this->input_files_.begin(); + input != this->input_files_.end(); ++input) { + if (startswith(*input, "@")) { + // rsp file - expand contents in input files + // list in place and remove self + StrList rsp_inputs = LinkerInvocation::processRSPFile(*input); + new_input_files.insert(new_input_files.end(), rsp_inputs.begin(), + rsp_inputs.end()); + } else { + new_input_files.push_back(*input); + } + } + this->input_files_ = new_input_files; +} + +StrList LinkerInvocation::processRSPFile(std::string const& rsp_file) { + std::string const rsp_file_in = lstrip(rsp_file, "@"); + std::ifstream rsp_stream(rsp_file_in); + if (!rsp_stream) { + std::cerr << "Error: Could not open input rsp file: " << rsp_file_in + << "\n"; + throw FileIOError("Cannot open rsp input file: " + GetLastError()); + } + StrList inputs; + std::string line; + while (std::getline(rsp_stream, line)) { + std::stringstream rsp_line(line); + std::string input_file; + rsp_line >> input_file; + inputs.push_back(input_file); + } + return inputs; +} + +/** + * \brief Ensure command line given to lib.exe is of appropriate length + * max windows createProcess command line length is 32,767, so if we exceed + * that, compose all input file args into an rsp. + * + * Writes an rsp file named spack-build.rsp and sets it to be the only + * input file for the lib tool + */ +bool LinkerInvocation::makeRsp() { + int const total_length = std::accumulate( + this->input_files_.begin(), this->input_files_.end(), 0, + [](size_t sum, const std::string& s) { return sum + s.size(); }); + if (total_length > MaxProcessCommandLength) { + std::string const rsp_name = "spack-build.rsp"; + std::ofstream rsp_out(rsp_name); + if (!rsp_out) { + std::cerr << "Unable to open rsp out file: spack-build.rsp\n"; + throw FileIOError("Unable to open lib rsp file"); + } + for (const auto& line : this->input_files_) { + rsp_out << escape_backslash(line) << "\n"; + } + rsp_out.close(); + this->input_files_ = {"@" + rsp_name}; + this->rsp_files_ = {"@" + rsp_name}; + return true; + } + return false; } /** @@ -104,11 +199,15 @@ void LinkerInvocation::Parse() { */ void LinkerInvocation::processDefFile() { + if (this->def_file_.empty()) { + return; + } // Def from link line std::ifstream def_in(this->def_file_); if (!def_in) { std::cerr << "Error: Could not open input def file: " << this->def_file_ << "\n"; + throw FileIOError("Cannot open def input file: " + GetLastError()); } std::string line; @@ -131,18 +230,22 @@ void LinkerInvocation::processDefFile() { if (keyword == "NAME") { this->is_exe_ = true; def_line >> this->pe_name_; - this->pe_name_ = this->pe_name_ + ".exe"; + this->pe_name_ = stripquotes(this->pe_name_) + ".exe"; def_file_export_name = true; } else if (keyword == "LIBRARY") { this->is_exe_ = false; def_line >> this->pe_name_; - this->pe_name_ = this->pe_name_ + ".dll"; + this->pe_name_ = stripquotes(this->pe_name_) + ".dll"; def_file_export_name = true; } else { exports.push_back(line); } } if (def_file_export_name) { + // if output is not specified on the command line, this defines the output name + if (this->output_.empty()) { + this->output_ = join({GetCWD(), this->pe_name_}, "\\"); + } const std::string def_name = stem(this->def_file_); const std::string def_path = this->def_file_.substr(0, this->def_file_.find(def_name)); @@ -152,6 +255,7 @@ void LinkerInvocation::processDefFile() { if (!def_out) { std::cerr << "Error: could not open output def file: " << rename_def << "\n"; + throw FileIOError("Cannot open def output file: " + GetLastError()); } for (const auto& line : exports) { def_out << line << "\n"; @@ -162,11 +266,11 @@ void LinkerInvocation::processDefFile() { def_in.close(); } -std::string LinkerInvocation::get_implib_name() { +std::string LinkerInvocation::get_implib_name() const { return this->implibname_; } -std::string LinkerInvocation::get_lib_link_args() { +std::string LinkerInvocation::get_lib_link_args() const { std::string lib_link_line; for (const auto& var_args : this->piped_args_) { // Most of these should be single arguments @@ -182,22 +286,30 @@ std::string LinkerInvocation::get_lib_link_args() { return lib_link_line; } -std::string LinkerInvocation::get_def_file() { +std::string LinkerInvocation::get_def_file() const { return this->def_file_; } -std::string LinkerInvocation::get_rsp_file() { - return this->rsp_file_; +StrList LinkerInvocation::get_rsp_files() const { + return this->rsp_files_; +} + +StrList LinkerInvocation::get_rc_files() const { + return this->rc_files_; +} + +StrList LinkerInvocation::get_input_files() const { + return this->input_files_; } -std::string LinkerInvocation::get_out() { - return this->pe_name_.empty() ? this->output_ : this->pe_name_; +std::string LinkerInvocation::get_out() const { + return this->output_; } -std::string LinkerInvocation::get_mangled_out() { +std::string LinkerInvocation::get_mangled_out() const { return mangle_name(this->get_out()); } -bool LinkerInvocation::IsExeLink() { +bool LinkerInvocation::IsExeLink() const { return this->is_exe_ || endswith(this->get_out(), ".exe"); } diff --git a/src/linker_invocation.h b/src/linker_invocation.h index a92a538..207feb0 100644 --- a/src/linker_invocation.h +++ b/src/linker_invocation.h @@ -15,25 +15,31 @@ class LinkerInvocation { explicit LinkerInvocation(const StrList& linkline); ~LinkerInvocation() = default; void Parse(); - bool IsExeLink(); - std::string get_out(); - std::string get_mangled_out(); - std::string get_implib_name(); - std::string get_def_file(); - std::string get_rsp_file(); - std::string get_lib_link_args(); + bool IsExeLink() const; + std::string get_out() const; + std::string get_mangled_out() const; + std::string get_implib_name() const; + std::string get_def_file() const; + StrList get_rsp_files() const; + StrList get_rc_files() const; + StrList get_input_files() const; + std::string get_lib_link_args() const; + bool makeRsp(); private: void processDefFile(); + void processInputFiles(); + static StrList processRSPFile(std::string const& rsp_file); std::string line_; - StrList tokens_; std::string pe_name_; std::string implibname_; std::string def_file_; - std::string rsp_file_; std::string output_; - StrList libs_; - StrList objs_; + StrList rsp_files_; + StrList rc_files_; + StrList command_files_; + StrList input_files_; + StrList tokens_; bool is_exe_; std::map piped_args_ = { {"export", {}}, {"include", {}}, {"libpath", {}}, From 13693b56c1c212c66c0faebc07c5e40dee3de8d5 Mon Sep 17 00:00:00 2001 From: John Parent Date: Tue, 28 Apr 2026 13:12:34 -0400 Subject: [PATCH 3/3] cleanup Signed-off-by: John Parent --- src/ld.cxx | 8 ++------ src/utils.cxx | 9 ++++++++- src/utils.h | 8 ++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/ld.cxx b/src/ld.cxx index 5cc0683..871e857 100644 --- a/src/ld.cxx +++ b/src/ld.cxx @@ -29,8 +29,7 @@ DWORD LdInvocation::InvokeToolchain() { // recreate the import library from the same set of obj files // and libs LinkerInvocation link_run(LdInvocation::ComposeCommandLists( - {this->command_args, this->include_args, this->lib_args, - this->lib_dir_args, this->obj_args})); + {this->inputs})); link_run.Parse(); // We're creating a PE, we need to create an appropriate import lib std::string const imp_lib_name = link_run.get_implib_name(); @@ -61,10 +60,7 @@ DWORD LdInvocation::InvokeToolchain() { ExecuteCommand("lib.exe", LdInvocation::ComposeCommandLists({ {def, piped_args, "-name:" + pe_name, "-out:" + abs_out_imp_lib_name}, - {link_run.get_rsp_file()}, - this->obj_args, - this->lib_args, - this->lib_dir_args, + link_run.get_input_files(), })); this->rpath_executor.Execute(); DWORD const err_code = this->rpath_executor.Join(); diff --git a/src/utils.cxx b/src/utils.cxx index baa3004..a6ba900 100644 --- a/src/utils.cxx +++ b/src/utils.cxx @@ -1260,4 +1260,11 @@ SFNProcessingError::SFNProcessingError(char const* const message) char const* SFNProcessingError::what() const { return exception::what(); -} \ No newline at end of file +} + +FileIOError::FileIOError(char const* const message) + : std::runtime_error(message) {} + +char const* FileIOError::what() const { + return exception::what(); +} diff --git a/src/utils.h b/src/utils.h index f738662..853a4ad 100644 --- a/src/utils.h +++ b/src/utils.h @@ -383,4 +383,12 @@ class SFNProcessingError : public std::runtime_error { virtual char const* what() const; }; +class FileIOError : public std::runtime_error { + public: + FileIOError(char const* const message); + virtual char const* what() const; +}; + + + static bool DEBUG = false;