From 0585cf5d80e6111fd210cf028071cb6a5779c8f2 Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:12:35 +0200 Subject: [PATCH 01/13] fix: serialize console output in fair::mq::tools::execute - concurrent execute() calls print captured subprocess lines to std::cout from multiple threads; the standard allows that, but libstdc++ maintains the formatted-output state (ios_base::width) with plain reads and writes -- a data race ThreadSanitizer reports once libstdc++ itself is instrumented - a mutex around the insertion also keeps whole lines from interleaving --- fairmq/tools/Process.cxx | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fairmq/tools/Process.cxx b/fairmq/tools/Process.cxx index 046d1dae6..05be0aae8 100644 --- a/fairmq/tools/Process.cxx +++ b/fairmq/tools/Process.cxx @@ -18,6 +18,7 @@ #include #include // kill, signals #include +#include #include #include #include @@ -40,10 +41,18 @@ class LinePrinter , fPrefix(std::move(prefix)) {} - // prints line with prefix on both cout (thread-safe) and output stream + // prints line with prefix on both cout and output stream void Print(const string& line) { - cout << fair::mq::tools::ToString(fPrefix, line, "\n") << flush; + // Serialize: the standard allows concurrent insertion on std::cout, + // but libstdc++ maintains the formatted-output state (e.g. + // ios_base::width) with plain reads and writes that constitute data + // races; the lock also keeps whole lines from interleaving. + static mutex sCoutMutex; + { + lock_guard lock(sCoutMutex); + cout << fair::mq::tools::ToString(fPrefix, line, "\n") << flush; + } fOut << fPrefix << line << endl; } From 32882457c0a14c187073afc87274737cd176da1b Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:12:35 +0200 Subject: [PATCH 02/13] test: fix racy loop-variable capture in SubscriptionThreadSafety - the subscriber threads captured the loop counter by reference while the spawning loop kept incrementing it: a genuine data race - depending on timing, threads could also end up with duplicate subscriber names; capture the counter by value instead --- test/plugin_services/_control.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/plugin_services/_control.cxx b/test/plugin_services/_control.cxx index 498c44a81..ebe8e9ee6 100644 --- a/test/plugin_services/_control.cxx +++ b/test/plugin_services/_control.cxx @@ -156,7 +156,7 @@ TEST_F(PluginServices, SubscriptionThreadSafety) std::array, subscribers> threads; auto id = 0; for (auto& thread : threads) { - thread = std::make_unique([&](){ + thread = std::make_unique([&, id](){ auto const subscriber = fair::mq::tools::ToString("subscriber_", id); for (auto i = 0; i < attempts; ++i) { mServices.SubscribeToDeviceStateChange(subscriber, [](DeviceState){}); From 9a266478facade2ee243a2c67c095968a520bb0c Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:12:35 +0200 Subject: [PATCH 03/13] refactor: compile channel name validation regex only once - the pattern is constant; compiling it on every Validate() call is wasted work and, when channels are validated from multiple threads, needlessly exercises libstdc++'s lazily-populated ctype caches --- fairmq/Channel.cxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fairmq/Channel.cxx b/fairmq/Channel.cxx index b3e3207c0..5fa049781 100644 --- a/fairmq/Channel.cxx +++ b/fairmq/Channel.cxx @@ -181,7 +181,8 @@ try { // validate channel name smatch m; - if (regex_search(fName, m, regex(R"([^a-zA-Z0-9\-_\[\]#])"))) { + static regex const invalidName(R"([^a-zA-Z0-9\-_\[\]#])"); + if (regex_search(fName, m, invalidName)) { ss << "INVALID"; LOG(debug) << ss.str(); LOG(error) << "channel name contains illegal character: '" << m.str(0) << "', allowed characters are: a-z, A-Z, 0-9, -, _, [, ], #"; From f818c2a6fb763a6d98642e978a7ef0db90e8906a Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:14:15 +0200 Subject: [PATCH 04/13] test: pre-fill libstdc++ ctype caches before threads exist - std::ctype caches narrow()/widen() results per character in plain char arrays of the global classic-locale facet, written without synchronization from header-inlined code (locale_facets.h); two threads exercising an uncached character concurrently (e.g. compiling a std::regex in Channel::Validate) constitute a true data race that ThreadSanitizer rightfully reports - the stores are real and unsynchronized, so a tsan-instrumented libstdc++ cannot help here; instead fill the caches before any thread is spawned, which turns every later access into a pure read - warm the lazily-installed num_put/num_get caches used by stream insertion/extraction as well, via a small format/parse round-trip - wire the warm-up into the gtest runner main() and, via a static initializer, into the test device runner --- test/CMakeLists.txt | 1 + test/helper/LocaleWarmup.h | 63 +++++++++++++++++++++++++++++++++++ test/helper/runTestDevice.cxx | 5 +++ test/runner.cxx.in | 2 ++ 4 files changed, 71 insertions(+) create mode 100644 test/helper/LocaleWarmup.h diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 52721c3ba..779e826dd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,6 +32,7 @@ endif() add_testhelper(runTestDevice SOURCES helper/runTestDevice.cxx + helper/LocaleWarmup.h helper/devices/TestPairLeft.h helper/devices/TestPairRight.h helper/devices/TestPollIn.h diff --git a/test/helper/LocaleWarmup.h b/test/helper/LocaleWarmup.h new file mode 100644 index 000000000..d73dfc42e --- /dev/null +++ b/test/helper/LocaleWarmup.h @@ -0,0 +1,63 @@ +/******************************************************************************** + * Copyright (C) 2026 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH * + * * + * This software is distributed under the terms of the * + * GNU Lesser General Public Licence (LGPL) version 3, * + * copied verbatim in the file "LICENSE" * + ********************************************************************************/ + +#ifndef FAIR_MQ_TEST_LOCALEWARMUP_H +#define FAIR_MQ_TEST_LOCALEWARMUP_H + +#include +#include + +namespace fair::mq::test { + +/// libstdc++'s std::ctype caches narrow()/widen() results per character +/// in plain char arrays of the global classic-locale facet -- by design with +/// unsynchronized writes, emitted from header-inlined code (locale_facets.h). +/// Whenever two threads exercise an uncached character concurrently (e.g. by +/// compiling a std::regex), ThreadSanitizer rightfully reports the write as a +/// data race. The stores are real and unsynchronized, so instrumenting +/// libstdc++ cannot help; instead, fill the caches before any thread exists, +/// which turns every later access into a pure read. +inline void WarmUpLocaleCaches() +{ + auto const& ct = std::use_facet>(std::locale::classic()); + for (int c = 1; c < 256; ++c) { + auto const ch = static_cast(c); + ct.narrow(ch, '\0'); + ct.widen(ch); + } + + // Only the range overload runs _M_narrow_init(), which sets the + // all-cached flag (_M_narrow_ok); the single-char loop above fills the + // cache without it. (widen() needs no equivalent: the single-char + // overload already runs _M_widen_init().) + char from[256]; + char to[256]; + for (int c = 0; c < 256; ++c) { + from[c] = static_cast(c); + } + ct.narrow(from, from + 256, '?', to); + + // num_put/num_get caches used by stream insertion/extraction + std::ostringstream os; + os << 4711 << ' ' << 3.14; + std::istringstream is("42 3.14"); + int i = 0; + double d = 0.; + is >> i >> d; +} + +/// For binaries that do not own main(): define a namespace-scope instance to +/// run the warm-up during static initialization, before main(). +struct LocaleWarmup +{ + LocaleWarmup() { WarmUpLocaleCaches(); } +}; + +} // namespace fair::mq::test + +#endif /* FAIR_MQ_TEST_LOCALEWARMUP_H */ diff --git a/test/helper/runTestDevice.cxx b/test/helper/runTestDevice.cxx index e94300b28..e757938a3 100644 --- a/test/helper/runTestDevice.cxx +++ b/test/helper/runTestDevice.cxx @@ -6,6 +6,7 @@ * copied verbatim in the file "LICENSE" * ********************************************************************************/ +#include "LocaleWarmup.h" #include "devices/TestPairLeft.h" #include "devices/TestPairRight.h" #include "devices/TestPollIn.h" @@ -30,6 +31,10 @@ namespace bpo = boost::program_options; +namespace { +[[maybe_unused]] fair::mq::test::LocaleWarmup const gLocaleWarmup{}; +} + auto addCustomOptions(bpo::options_description& options) -> void { options.add_options() diff --git a/test/runner.cxx.in b/test/runner.cxx.in index 4d4261870..2b67a93a5 100644 --- a/test/runner.cxx.in +++ b/test/runner.cxx.in @@ -10,6 +10,7 @@ #include #include +#include #include @@ -24,6 +25,7 @@ string runTestDevice = "@RUN_TEST_DEVICE@"; int main(int argc, char** argv) { + fair::mq::test::WarmUpLocaleCaches(); ::testing::InitGoogleTest(&argc, argv); ::testing::FLAGS_gtest_death_test_style = "threadsafe"; setenv("FAIRMQ_PATH", FAIRMQ_TEST_ENVIRONMENT, 0); From 2fcb26ffd63cf46562f758c5c9e49a6952c04f88 Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:14:15 +0200 Subject: [PATCH 05/13] build: append linker flags as strings, not lists - CMAKE_EXE_LINKER_FLAGS and CMAKE_SHARED_LINKER_FLAGS are command-line strings; list(APPEND) inserts a semicolon once the variable is non-empty, which splits the link command at the shell level - latent until now because nothing passed linker flags on the cmake command line --- cmake/FairMQProjectSettings.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/FairMQProjectSettings.cmake b/cmake/FairMQProjectSettings.cmake index 62147e11e..ea73c9f56 100644 --- a/cmake/FairMQProjectSettings.cmake +++ b/cmake/FairMQProjectSettings.cmake @@ -54,8 +54,8 @@ endif() list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/${PROJECT_INSTALL_LIBDIR}" isSystemDir) if("${isSystemDir}" STREQUAL "-1") if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - list(APPEND CMAKE_EXE_LINKER_FLAGS "-Wl,--enable-new-dtags") - list(APPEND CMAKE_SHARED_LINKER_FLAGS "-Wl,--enable-new-dtags") + string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--enable-new-dtags") + string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--enable-new-dtags") list(PREPEND CMAKE_INSTALL_RPATH "$ORIGIN/../${PROJECT_INSTALL_LIBDIR}") elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") list(PREPEND CMAKE_INSTALL_RPATH "@loader_path/../${PROJECT_INSTALL_LIBDIR}") From 1ebc967146b7dd6cb809f95fea45ce8debe3fb3d Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:14:15 +0200 Subject: [PATCH 06/13] test: support an alternative runtime library dir per test - introduce FAIRMQ_TEST_LD_LIBRARY_PATH, which prepends a directory to each test's environment via ctest, so the tests can run against an alternative runtime library (e.g. a tsan-instrumented libstdc++) - LD_LIBRARY_PATH rather than an injected rpath: an rpath added via the linker flags cannot precede the rpath spack's gcc adds through its specs file, so the compiler's own libstdc++ would keep winning the runtime search order - scoped per test on purpose: an instrumented library has unresolved __tsan_* symbols and must not be loaded into uninstrumented tools like cmake, ctest or ninja - fail the configuration instead of silently dropping the injection on CMake < 3.22 (ENVIRONMENT_MODIFICATION) - cover the example tests too; they share the instrumented runtime but not the locale-cache warmup (their main() is the installed public header). The custom-controller env block was dead before: it tested lsan_options, which only ever existed in the add_example() function scope, so the test also never received the LSan suppressions --- CMakeLists.txt | 7 +++++ examples/CMakeLists.txt | 33 +++++++++++++++-------- examples/custom-controller/CMakeLists.txt | 6 +++-- test/CMakeLists.txt | 12 ++++++++- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a4164050..e896a20a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,13 @@ include(FairMQDependencies) # Targets ###################################################################### +if(FAIRMQ_TEST_LD_LIBRARY_PATH AND CMAKE_VERSION VERSION_LESS 3.22) + # The per-test injection relies on ctest's ENVIRONMENT_MODIFICATION, which + # older CMake silently drops -- the tests would run against the default + # runtime while looking green. + message(FATAL_ERROR "FAIRMQ_TEST_LD_LIBRARY_PATH requires CMake >= 3.22") +endif() + if(BUILD_FAIRMQ) add_subdirectory(fairmq) endif() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index cba36c7e3..34bc5bc33 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -12,6 +12,22 @@ set(test_script_prefix "test-ex") set(testsuite "Example") set(transports "zeromq" "shmem") +# Environment for every example test (directory scope, so the subdirectories +# see it too) +set(env_mods) +if(ENABLE_SANITIZER_LEAK AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) + get_filename_component(lsan_supps "${CMAKE_SOURCE_DIR}/test/leak_sanitizer_suppressions.txt" ABSOLUTE) + list(APPEND env_mods "LSAN_OPTIONS=set:suppressions=${lsan_supps}") +endif() +# Run the example tests against an alternative runtime library directory as +# well (see test/CMakeLists.txt). The test scripts only launch instrumented +# binaries (bash itself does not link libstdc++). Note: unlike the +# testsuites, the example binaries get no locale-cache warmup -- their +# main() comes from the installed public header fairmq/runDevice.h. +if(FAIRMQ_TEST_LD_LIBRARY_PATH) + list(APPEND env_mods "LD_LIBRARY_PATH=path_list_prepend:${FAIRMQ_TEST_LD_LIBRARY_PATH}") +endif() + function(add_example) cmake_parse_arguments(PARSE_ARGV 0 ARG "CONFIG;NO_TRANSPORT;NO_TEST" @@ -29,11 +45,6 @@ function(add_example) message(FATAL_ERROR "NAME arg is required") endif() - if(ENABLE_SANITIZER_LEAK AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) - get_filename_component(lsan_supps "${CMAKE_SOURCE_DIR}/test/leak_sanitizer_suppressions.txt" ABSOLUTE) - set(lsan_options "LSAN_OPTIONS=set:suppressions=${lsan_supps}") - endif() - if(ARG_DEVICE) set(exe_targets) foreach(device IN LISTS ARG_DEVICE) @@ -78,8 +89,8 @@ function(add_example) set(test "${testsuite}.${name}.${transport}") add_test(NAME ${test} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${test_script} ${transport}) set_tests_properties(${test} PROPERTIES TIMEOUT "30") - if(lsan_options) - set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION ${lsan_options}) + if(env_mods) + set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION "${env_mods}") endif() else() foreach(transport IN LISTS transports) @@ -88,16 +99,16 @@ function(add_example) set(test "${testsuite}.${name}.${variant}.${transport}") add_test(NAME ${test} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${test_script} ${transport} ${variant}) set_tests_properties(${test} PROPERTIES TIMEOUT "30") - if(lsan_options) - set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION ${lsan_options}) + if(env_mods) + set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION "${env_mods}") endif() endforeach() else() set(test "${testsuite}.${name}.${transport}") add_test(NAME ${test} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${test_script} ${transport}) set_tests_properties(${test} PROPERTIES TIMEOUT "30") - if(lsan_options) - set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION ${lsan_options}) + if(env_mods) + set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION "${env_mods}") endif() endif() endforeach() diff --git a/examples/custom-controller/CMakeLists.txt b/examples/custom-controller/CMakeLists.txt index 6d3ce681d..4ce52d95c 100644 --- a/examples/custom-controller/CMakeLists.txt +++ b/examples/custom-controller/CMakeLists.txt @@ -15,6 +15,8 @@ set_target_properties(${exe} PROPERTIES ENABLE_EXPORTS ON) set(test "${testsuite}.${name}") add_test(NAME ${test} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${exe}) set_tests_properties(${test} PROPERTIES TIMEOUT 30) -if(lsan_options) - set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION ${lsan_options}) +# (the previous `if(lsan_options)` here could never fire -- that variable +# only ever existed in the add_example() function scope) +if(env_mods) + set_tests_properties(${test} PROPERTIES ENVIRONMENT_MODIFICATION "${env_mods}") endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 779e826dd..234472aa4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,9 +24,19 @@ if(definitions) set(definitions DEFINITIONS ${definitions}) endif() +set(test_environment) if(ENABLE_SANITIZER_LEAK) get_filename_component(lsan_supps "${CMAKE_CURRENT_SOURCE_DIR}/leak_sanitizer_suppressions.txt" ABSOLUTE) - set(environment ENVIRONMENT "LSAN_OPTIONS=set:suppressions=${lsan_supps}") + list(APPEND test_environment "LSAN_OPTIONS=set:suppressions=${lsan_supps}") +endif() +# Run the tests against an alternative runtime library directory, e.g. a +# tsan-instrumented libstdc++. Set per test (not at the job level), because +# such a library must only be loaded into instrumented executables. +if(FAIRMQ_TEST_LD_LIBRARY_PATH) + list(APPEND test_environment "LD_LIBRARY_PATH=path_list_prepend:${FAIRMQ_TEST_LD_LIBRARY_PATH}") +endif() +if(test_environment) + set(environment ENVIRONMENT ${test_environment}) endif() add_testhelper(runTestDevice From afbc957356cb9721fbd8d00acd5cf86aa7383dd6 Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:33:59 +0200 Subject: [PATCH 07/13] ci: tolerate cache-satisfied specs in the buildcache pushes - buildcache push expands its selection into the full dependency closure, build-time dependencies included; specs that were satisfied from the buildcache do not have those installed locally, and the push fails with PackageNotInstalledError - both push sites (the early gcc node push and the env-level push) only ever ran in fresh-build scenarios before, so the failure surfaced once the cache was warm - pass --allow-missing to skip what is not installed (a best-effort push of everything that is); a freshly built gcc thus still uploads its build-time dependencies, which a future gcc rebuild can then pull as binaries --- .github/actions/setup-deps/action.yml | 5 ++++- .github/workflows/buildcache.yml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup-deps/action.yml b/.github/actions/setup-deps/action.yml index 81b94054e..2a78282d5 100644 --- a/.github/actions/setup-deps/action.yml +++ b/.github/actions/setup-deps/action.yml @@ -56,8 +56,11 @@ runs: # Push the gcc node now, BEFORE `spack compiler find` registers it as an # external -- externals are excluded from `buildcache push`. This is what # lets CI pull gcc from the cache (~1 min) instead of building it (~1 h). + # --allow-missing: when gcc itself was pulled from the cache, its + # build-time dependencies are not installed locally and would + # otherwise fail the push. spack mirror set --oci-username ${{ github.actor }} --oci-password-variable GITHUB_TOKEN ghcr-buildcache - spack buildcache push --unsigned ghcr-buildcache /$gcc_hash + spack buildcache push --unsigned --allow-missing ghcr-buildcache /$gcc_hash fi spack compiler find "$(spack location -i /$gcc_hash)" echo "::endgroup::" diff --git a/.github/workflows/buildcache.yml b/.github/workflows/buildcache.yml index 955eafd5f..ab7179e66 100644 --- a/.github/workflows/buildcache.yml +++ b/.github/workflows/buildcache.yml @@ -50,7 +50,10 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | spack -e fairmq mirror set --oci-username ${{ github.actor }} --oci-password-variable GITHUB_TOKEN ghcr-buildcache - spack -e fairmq buildcache push --unsigned ghcr-buildcache + # --allow-missing: build-time dependencies of specs that were + # satisfied from the buildcache are not installed locally and + # would otherwise fail the whole push. + spack -e fairmq buildcache push --unsigned --allow-missing ghcr-buildcache update-index: if: github.repository == 'FairRootGroup/FairMQ' && !cancelled() From 6bdad7212f0a3ce12298aded9c552a21232dd056 Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:15:31 +0200 Subject: [PATCH 08/13] ci: add spack package for a tsan-instrumented libstdc++ - gcc ships no supported switch to build libstdc++ with -fsanitize=thread, and spack's gcc recipe filters all flags out of the target-library build (CXXFLAGS_FOR_TARGET is owned by its generated --with-build-config=spack makefile), so provide a dedicated libstdcxx-tsan package in a custom repo - build only the libstdc++-v3 subtree from the matching gcc release tarball, configured standalone against the already-installed toolchain (recipe modeled on https://iree.dev/developers/debugging/sanitizers/), instead of rebuilding all of gcc - the result is a drop-in runtime replacement for the compiler's libstdc++ (same soname and symbol versions), to be loaded only by the instrumented test executables - normalize the install layout after make install: the standalone build puts the runtime libraries into the multilib os dir (lib64 on x86_64) regardless of --libdir, and --with-toolexeclibdir only applies to cross builds - register the repo in the setup-deps action before creating the env --- .github/actions/setup-deps/action.yml | 1 + .../packages/libstdcxx_tsan/package.py | 90 +++++++++++++++++++ test/ci/spack_repo/fairmq_ci/repo.yaml | 3 + 3 files changed, 94 insertions(+) create mode 100644 test/ci/spack_repo/fairmq_ci/packages/libstdcxx_tsan/package.py create mode 100644 test/ci/spack_repo/fairmq_ci/repo.yaml diff --git a/.github/actions/setup-deps/action.yml b/.github/actions/setup-deps/action.yml index 2a78282d5..9c7c0eeb9 100644 --- a/.github/actions/setup-deps/action.yml +++ b/.github/actions/setup-deps/action.yml @@ -69,6 +69,7 @@ runs: shell: spack-bash {0} run: | echo "::group::Install dependencies" + spack repo add "$GITHUB_WORKSPACE/test/ci/spack_repo/fairmq_ci" spack env create fairmq test/ci/spack-${{ inputs.env }}.yaml spack -e fairmq add gcc@${{ inputs.gcc }} spack -e fairmq config add "packages:all:require:'%gcc@${{ inputs.gcc }}'" diff --git a/test/ci/spack_repo/fairmq_ci/packages/libstdcxx_tsan/package.py b/test/ci/spack_repo/fairmq_ci/packages/libstdcxx_tsan/package.py new file mode 100644 index 000000000..e91b9a0fb --- /dev/null +++ b/test/ci/spack_repo/fairmq_ci/packages/libstdcxx_tsan/package.py @@ -0,0 +1,90 @@ +import os +import shutil + +from spack_repo.builtin.build_systems.generic import Package + +from spack.package import * + + +class LibstdcxxTsan(Package): + """ThreadSanitizer-instrumented build of GCC's libstdc++. + + GCC ships no supported way to produce a tsan-instrumented libstdc++ (and + spack's gcc package offers no hook into the target-library flags), so this + package builds only the libstdc++-v3 subtree from the GCC release tarball + matching the toolchain, with -fsanitize=thread. The result is a drop-in + runtime replacement for the compiler's own libstdc++ (same soname and + symbol versions) that lets tsan observe the synchronization inside the + C++ standard library (e.g. std::locale/std::regex lazy initialization). + + Select it via LD_LIBRARY_PATH in the environment of the instrumented + executables only. Like any shared library built with gcc + -fsanitize=thread it carries unresolved __tsan_* symbols satisfied by the + executable's libtsan, so loading it into an uninstrumented executable + fails. + + Recipe modeled on https://iree.dev/developers/debugging/sanitizers/. + """ + + homepage = "https://gcc.gnu.org/onlinedocs/libstdc++/" + url = "https://ftp.gnu.org/gnu/gcc/gcc-15.2.0/gcc-15.2.0.tar.xz" + + # Keep in lockstep with the gcc version used by the tsan CI job: a + # libstdc++ older than the compiler may lack GLIBCXX symbol versions + # that binaries built by that compiler reference. Checksums match the + # version() directives in spack's gcc package. + version("15.2.0", sha256="438fd996826b0c82485a29da03a72d71d6e3541a83ec702df4271f6fe025d24e") + version("14.3.0", sha256="e0dc77297625631ac8e50fa92fffefe899a4eb702592da5c32ef04e2293aca3a") + + depends_on("c", type="build") + depends_on("cxx", type="build") + depends_on("gmake", type="build") + + def install(self, spec, prefix): + # gthr-default.h only materializes in a configured libgcc build dir; + # on POSIX targets it is a verbatim copy of gthr-posix.h. Providing it + # up front spares building libgcc (and the in-tree compiler). + gthr_include = join_path(self.stage.source_path, "spack-gthr") + mkdirp(gthr_include) + copy( + join_path(self.stage.source_path, "libgcc", "gthr-posix.h"), + join_path(gthr_include, "gthr-default.h"), + ) + + flags = f"-I{gthr_include} -O2 -g -fno-omit-frame-pointer -fsanitize=thread" + build_dir = join_path(self.stage.source_path, "spack-build") + mkdirp(build_dir) + with working_dir(build_dir): + configure = Executable( + join_path(self.stage.source_path, "libstdc++-v3", "configure") + ) + configure( + f"--prefix={prefix}", + f"--libdir={prefix.lib}", + "--disable-multilib", + "--disable-libstdcxx-pch", + "--enable-libstdcxx-threads=yes", + "--with-default-libstdcxx-abi=new", + f"CFLAGS={flags}", + f"CXXFLAGS={flags}", + "LDFLAGS=-fsanitize=thread", + ) + make() + make("install") + + # The runtime libraries land in the multilib os dir (lib64 on + # x86_64), --libdir notwithstanding (--with-toolexeclibdir only + # applies to cross builds); normalize to lib/ for the consumer. + if os.path.isdir(prefix.lib64): + mkdirp(prefix.lib) + for entry in os.listdir(prefix.lib64): + shutil.move(join_path(prefix.lib64, entry), join_path(prefix.lib, entry)) + os.rmdir(prefix.lib64) + + # Only the runtime library is consumed -- compilation uses the + # compiler's own (identical) headers. Drop the rest to keep the + # buildcache entry small. + for directory in ("include", "share"): + path = join_path(prefix, directory) + if os.path.isdir(path): + shutil.rmtree(path) diff --git a/test/ci/spack_repo/fairmq_ci/repo.yaml b/test/ci/spack_repo/fairmq_ci/repo.yaml new file mode 100644 index 000000000..8d96cd9e8 --- /dev/null +++ b/test/ci/spack_repo/fairmq_ci/repo.yaml @@ -0,0 +1,3 @@ +repo: + namespace: fairmq_ci + api: v2.0 From a2b3dbf291d95110bbc5516ebfb280156e8dd667 Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:15:31 +0200 Subject: [PATCH 09/13] ci: add tsan spack environment with instrumented libzmq - mirror spack-latest.yaml, with -fsanitize=thread on the libzmq and libsodium nodes so tsan can observe the happens-before edges established inside libzmq's lock-free queues, plus the libstdcxx-tsan root spec - flags are applied per node instead of via the propagating '==' operator, which could reach the gcc node and trigger a compiler rebuild - unchanged roots (fairlogger, boost, ninja, cmake) keep their spec hashes, so they are shared with the regular buildcache entries; the instrumented nodes hash differently and coexist in the content-addressed cache - exclude libstdcxx-tsan from concretizer reuse so recipe changes always take effect; unchanged recipes still hit the buildcache because the spec hash is identical - add the tsan env to the buildcache matrix (rebuilding also on spack_repo changes) so the instrumented binaries are cached instead of rebuilt on every CI run --- .github/actions/setup-deps/action.yml | 2 +- .github/workflows/buildcache.yml | 3 ++ test/ci/spack-tsan.yaml | 47 +++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 test/ci/spack-tsan.yaml diff --git a/.github/actions/setup-deps/action.yml b/.github/actions/setup-deps/action.yml index 9c7c0eeb9..298c879b0 100644 --- a/.github/actions/setup-deps/action.yml +++ b/.github/actions/setup-deps/action.yml @@ -6,7 +6,7 @@ inputs: description: 'GCC version to use' required: true env: - description: 'Spack environment name (latest, boost187)' + description: 'Spack environment name (latest, boost187, tsan)' default: 'latest' fresh: description: 'Use fresh concretization' diff --git a/.github/workflows/buildcache.yml b/.github/workflows/buildcache.yml index ab7179e66..f63d38bf8 100644 --- a/.github/workflows/buildcache.yml +++ b/.github/workflows/buildcache.yml @@ -8,6 +8,7 @@ on: branches: [dev, master] paths: - 'test/ci/spack-*.yaml' + - 'test/ci/spack_repo/**' - '.github/workflows/buildcache.yml' - '.github/actions/setup-deps/**' @@ -30,6 +31,8 @@ jobs: include: - gcc: '15' env: 'boost187' + - gcc: '15' + env: 'tsan' steps: - uses: actions/checkout@v6 diff --git a/test/ci/spack-tsan.yaml b/test/ci/spack-tsan.yaml new file mode 100644 index 000000000..f0f019a5d --- /dev/null +++ b/test/ci/spack-tsan.yaml @@ -0,0 +1,47 @@ +spack: + specs: + # fairlogger and boost are left uninstrumented on purpose: no tsan report + # has ever implicated them. Should that change, instrument them with + # per-node flags like libzmq below. + - "fairlogger@2.2.0 ^fmt@:11" + - "boost@1.66: +container +program_options +filesystem +date_time +regex" + # libzmq (and the libsodium it links) are built with ThreadSanitizer + # instrumentation so tsan can observe the happens-before edges that + # libzmq's lock-free queues establish between user and I/O threads. + # Flags are set per node on purpose: the propagating '==' operator could + # reach the gcc node (the compiler is a dependency since spack 1.0) and + # trigger a full compiler rebuild. + - "libzmq@4.1.4: cflags='-g -fno-omit-frame-pointer -fsanitize=thread' + cxxflags='-g -fno-omit-frame-pointer -fsanitize=thread' + ldflags='-fsanitize=thread' + ^libsodium cflags='-g -fno-omit-frame-pointer -fsanitize=thread' + ldflags='-fsanitize=thread'" + # ThreadSanitizer-instrumented libstdc++ (from test/ci/spack_repo), a + # drop-in runtime replacement injected per test via LD_LIBRARY_PATH (see + # FAIRMQ_TEST_LD_LIBRARY_PATH in test/CMakeLists.txt). + # Keep the major version in lockstep with the gcc used by the tsan job. + - "libstdcxx-tsan@15" + - ninja + # the per-test env injection uses ctest's ENVIRONMENT_MODIFICATION, + # which needs cmake >= 3.22 + - "cmake@3.22:" + + concretizer: + targets: + granularity: generic + reuse: + # always re-concretize libstdcxx-tsan from the recipe in + # test/ci/spack_repo: spec reuse would keep resurrecting previously + # built (and cached) variants after the recipe changed; unchanged + # recipes still hit the buildcache because the hash is identical + exclude: ["libstdcxx-tsan"] + + packages: + all: + require: + - target=x86_64_v3 + + mirrors: + ghcr-buildcache: + url: oci://ghcr.io/fairrootgroup/fairmq-spack-buildcache + signed: false From 247c1cecaf39b1a7540c133b1fb4b28ef8a4995a Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:16:30 +0200 Subject: [PATCH 10/13] ci: switch thread-sanitizer job to gcc with instrumented deps - build with the spack gcc toolchain like every other job: no clang++, no -fuse-ld=lld and no lld install step (the GNU BFD failure on tsan objects was specific to clang's tsan runtime) - use gcc 15 for the freshest libtsan runtime; the asan entry stays on gcc 14, so the matrix now carries per-entry gcc and spack env names - consume the tsan spack env and load the instrumented libstdc++ into each test's environment via FAIRMQ_TEST_LD_LIBRARY_PATH (it shares the soname of the compiler's own, so it substitutes process-wide at load time) - use -fno-omit-frame-pointer for readable reports; optimization comes from the project's Debug -Og - verify the wiring: assert the test environment resolves libstdc++ to the instrumented copy and that libzmq is tsan-instrumented, since both failure modes are silent (the suite still passes, with reduced race coverage) --- .github/workflows/ci.yml | 48 +++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7c663c985..cae2bb79e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,15 +75,18 @@ jobs: matrix: sanitizer: - name: asan+lsan+ubsan + gcc: '14' + env: latest options: | ENABLE_SANITIZER_ADDRESS=ON ENABLE_SANITIZER_LEAK=ON ENABLE_SANITIZER_UNDEFINED_BEHAVIOUR=ON cxx-flags: -O1 -fno-omit-frame-pointer - name: tsan + gcc: '15' + env: tsan options: ENABLE_SANITIZER_THREAD=ON - cxx-compiler: clang++ - cxx-flags: -fuse-ld=lld + cxx-flags: -fno-omit-frame-pointer steps: - uses: actions/checkout@v6 @@ -95,7 +98,8 @@ jobs: - name: Setup spack environment uses: ./.github/actions/setup-deps with: - gcc: '14' + gcc: ${{ matrix.sanitizer.gcc }} + env: ${{ matrix.sanitizer.env }} - name: ccache uses: hendrikmuhs/ccache-action@v1 @@ -103,23 +107,55 @@ jobs: key: ${{ github.job }}-${{ matrix.sanitizer.name }} max-size: 500M - - name: Install lld + - name: Locate instrumented libstdc++ if: matrix.sanitizer.name == 'tsan' - run: sudo apt-get update && sudo apt-get install -y lld + shell: spack-bash {0} + # The test processes must load the tsan-instrumented libstdc++ + # instead of the compiler's own (same soname, LD_LIBRARY_PATH beats + # the RUNPATH). Set per test via ctest, not at the job level: like + # any shared library built with gcc -fsanitize=thread it has + # unresolved __tsan_* symbols, so loading it into uninstrumented + # tools (cmake, ctest, ninja) would break them. + run: | + prefix=$(spack -e fairmq location -i libstdcxx-tsan) + echo "test_library_path=$prefix/lib" >> $GITHUB_ENV - name: Configure and Build uses: threeal/cmake-action@v2 with: generator: Ninja - cxx-compiler: ${{ matrix.sanitizer.cxx-compiler }} cxx-flags: ${{ matrix.sanitizer.cxx-flags }} options: | CMAKE_BUILD_TYPE=Debug BUILD_TESTING=ON CMAKE_C_COMPILER_LAUNCHER=ccache CMAKE_CXX_COMPILER_LAUNCHER=ccache + FAIRMQ_TEST_LD_LIBRARY_PATH=${{ env.test_library_path }} ${{ matrix.sanitizer.options }} + - name: Verify tsan instrumentation wiring + if: matrix.sanitizer.name == 'tsan' + shell: spack-bash {0} + run: | + set -x + # the test environment must resolve libstdc++ to the instrumented copy + LD_LIBRARY_PATH=$test_library_path ldd build/test/testsuite_Channel \ + | grep 'libstdc++' | tee /dev/stderr | grep -q libstdcxx-tsan + # libzmq must be instrumented + nm -D --undefined-only "$(spack -e fairmq location -i libzmq)/lib/libzmq.so" \ + | grep -q __tsan_ + # the instrumented libstdc++ must match the compiler release exactly, + # or binaries may reference GLIBCXX versions the runtime lacks + # (--color=never: the CI config forces SPACK_COLOR=always, which + # would wrap the version in ANSI escapes) + test "$(spack --color=never -e fairmq find --format '{version}' libstdcxx-tsan)" \ + = "$(g++ -dumpfullversion)" + # the LD_LIBRARY_PATH prepend must reach the registered tests + # (via a file: grep -q quits on first match, and SIGPIPE on the + # large json output would fail the step under pipefail) + ctest --test-dir build --show-only=json-v1 > ctest-show-only.json + grep -q libstdcxx-tsan ctest-show-only.json + - name: Test run: | # Region/segment tests mlock() shared memory; raise the locked-memory From f22baa74dc583154f7046c453559a34d45aed30a Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 16:16:30 +0200 Subject: [PATCH 11/13] test: drop thread-sanitizer suppressions - every entry stood in for a library tsan could not see into; with libzmq, libsodium and libstdc++ now tsan-instrumented in the tsan CI job, the happens-before edges they establish are visible and nothing is left to suppress - suppressions were blunt (a race: entry matches any frame in the stack), so they could also mask real races passing through those frames --- .github/workflows/ci.yml | 2 -- test/thread_sanitizer_suppressions.txt | 32 -------------------------- 2 files changed, 34 deletions(-) delete mode 100644 test/thread_sanitizer_suppressions.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cae2bb79e..e4321703e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,8 +68,6 @@ jobs: if: github.repository == 'FairRootGroup/FairMQ' name: ${{ matrix.sanitizer.name }} runs-on: ubuntu-24.04 - env: - TSAN_OPTIONS: suppressions=${{ github.workspace }}/test/thread_sanitizer_suppressions.txt strategy: fail-fast: false matrix: diff --git a/test/thread_sanitizer_suppressions.txt b/test/thread_sanitizer_suppressions.txt deleted file mode 100644 index 153fbba79..000000000 --- a/test/thread_sanitizer_suppressions.txt +++ /dev/null @@ -1,32 +0,0 @@ -# ThreadSanitizer suppressions for FairMQ. -# -# libzmq is not built with -fsanitize=thread, so tsan cannot observe the -# happens-before that its inproc/socket queues establish between the user -# threads and libzmq's I/O threads. This produces false-positive data races -# on message buffers handed across that boundary. The proper fix is to build -# libzmq with tsan as well; until then these races are suppressed. - -# Racy access made directly from libzmq (encoder/decoder memcpy, -# signaler/epoll, recv). -called_from_lib:libzmq.so - -# Zero-copy message deleters that libzmq runs from msg_t::close once it is -# done with a buffer; the happens-before with the buffer's producer runs -# through libzmq's queue. -race:zmq::msg_t::close -race:fair::mq::TransportFactory::SimpleMsgCleanup - -# shmem message metadata read on the user side after delivery through libzmq -# (the shmem transport ships its metadata over a zeromq channel). -race:fair::mq::shmem::Message::Message -race:fair::mq::shmem::Socket::Receive -race:fair::mq::shmem::ShmHeader - -# Multipart message buffers written by the sending thread and read back by the -# receiving thread after delivery through libzmq's inproc queue; the -# happens-before runs through that non-instrumented queue. -race:RunMultiThreadedMultipart - -# std::regex / std::locale facet lazy-init races inside libstdc++. -race:fair::mq::Channel::Validate -race:fair::mq::tools::ToString From 1603dc5930c95e09c17e2cf6f472a41876131783 Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 18:43:13 +0200 Subject: [PATCH 12/13] ci: fix UBSan option spelling so it takes effect - ENABLE_SANITIZER_UNDEFINED_BEHAVIOUR never matched the CMake option ENABLE_SANITIZER_UNDEFINED_BEHAVIOR (cmake/FairMQProjectSettings.cmake), so the asan+lsan+ubsan job has never actually enabled UBSan --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e4321703e..3e6f52d9c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,7 +78,7 @@ jobs: options: | ENABLE_SANITIZER_ADDRESS=ON ENABLE_SANITIZER_LEAK=ON - ENABLE_SANITIZER_UNDEFINED_BEHAVIOUR=ON + ENABLE_SANITIZER_UNDEFINED_BEHAVIOR=ON cxx-flags: -O1 -fno-omit-frame-pointer - name: tsan gcc: '15' From 6cca61c67f645eb4c247eafb50a38dc253f7676e Mon Sep 17 00:00:00 2001 From: Dennis Klein Date: Wed, 10 Jun 2026 19:22:55 +0200 Subject: [PATCH 13/13] ci: pin third-party actions to release commit SHAs - hendrikmuhs/ccache-action: the v1 major tag also lagged behind the latest release (v1.2.20 instead of v1.2.23), so this is a bump too - threeal/cmake-action: v2 == v2.1.0, pinned as-is --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3e6f52d9c..0617f3ce5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,13 +37,13 @@ jobs: env: ${{ matrix.env }} - name: ccache - uses: hendrikmuhs/ccache-action@v1 + uses: hendrikmuhs/ccache-action@d62db5f07c26379fc4b4e0916f098a92573c3b03 # v1.2.23 with: key: ${{ github.job }}-${{ matrix.env }}-gcc${{ matrix.gcc }} max-size: 500M - name: Configure and Build - uses: threeal/cmake-action@v2 + uses: threeal/cmake-action@725d1314ccf9ea922805d7e3f9d9bcbca892b406 # v2.1.0 with: generator: Ninja options: | @@ -100,7 +100,7 @@ jobs: env: ${{ matrix.sanitizer.env }} - name: ccache - uses: hendrikmuhs/ccache-action@v1 + uses: hendrikmuhs/ccache-action@d62db5f07c26379fc4b4e0916f098a92573c3b03 # v1.2.23 with: key: ${{ github.job }}-${{ matrix.sanitizer.name }} max-size: 500M @@ -119,7 +119,7 @@ jobs: echo "test_library_path=$prefix/lib" >> $GITHUB_ENV - name: Configure and Build - uses: threeal/cmake-action@v2 + uses: threeal/cmake-action@725d1314ccf9ea922805d7e3f9d9bcbca892b406 # v2.1.0 with: generator: Ninja cxx-flags: ${{ matrix.sanitizer.cxx-flags }} @@ -180,7 +180,7 @@ jobs: gcc: '14' - name: ccache - uses: hendrikmuhs/ccache-action@v1 + uses: hendrikmuhs/ccache-action@d62db5f07c26379fc4b4e0916f098a92573c3b03 # v1.2.23 with: key: ${{ github.job }} max-size: 500M