From eb8b64af302e591cea95da9e6271f7653d6ca84d Mon Sep 17 00:00:00 2001 From: Ashmit JaiSarita Gupta Date: Fri, 5 Jun 2026 01:23:19 +0530 Subject: [PATCH 01/10] feat: add Qureg checkpointing via ADIOS2 (#747) Adds saveQuregToFile() and createQuregFromFile() to write a Qureg to disk and restore it later, behind the optional CMake flag ENABLE_CHECKPOINTING (which requires ADIOS2). The file records only the Qureg dimension (numQubits, isDensityMatrix), the qreal byte width (so that a precision mismatch is rejected on load) and its amplitudes - never the incidental deployment fields, nor derivable fields like numAmps - so a Qureg may be restored under a different deployment than it was saved with. Amplitudes are written as an ADIOS2 global array of interleaved (real, imag) reals, with each node contributing only its local slice, so the implementation streams without excessive memory and is distributed- and GPU-ready: GPU state is synced to host before writing and back after reading, and the global-array selection lets any node count read back its own portion. Also adds a validation error when the API is called in a build without checkpointing, reports isCheckpointingCompiled in the environment info (alongside isOmpCompiled, isGpuCompiled, etc.), adds a guarded Catch2 test (tests/unit/checkpoint.cpp) exercising statevector and density-matrix round-trips, and documents the build flag in docs/compile.md. 
--- CMakeLists.txt | 11 ++++ docs/compile.md | 27 +++++++++ quest/include/qureg.h | 45 ++++++++++++++ quest/src/api/environment.cpp | 23 ++++++-- quest/src/api/qureg.cpp | 107 ++++++++++++++++++++++++++++++++++ quest/src/core/validation.cpp | 35 +++++++++++ quest/src/core/validation.hpp | 4 ++ tests/CMakeLists.txt | 4 ++ tests/unit/CMakeLists.txt | 1 + tests/unit/checkpoint.cpp | 88 ++++++++++++++++++++++++++++ 10 files changed, 339 insertions(+), 6 deletions(-) create mode 100644 tests/unit/checkpoint.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b5a438713..72093fe49 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -542,6 +542,17 @@ if (QUEST_ENABLE_CUQUANTUM) endif() +# Checkpointing (ADIOS2) +option(ENABLE_CHECKPOINTING "Enable Qureg checkpointing (saveQuregToFile / createQuregFromFile) via ADIOS2. Turned OFF by default." OFF) +if (ENABLE_CHECKPOINTING) + find_package(adios2 REQUIRED) + target_link_libraries(QuEST PRIVATE adios2::cxx) + target_compile_definitions(QuEST PRIVATE ENABLE_CHECKPOINTING=1) + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) + message(STATUS "Qureg checkpointing is turned ON (via ADIOS2).") +endif() + + # =============================== # Set options to save in config.h diff --git a/docs/compile.md b/docs/compile.md index ba4306a85..56157ce72 100644 --- a/docs/compile.md +++ b/docs/compile.md @@ -689,3 +689,30 @@ Note that distributed executables are launched in a distinct way to the other de > - UCX > - launch flags > - checking via reportenv + + + + +------------------ + + +## Checkpointing + +QuEST can optionally _checkpoint_ a `Qureg` to disk; writing its state to a file with `saveQuregToFile()`, to later be restored into a new `Qureg` with `createQuregFromFile()`. This is useful for long-running jobs which risk timeout or failure - an evolving `Qureg` can be periodically saved and resumed in a subsequent process. 
The file records only the `Qureg` dimension (the number of qubits, and whether it is a density matrix) and its amplitudes; never the incidental deployment configuration. A `Qureg` saved by one deployment (say, distributed over `8` nodes) can therefore be restored by any other (say, a single GPU-accelerated node). + +Checkpointing is built upon [ADIOS2](https://github.com/ornladios/ADIOS2) and is _disabled_ by default. To enable it, install ADIOS2 and specify `ENABLE_CHECKPOINTING` at configuration: +```bash +# configure +cmake .. -D ENABLE_CHECKPOINTING=ON + +# build +cmake --build . --parallel +``` + +> [!IMPORTANT] +> ADIOS2 must be discoverable by CMake. If it was installed to a non-standard location (such as `~/.local`), pass its prefix via `CMAKE_PREFIX_PATH`: +> ```bash +> cmake .. -D ENABLE_CHECKPOINTING=ON -D CMAKE_PREFIX_PATH=$HOME/.local +> ``` + +Calling `saveQuregToFile()` or `createQuregFromFile()` in a build _without_ checkpointing enabled throws a validation error. diff --git a/quest/include/qureg.h b/quest/include/qureg.h index 4ff4c5627..042bf5676 100644 --- a/quest/include/qureg.h +++ b/quest/include/qureg.h @@ -488,6 +488,51 @@ void getDensityQuregAmps(qcomp** outAmps, Qureg qureg, qindex startRow, qindex s /** @} */ + +/** + * @defgroup qureg_checkpoint Checkpointing + * @brief Functions for saving a Qureg to file and restoring it later. + * @details These functions are only available when QuEST is compiled with + * checkpointing support (CMake variable @c ENABLE_CHECKPOINTING=ON), + * which additionally requires the ADIOS2 library. Calling them in a + * build without checkpointing support throws a validation error. + * @{ + */ + + +/** Writes the contents of @p qureg to the file @p fn, so that it may later be + * restored with createQuregFromFile(). The file records only the @p qureg + * dimension (number of qubits and whether it is a density matrix) and its full + * set of amplitudes; incidental deployment information (e.g. 
multithreading, + * GPU-acceleration, distribution) is not recorded. + * + * @param[in] qureg the Qureg to write to disk. + * @param[in] fn the output file path. + * @notyetdoced + * @notyettested + * @see + * - createQuregFromFile() to restore a Qureg saved by this function. + */ +void saveQuregToFile(Qureg qureg, const char* fn); + + +/** Creates a new Qureg from a file previously written by saveQuregToFile(), + * with automatically chosen deployments (independent of those used when the + * file was saved), and populates it with the stored amplitudes. + * + * @param[in] fn the input file path. + * @returns A new Qureg instance matching the saved dimension and amplitudes. + * @notyetdoced + * @notyettested + * @see + * - saveQuregToFile() to create a file readable by this function. + */ +Qureg createQuregFromFile(const char* fn); + + +/** @} */ + + // end de-mangler #ifdef __cplusplus } diff --git a/quest/src/api/environment.cpp b/quest/src/api/environment.cpp index c59334b55..10ffc44d6 100644 --- a/quest/src/api/environment.cpp +++ b/quest/src/api/environment.cpp @@ -204,16 +204,27 @@ void printPrecisionInfo() { } +// reports whether QuEST was compiled with Qureg checkpointing support (ADIOS2) +static bool isCheckpointingCompiled() { +#ifdef ENABLE_CHECKPOINTING + return true; +#else + return false; +#endif +} + + void printCompilationInfo() { print_table( "compilation", { - {"isOmpCompiled", cpu_isOpenmpCompiled()}, - {"isMpiCompiled", comm_isMpiCompiled()}, - {"isMpiSubCommCompiled", comm_isMpiSubCommCompiled()}, - {"isGpuCompiled", gpu_isGpuCompiled()}, - {"isHipCompiled", gpu_isHipCompiled()}, - {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, + {"isOmpCompiled", cpu_isOpenmpCompiled()}, + {"isMpiCompiled", comm_isMpiCompiled()}, + {"isMpiSubCommCompiled", comm_isMpiSubCommCompiled()}, + {"isGpuCompiled", gpu_isGpuCompiled()}, + {"isHipCompiled", gpu_isHipCompiled()}, + {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, + {"isCheckpointingCompiled", 
isCheckpointingCompiled()}, }); } diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp index 84bcd2bd0..db5350a64 100644 --- a/quest/src/api/qureg.cpp +++ b/quest/src/api/qureg.cpp @@ -25,6 +25,10 @@ #include #include +#ifdef ENABLE_CHECKPOINTING +#include +#endif + using std::string; using std::vector; @@ -560,3 +564,106 @@ vector> getDensityQuregAmps(Qureg qureg, qindex startRow, qindex s getDensityQuregAmps(ptrs.data(), qureg, startRow, startCol, numRows, numCols); return out; } + + + +/* + * CHECKPOINTING + * + * which is compiled only when ENABLE_CHECKPOINTING=ON (requiring ADIOS2). + * The API functions are always defined so that the validation layer can throw + * a clear error in non-checkpointing builds, rather than failing to link. + */ + + +void saveQuregToFile(Qureg qureg, const char* fn) { + validate_quregCheckpointingIsCompiled(__func__); + +#ifdef ENABLE_CHECKPOINTING + validate_quregFields(qureg, __func__); + + // ensure the CPU amplitudes reflect any GPU-resident state before writing + syncQuregFromGpu(qureg); + + adios2::ADIOS adios; + adios2::IO io = adios.DeclareIO("QuESTQuregSave"); + adios2::Engine engine = io.Open(fn, adios2::Mode::Write); + + // global single-value metadata; we deliberately record only the dimension + // and precision, never incidental deployment fields (the loader chooses its + // own deployment) nor derivable fields (like numAmps) + adios2::Variable vNumQubits = io.DefineVariable("numQubits"); + adios2::Variable vIsDensMatr = io.DefineVariable("isDensityMatrix"); + adios2::Variable vQrealBytes = io.DefineVariable("qrealBytes"); + + // amplitudes are stored as interleaved (real, imag) reals to stay agnostic + // to precision and to ADIOS2's complex-type support; each node writes only + // its local slice into the global array, avoiding excessive memory use + qindex globalReals = 2 * qureg.numAmps; + qindex localReals = 2 * qureg.numAmpsPerNode; + qindex startReal = 2 * ((qindex) qureg.rank) * qureg.numAmpsPerNode; 
+ adios2::Variable vAmps = io.DefineVariable( + "amps", + { (size_t) globalReals }, + { (size_t) startReal }, + { (size_t) localReals }); + + int qrealBytes = (int) sizeof(qreal); + + engine.BeginStep(); + engine.Put(vNumQubits, qureg.numQubits); + engine.Put(vIsDensMatr, qureg.isDensityMatrix); + engine.Put(vQrealBytes, qrealBytes); + engine.Put(vAmps, reinterpret_cast(qureg.cpuAmps)); + engine.EndStep(); + engine.Close(); +#endif +} + + +Qureg createQuregFromFile(const char* fn) { + validate_quregCheckpointingIsCompiled(__func__); + +#ifdef ENABLE_CHECKPOINTING + adios2::ADIOS adios; + adios2::IO io = adios.DeclareIO("QuESTQuregLoad"); + adios2::Engine engine = io.Open(fn, adios2::Mode::Read); + + engine.BeginStep(); + + // read dimension + precision metadata first, so we can size the new Qureg + int numQubits = 0; + int isDensMatr = 0; + int fileQrealBytes = 0; + engine.Get(io.InquireVariable("numQubits"), numQubits); + engine.Get(io.InquireVariable("isDensityMatrix"), isDensMatr); + engine.Get(io.InquireVariable("qrealBytes"), fileQrealBytes); + engine.PerformGets(); + + validate_quregFileMatchesPrecision(fileQrealBytes, __func__); + + // create a matching-dimension Qureg with automatically chosen deployments, + // independent of those used when the file was saved + Qureg qureg = (isDensMatr)? 
+ createDensityQureg(numQubits) : + createQureg(numQubits); + + // read only this node's slice of the global amplitude array into its buffer + qindex localReals = 2 * qureg.numAmpsPerNode; + qindex startReal = 2 * ((qindex) qureg.rank) * qureg.numAmpsPerNode; + adios2::Variable vAmps = io.InquireVariable("amps"); + vAmps.SetSelection({ { (size_t) startReal }, { (size_t) localReals } }); + engine.Get(vAmps, reinterpret_cast(qureg.cpuAmps)); + + engine.EndStep(); + engine.Close(); + + // propagate the restored CPU amplitudes to the GPU, if deployed + syncQuregToGpu(qureg); + + return qureg; +#else + // unreachable: the validation above always throws in non-checkpointing builds + return Qureg{}; +#endif +} diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index 62ff93166..fb7a6d583 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -277,6 +277,12 @@ namespace report { string QUREG_NOT_STATE_VECTOR = "Expected a statevector Qureg but received a density matrix."; + string QUREG_CHECKPOINTING_NOT_COMPILED = + "Qureg checkpointing (saveQuregToFile and createQuregFromFile) requires QuEST to be compiled with checkpointing support. Reconfigure with the CMake option -DENABLE_CHECKPOINTING=ON, which additionally requires the ADIOS2 library."; + + string QUREG_FILE_PRECISION_MISMATCH = + "The checkpoint file was written with a qreal precision of ${FILE_BYTES} bytes, but this QuEST build uses ${EXEC_BYTES} bytes. 
A Qureg can only be restored by a QuEST build using the same floating-point precision (QUEST_FLOAT_PRECISION) as the build which saved it."; + /* * MUTABLE OBJECT FLAGS @@ -1990,6 +1996,35 @@ void validate_quregIsDensityMatrix(Qureg qureg, const char* caller) { assertThat(qureg.isDensityMatrix, report::QUREG_NOT_DENSITY_MATRIX, caller); } +void validate_quregCheckpointingIsCompiled(const char* caller) { + + if (!global_isValidationEnabled) + return; + + // this validation must fire regardless of ENABLE_CHECKPOINTING, so the user + // receives a clear error (rather than a linker error) when calling the + // checkpointing API in a build which did not compile it + #ifdef ENABLE_CHECKPOINTING + bool isCompiled = true; + #else + bool isCompiled = false; + #endif + + assertThat(isCompiled, report::QUREG_CHECKPOINTING_NOT_COMPILED, caller); +} + +void validate_quregFileMatchesPrecision(int fileQrealBytes, const char* caller) { + + if (!global_isValidationEnabled) + return; + + tokenSubs vars = { + {"${FILE_BYTES}", fileQrealBytes}, + {"${EXEC_BYTES}", (int) sizeof(qreal)}}; + + assertThat(fileQrealBytes == (int) sizeof(qreal), report::QUREG_FILE_PRECISION_MISMATCH, vars, caller); +} + /* diff --git a/quest/src/core/validation.hpp b/quest/src/core/validation.hpp index 87f81a0d6..e8eb7306d 100644 --- a/quest/src/core/validation.hpp +++ b/quest/src/core/validation.hpp @@ -137,6 +137,10 @@ void validate_quregIsStateVector(Qureg qureg, const char* caller); void validate_quregIsDensityMatrix(Qureg qureg, const char* caller); +void validate_quregCheckpointingIsCompiled(const char* caller); + +void validate_quregFileMatchesPrecision(int fileQrealBytes, const char* caller); + /* diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4d5050e51..7ddcafee8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,6 +7,10 @@ add_executable(tests target_link_libraries(tests PRIVATE QuEST::QuEST Catch2::Catch2) target_compile_features(tests PUBLIC cxx_std_20) +if 
(ENABLE_CHECKPOINTING) + target_compile_definitions(tests PRIVATE ENABLE_CHECKPOINTING=1) +endif() + if (QUEST_ENABLE_MPI AND QUEST_ENABLE_SUBCOMM) target_link_libraries(tests PRIVATE MPI::MPI_CXX) endif() diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 59341759f..4e06fac9d 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -4,6 +4,7 @@ target_sources(tests PUBLIC calculations.cpp channels.cpp + checkpoint.cpp debug.cpp decoherence.cpp environment.cpp diff --git a/tests/unit/checkpoint.cpp b/tests/unit/checkpoint.cpp new file mode 100644 index 000000000..8326e62e1 --- /dev/null +++ b/tests/unit/checkpoint.cpp @@ -0,0 +1,88 @@ +/** @file + * Unit tests of Qureg checkpointing (saveQuregToFile / createQuregFromFile). + * + * These tests are only compiled when QuEST is built with the CMake option + * -DENABLE_CHECKPOINTING=ON (which additionally requires the ADIOS2 library). + * + * @author Ashmit JaiSarita Gupta + * + * @defgroup unitcheckpoint Checkpointing + * @ingroup unittests + */ + +#include "quest.h" + +#ifdef ENABLE_CHECKPOINTING + +#include + +#include +#include +#include +#include +#include + +namespace { + + const char* SV_FILE = "test_checkpoint_statevector.bp"; + const char* DM_FILE = "test_checkpoint_densitymatrix.bp"; + + qreal maxStatevectorAmpDiff(Qureg a, Qureg b) { + qreal m = 0; + for (qindex i = 0; i < a.numAmps; i++) + m = std::max(m, std::abs(getQuregAmp(a, i) - getQuregAmp(b, i))); + return m; + } + + qreal maxDensityMatrixAmpDiff(Qureg a, Qureg b) { + qreal m = 0; + qindex dim = (qindex) 1 << a.numQubits; + for (qindex r = 0; r < dim; r++) + for (qindex c = 0; c < dim; c++) + m = std::max(m, std::abs(getDensityQuregAmp(a, r, c) - getDensityQuregAmp(b, r, c))); + return m; + } +} + +TEST_CASE( "saveQuregToFile and createQuregFromFile", "[checkpoint]" ) { + + SECTION( "statevector round-trip preserves dimension and amplitudes" ) { + + Qureg q = createQureg(6); + initRandomPureState(q); + + 
saveQuregToFile(q, SV_FILE); + Qureg r = createQuregFromFile(SV_FILE); + + CHECK( r.numQubits == q.numQubits ); + CHECK( r.isDensityMatrix == q.isDensityMatrix ); + CHECK( maxStatevectorAmpDiff(q, r) < 1e-12 ); + + destroyQureg(q); + destroyQureg(r); + std::filesystem::remove_all(SV_FILE); + } + + SECTION( "density-matrix round-trip preserves dimension and amplitudes" ) { + + Qureg q = createDensityQureg(4); + initZeroState(q); + for (int t = 0; t < q.numQubits; t++) + applyHadamard(q, t); + applyT(q, 0); + applyControlledPauliX(q, 0, 1); + + saveQuregToFile(q, DM_FILE); + Qureg r = createQuregFromFile(DM_FILE); + + CHECK( r.numQubits == q.numQubits ); + CHECK( r.isDensityMatrix == q.isDensityMatrix ); + CHECK( maxDensityMatrixAmpDiff(q, r) < 1e-12 ); + + destroyQureg(q); + destroyQureg(r); + std::filesystem::remove_all(DM_FILE); + } +} + +#endif // ENABLE_CHECKPOINTING From fdec5e1cc5a51b524e87eca9d7eb439e645eb48e Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Thu, 4 Jun 2026 21:46:34 -0400 Subject: [PATCH 02/10] Adding adios2 download to CMake --- CMakeLists.txt | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 72093fe49..606069a18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -542,17 +542,58 @@ if (QUEST_ENABLE_CUQUANTUM) endif() + +### DRAFT BELOW + + # Checkpointing (ADIOS2) option(ENABLE_CHECKPOINTING "Enable Qureg checkpointing (saveQuregToFile / createQuregFromFile) via ADIOS2. Turned OFF by default." 
OFF) if (ENABLE_CHECKPOINTING) - find_package(adios2 REQUIRED) - target_link_libraries(QuEST PRIVATE adios2::cxx) - target_compile_definitions(QuEST PRIVATE ENABLE_CHECKPOINTING=1) - set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) - message(STATUS "Qureg checkpointing is turned ON (via ADIOS2).") + + find_package(adios2 QUIET) + + if(NOT adios2_FOUND) + message(STATUS "adios2 not found: fetching ADIOS2 via FetchContent") + + include(FetchContent) + FetchContent_Declare( + adios2 + GIT_REPOSITORY https://github.com/ornladios/ADIOS2.git + GIT_TAG v2.12.1 + ) + + # Forego MPI and CUDA if QuEST won't use + set(ADIOS2_USE_MPI ${QUEST_ENABLE_MPI} CACHE BOOL "" FORCE) + set(ADIOS2_USE_CUDA ${QUEST_ENABLE_CUDA} CACHE BOOL "" FORCE) + + # Forego unused facilities + set(ADIOS2_BUILD_TESTING OFF CACHE BOOL "" FORCE) + set(ADIOS2_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SODIUM OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Fortran OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_HDF5 OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_ZeroMQ OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SST OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_BZip2 OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Blosc OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SZ OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_ZFP OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_PNG OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Profiling OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Python OFF CACHE BOOL "" FORCE) + + FetchContent_MakeAvailable(adios2) + + else() + # force failure (see Oliver's Catch2 trick) + find_package(adios2 REQUIRED) + endif() endif() +### DRAFT ABOVE + + # =============================== # Set options to save in config.h From acfd1872a28cb134323f440bef0291d0b82bbec6 Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Thu, 4 Jun 2026 22:00:47 -0400 Subject: [PATCH 03/10] Trigger checkpoint tests --- .github/workflows/compile.yml | 2 ++ .github/workflows/test_free.yml | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git 
a/.github/workflows/compile.yml b/.github/workflows/compile.yml index c86de84f1..23087911d 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -67,6 +67,7 @@ jobs: cuda: [ON, OFF] hip: [ON, OFF] cuquantum: [ON, OFF] + adios2: [ON, OFF] mpilib: ['', 'mpich', 'ompi', 'impi', 'msmpi'] # disable deprecated API on MSVC, and assign unique compilers, @@ -249,6 +250,7 @@ jobs: -DQUEST_ENABLE_CUDA=${{ matrix.cuda }} -DQUEST_ENABLE_HIP=${{ matrix.hip }} -DQUEST_ENABLE_CUQUANTUM=${{ matrix.cuquantum }} + -DENABLE_CHECKPOINTING=${{ matrix.adios2 }} -DCMAKE_CUDA_ARCHITECTURES=${{ env.cuda_arch }} -DCMAKE_HIP_ARCHITECTURES=${{ env.hip_arch }} -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} diff --git a/.github/workflows/test_free.yml b/.github/workflows/test_free.yml index 2d332e842..7d6ab8642 100644 --- a/.github/workflows/test_free.yml +++ b/.github/workflows/test_free.yml @@ -41,7 +41,7 @@ jobs: # we will compile QuEST with all precisions but no parallelisation matrix: os: [ubuntu-latest, macos-latest, windows-latest] - version: [3, 4] + version: [4] # [3, 4] precision: [1, 2, 4] # MSVC cannot compile deprecated v3 tests @@ -68,6 +68,7 @@ jobs: -DQUEST_ENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }} -DQUEST_DISABLE_DEPRECATION_WARNINGS=${{ matrix.version == 3 && 'ON' || 'OFF' }} -DQUEST_FLOAT_PRECISION=${{ matrix.precision }} + -DENABLE_CHECKPOINTING=ON # force 'Release' build (needed by MSVC to enable optimisations) - name: Compile @@ -78,9 +79,13 @@ jobs: # TODO: # ctest currently doesn't know of our Catch2 tags, so we # are manually excluding each integration test by name + + # DEBUG: + # runining ONLY the checkpoint flags + - name: Run v4 tests if: ${{ matrix.version == 4 }} - run: ctest -j2 --output-on-failure --schedule-random -C Release -E "density evolution" + run: ctest -j2 --output-on-failure --schedule-random -C Release -E "density evolution" -R "saveQuregToFile" working-directory: ${{ env.build_dir }} # run v3 unit tests 
in random order From adf6427c92bd89d7aa46d201efb5cd05dc90ee31 Mon Sep 17 00:00:00 2001 From: Ashmit JaiSarita Gupta Date: Fri, 5 Jun 2026 23:00:15 +0000 Subject: [PATCH 04/10] fix: route checkpointing flag through config.h (QUEST_COMPILE_CHECKPOINTING) QuEST defines all compile-time feature macros centrally in config.h (generated from config.h.in). The checkpointing flag was instead passed as a raw target_compile_definitions, so validation.cpp (which doesn't include config.h) saw it undefined and always reported 'not compiled' under the project's normal build path. Add #cmakedefine01 QUEST_COMPILE_CHECKPOINTING to config.h.in, set it from the ENABLE_CHECKPOINTING option, link ADIOS2 to the QuEST target, and switch the sources/tests to #include config.h + #if QUEST_COMPILE_CHECKPOINTING. Remove the per-target compile-definition hacks. Verified: ON build -> config.h has =1 and tests/tests '[checkpoint]' passes (CPU, CPU+OMP); default OFF build has =0 and compiles without ADIOS2. --- CMakeLists.txt | 7 ++++--- quest/include/config.h.in | 4 ++++ quest/src/api/environment.cpp | 3 ++- quest/src/api/qureg.cpp | 7 ++++--- quest/src/core/validation.cpp | 7 ++++--- tests/CMakeLists.txt | 4 ---- tests/unit/checkpoint.cpp | 4 ++-- 7 files changed, 20 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 606069a18..2418ba1cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -588,10 +588,10 @@ if (ENABLE_CHECKPOINTING) # force failure (see Oliver's Catch2 trick) find_package(adios2 REQUIRED) endif() -endif() - -### DRAFT ABOVE + target_link_libraries(QuEST PRIVATE adios2::cxx) + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) +endif() @@ -605,6 +605,7 @@ set(QUEST_COMPILE_OMP ${QUEST_ENABLE_OMP}) set(QUEST_COMPILE_MPI ${QUEST_ENABLE_MPI}) set(QUEST_COMPILE_SUBCOMM ${QUEST_ENABLE_SUBCOMM}) set(QUEST_COMPILE_CUQUANTUM ${QUEST_ENABLE_CUQUANTUM}) +set(QUEST_COMPILE_CHECKPOINTING ${ENABLE_CHECKPOINTING}) set(QUEST_INCLUDE_DEPRECATED_FUNCTIONS 
${QUEST_ENABLE_DEPRECATED_API}) diff --git a/quest/include/config.h.in b/quest/include/config.h.in index 1bb8a0470..ef40e4e91 100644 --- a/quest/include/config.h.in +++ b/quest/include/config.h.in @@ -41,6 +41,7 @@ defined(QUEST_COMPILE_CUDA) || \ defined(QUEST_COMPILE_HIP) || \ defined(QUEST_COMPILE_CUQUANTUM) || \ + defined(QUEST_COMPILE_CHECKPOINTING) || \ defined(QUEST_ENABLE_NUMA) || \ defined(QUEST_INCLUDE_DEPRECATED_FUNCTIONS) || \ defined(QUEST_DISABLE_DEPRECATION_WARNINGS) @@ -84,6 +85,7 @@ #cmakedefine01 QUEST_COMPILE_CUDA #cmakedefine01 QUEST_COMPILE_CUQUANTUM #cmakedefine01 QUEST_COMPILE_HIP +#cmakedefine01 QUEST_COMPILE_CHECKPOINTING // crucial to QuEST source (informs optional NUMA usage) @@ -125,6 +127,7 @@ ! defined(QUEST_COMPILE_CUDA) || \ ! defined(QUEST_COMPILE_HIP) || \ ! defined(QUEST_COMPILE_CUQUANTUM) || \ + ! defined(QUEST_COMPILE_CHECKPOINTING) || \ ! defined(QUEST_ENABLE_NUMA) || \ ! defined(QUEST_INCLUDE_DEPRECATED_FUNCTIONS) || \ ! defined(QUEST_DISABLE_DEPRECATION_WARNINGS) @@ -152,6 +155,7 @@ ! (QUEST_COMPILE_CUDA == 0 || QUEST_COMPILE_CUDA == 1) || \ ! (QUEST_COMPILE_HIP == 0 || QUEST_COMPILE_HIP == 1) || \ ! (QUEST_COMPILE_CUQUANTUM == 0 || QUEST_COMPILE_CUQUANTUM == 1) || \ + ! (QUEST_COMPILE_CHECKPOINTING == 0 || QUEST_COMPILE_CHECKPOINTING == 1) || \ ! (QUEST_ENABLE_NUMA == 0 || QUEST_ENABLE_NUMA == 1) || \ ! (QUEST_INCLUDE_DEPRECATED_FUNCTIONS == 0 || QUEST_INCLUDE_DEPRECATED_FUNCTIONS == 1) || \ ! 
(QUEST_DISABLE_DEPRECATION_WARNINGS == 0 || QUEST_DISABLE_DEPRECATION_WARNINGS == 1) diff --git a/quest/src/api/environment.cpp b/quest/src/api/environment.cpp index 10ffc44d6..2685d1494 100644 --- a/quest/src/api/environment.cpp +++ b/quest/src/api/environment.cpp @@ -5,6 +5,7 @@ * @author Tyson Jones */ +#include "quest/include/config.h" #include "quest/include/environment.h" #include "quest/include/precision.h" #include "quest/include/modes.h" @@ -206,7 +207,7 @@ void printPrecisionInfo() { // reports whether QuEST was compiled with Qureg checkpointing support (ADIOS2) static bool isCheckpointingCompiled() { -#ifdef ENABLE_CHECKPOINTING +#if QUEST_COMPILE_CHECKPOINTING return true; #else return false; diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp index db5350a64..7d1eec3aa 100644 --- a/quest/src/api/qureg.cpp +++ b/quest/src/api/qureg.cpp @@ -5,6 +5,7 @@ * @author Tyson Jones */ +#include "quest/include/config.h" #include "quest/include/qureg.h" #include "quest/include/modes.h" #include "quest/include/environment.h" @@ -25,7 +26,7 @@ #include #include -#ifdef ENABLE_CHECKPOINTING +#if QUEST_COMPILE_CHECKPOINTING #include #endif @@ -579,7 +580,7 @@ vector> getDensityQuregAmps(Qureg qureg, qindex startRow, qindex s void saveQuregToFile(Qureg qureg, const char* fn) { validate_quregCheckpointingIsCompiled(__func__); -#ifdef ENABLE_CHECKPOINTING +#if QUEST_COMPILE_CHECKPOINTING validate_quregFields(qureg, __func__); // ensure the CPU amplitudes reflect any GPU-resident state before writing @@ -624,7 +625,7 @@ void saveQuregToFile(Qureg qureg, const char* fn) { Qureg createQuregFromFile(const char* fn) { validate_quregCheckpointingIsCompiled(__func__); -#ifdef ENABLE_CHECKPOINTING +#if QUEST_COMPILE_CHECKPOINTING adios2::ADIOS adios; adios2::IO io = adios.DeclareIO("QuESTQuregLoad"); adios2::Engine engine = io.Open(fn, adios2::Mode::Read); diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index fb7a6d583..c0e010bc0 100644 --- 
a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -7,6 +7,7 @@ * @author Kshitij Chhabra (patched v3 overflow bug) */ +#include "quest/include/config.h" #include "quest/include/modes.h" #include "quest/include/types.h" #include "quest/include/precision.h" @@ -2001,10 +2002,10 @@ void validate_quregCheckpointingIsCompiled(const char* caller) { if (!global_isValidationEnabled) return; - // this validation must fire regardless of ENABLE_CHECKPOINTING, so the user - // receives a clear error (rather than a linker error) when calling the + // this validation must fire regardless of QUEST_COMPILE_CHECKPOINTING, so the + // user receives a clear error (rather than a linker error) when calling the // checkpointing API in a build which did not compile it - #ifdef ENABLE_CHECKPOINTING + #if QUEST_COMPILE_CHECKPOINTING bool isCompiled = true; #else bool isCompiled = false; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7ddcafee8..4d5050e51 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,10 +7,6 @@ add_executable(tests target_link_libraries(tests PRIVATE QuEST::QuEST Catch2::Catch2) target_compile_features(tests PUBLIC cxx_std_20) -if (ENABLE_CHECKPOINTING) - target_compile_definitions(tests PRIVATE ENABLE_CHECKPOINTING=1) -endif() - if (QUEST_ENABLE_MPI AND QUEST_ENABLE_SUBCOMM) target_link_libraries(tests PRIVATE MPI::MPI_CXX) endif() diff --git a/tests/unit/checkpoint.cpp b/tests/unit/checkpoint.cpp index 8326e62e1..b11083e4c 100644 --- a/tests/unit/checkpoint.cpp +++ b/tests/unit/checkpoint.cpp @@ -12,7 +12,7 @@ #include "quest.h" -#ifdef ENABLE_CHECKPOINTING +#if QUEST_COMPILE_CHECKPOINTING #include @@ -85,4 +85,4 @@ TEST_CASE( "saveQuregToFile and createQuregFromFile", "[checkpoint]" ) { } } -#endif // ENABLE_CHECKPOINTING +#endif // QUEST_COMPILE_CHECKPOINTING From 4c4c3dddc9038e25a60259a7b96ddc5d5e096b20 Mon Sep 17 00:00:00 2001 From: Ashmit JaiSarita Gupta Date: Mon, 8 Jun 2026 07:38:23 +0530 Subject: [PATCH 
05/10] fix: collective MPI checkpointing + CUDA-off build, rank-safe test cleanup --- CMakeLists.txt | 19 +++++++++++++++---- quest/src/api/qureg.cpp | 18 ++++++++++++++++-- tests/unit/checkpoint.cpp | 17 +++++++++++++++-- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2418ba1cb..0cf5b7bd5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -562,9 +562,13 @@ if (ENABLE_CHECKPOINTING) GIT_TAG v2.12.1 ) - # Forego MPI and CUDA if QuEST won't use - set(ADIOS2_USE_MPI ${QUEST_ENABLE_MPI} CACHE BOOL "" FORCE) - set(ADIOS2_USE_CUDA ${QUEST_ENABLE_CUDA} CACHE BOOL "" FORCE) + # Match ADIOS2's MPI to QuEST's so distributed runs write per-rank slices + # into one shared file. ADIOS2's CUDA support is deliberately left OFF: + # checkpointing copies amps to host memory (syncQuregFromGpu/syncQuregToGpu) + # before any I/O, so ADIOS2 never touches device pointers. Building it with + # CUDA is unnecessary and stalls the Windows CUDA CI job. + set(ADIOS2_USE_MPI ${QUEST_ENABLE_MPI} CACHE BOOL "" FORCE) + set(ADIOS2_USE_CUDA OFF CACHE BOOL "" FORCE) # Forego unused facilities set(ADIOS2_BUILD_TESTING OFF CACHE BOOL "" FORCE) @@ -589,7 +593,14 @@ if (ENABLE_CHECKPOINTING) find_package(adios2 REQUIRED) endif() - target_link_libraries(QuEST PRIVATE adios2::cxx) + # In distributed builds link ADIOS2's MPI-enabled C++ interface: it defines + # ADIOS2_USE_MPI, which exposes the adios2::ADIOS(MPI_Comm) constructor used in + # qureg.cpp for collective per-rank I/O. The serial target lacks it. 
+ if (QUEST_ENABLE_MPI) + target_link_libraries(QuEST PRIVATE adios2::cxx_mpi) + else() + target_link_libraries(QuEST PRIVATE adios2::cxx) + endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) endif() diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp index 7d1eec3aa..b82614708 100644 --- a/quest/src/api/qureg.cpp +++ b/quest/src/api/qureg.cpp @@ -28,6 +28,20 @@ #if QUEST_COMPILE_CHECKPOINTING #include +#if QUEST_COMPILE_MPI +#include +#endif +#endif + +// In distributed builds, ADIOS2 must be given QuEST's communicator so that each +// node's call collectively writes/reads its own slice of the shared file. Without +// it, ADIOS2 runs serially per rank and the per-node slices never form one file. +#if QUEST_COMPILE_CHECKPOINTING +#if QUEST_COMPILE_MPI +#define QUEST_MAKE_ADIOS() adios2::ADIOS(MPI_COMM_WORLD) +#else +#define QUEST_MAKE_ADIOS() adios2::ADIOS() +#endif #endif using std::string; @@ -586,7 +600,7 @@ void saveQuregToFile(Qureg qureg, const char* fn) { // ensure the CPU amplitudes reflect any GPU-resident state before writing syncQuregFromGpu(qureg); - adios2::ADIOS adios; + adios2::ADIOS adios = QUEST_MAKE_ADIOS(); adios2::IO io = adios.DeclareIO("QuESTQuregSave"); adios2::Engine engine = io.Open(fn, adios2::Mode::Write); @@ -626,7 +640,7 @@ Qureg createQuregFromFile(const char* fn) { validate_quregCheckpointingIsCompiled(__func__); #if QUEST_COMPILE_CHECKPOINTING - adios2::ADIOS adios; + adios2::ADIOS adios = QUEST_MAKE_ADIOS(); adios2::IO io = adios.DeclareIO("QuESTQuregLoad"); adios2::Engine engine = io.Open(fn, adios2::Mode::Read); diff --git a/tests/unit/checkpoint.cpp b/tests/unit/checkpoint.cpp index b11083e4c..561b0467d 100644 --- a/tests/unit/checkpoint.cpp +++ b/tests/unit/checkpoint.cpp @@ -60,7 +60,15 @@ TEST_CASE( "saveQuregToFile and createQuregFromFile", "[checkpoint]" ) { destroyQureg(q); destroyQureg(r); - std::filesystem::remove_all(SV_FILE); + + // In distributed runs every node opened the same shared file, so only one + // 
may delete it; a barrier first guarantees all nodes have finished + // reading, and a barrier after keeps the next section's collective write + // from racing a half-removed directory. + syncQuESTEnv(); + if (getQuESTEnv().rank == 0) + std::filesystem::remove_all(SV_FILE); + syncQuESTEnv(); } SECTION( "density-matrix round-trip preserves dimension and amplitudes" ) { @@ -81,7 +89,12 @@ TEST_CASE( "saveQuregToFile and createQuregFromFile", "[checkpoint]" ) { destroyQureg(q); destroyQureg(r); - std::filesystem::remove_all(DM_FILE); + + // see the statevector section: one node deletes, barriers bracket cleanup + syncQuESTEnv(); + if (getQuESTEnv().rank == 0) + std::filesystem::remove_all(DM_FILE); + syncQuESTEnv(); } } From 921d24d63c8987b1db5897d1caea4e6b2f50984b Mon Sep 17 00:00:00 2001 From: Ashmit JaiSarita Gupta Date: Tue, 9 Jun 2026 02:50:46 +0530 Subject: [PATCH 06/10] renamed option to QUEST_ENABLE_CHECKPOINTING, fixed extern C linkage and MPI adios2 fallback, restored test_free CI, broadened checkpoint tests across deployments --- .github/workflows/compile.yml | 3 +- .github/workflows/test_free.yml | 9 +-- CMakeLists.txt | 26 ++++--- docs/compile.md | 9 ++- quest/include/qureg.h | 2 +- quest/src/api/environment.cpp | 6 +- quest/src/api/qureg.cpp | 22 +++--- quest/src/core/validation.cpp | 2 +- tests/unit/checkpoint.cpp | 116 +++++++++++++++++++++----------- 9 files changed, 121 insertions(+), 74 deletions(-) diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml index 23087911d..3c0b95420 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -47,6 +47,7 @@ jobs: ${{ matrix.cuda == 'ON' && 'CUDA' || '' }} ${{ matrix.hip == 'ON' && 'HIP' || '' }} ${{ matrix.cuquantum == 'ON' && 'CUQ' || '' }} + ${{ matrix.adios2 == 'ON' && 'CKPT' || '' }} runs-on: ${{ matrix.os }} @@ -250,7 +251,7 @@ jobs: -DQUEST_ENABLE_CUDA=${{ matrix.cuda }} -DQUEST_ENABLE_HIP=${{ matrix.hip }} -DQUEST_ENABLE_CUQUANTUM=${{ matrix.cuquantum }} - 
-DENABLE_CHECKPOINTING=${{ matrix.adios2 }} + -DQUEST_ENABLE_CHECKPOINTING=${{ matrix.adios2 }} -DCMAKE_CUDA_ARCHITECTURES=${{ env.cuda_arch }} -DCMAKE_HIP_ARCHITECTURES=${{ env.hip_arch }} -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} diff --git a/.github/workflows/test_free.yml b/.github/workflows/test_free.yml index 7d6ab8642..01311140a 100644 --- a/.github/workflows/test_free.yml +++ b/.github/workflows/test_free.yml @@ -41,7 +41,7 @@ jobs: # we will compile QuEST with all precisions but no parallelisation matrix: os: [ubuntu-latest, macos-latest, windows-latest] - version: [4] # [3, 4] + version: [3, 4] precision: [1, 2, 4] # MSVC cannot compile deprecated v3 tests @@ -68,7 +68,7 @@ jobs: -DQUEST_ENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }} -DQUEST_DISABLE_DEPRECATION_WARNINGS=${{ matrix.version == 3 && 'ON' || 'OFF' }} -DQUEST_FLOAT_PRECISION=${{ matrix.precision }} - -DENABLE_CHECKPOINTING=ON + -DQUEST_ENABLE_CHECKPOINTING=ON # force 'Release' build (needed by MSVC to enable optimisations) - name: Compile @@ -79,13 +79,10 @@ jobs: # TODO: # ctest currently doesn't know of our Catch2 tags, so we # are manually excluding each integration test by name - - # DEBUG: - # runining ONLY the checkpoint flags - name: Run v4 tests if: ${{ matrix.version == 4 }} - run: ctest -j2 --output-on-failure --schedule-random -C Release -E "density evolution" -R "saveQuregToFile" + run: ctest -j2 --output-on-failure --schedule-random -C Release -E "density evolution" working-directory: ${{ env.build_dir }} # run v3 unit tests in random order diff --git a/CMakeLists.txt b/CMakeLists.txt index 0cf5b7bd5..d70ac04fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -543,17 +543,24 @@ endif() -### DRAFT BELOW - - # Checkpointing (ADIOS2) -option(ENABLE_CHECKPOINTING "Enable Qureg checkpointing (saveQuregToFile / createQuregFromFile) via ADIOS2. Turned OFF by default." 
OFF) -if (ENABLE_CHECKPOINTING) +option(QUEST_ENABLE_CHECKPOINTING "Enable Qureg checkpointing (saveQuregToFile / createQuregFromFile) via ADIOS2. Turned OFF by default." OFF) +message(STATUS "Checkpointing is turned ${QUEST_ENABLE_CHECKPOINTING}. Set QUEST_ENABLE_CHECKPOINTING to modify.") +if (QUEST_ENABLE_CHECKPOINTING) find_package(adios2 QUIET) - if(NOT adios2_FOUND) - message(STATUS "adios2 not found: fetching ADIOS2 via FetchContent") + # A distributed QuEST needs an MPI-enabled ADIOS2 (which provides the + # adios2::cxx_mpi target). A serial system install lacks it, so in that case we + # ignore the found package and fetch an MPI-enabled build instead of failing. + set(quest_use_found_adios2 ${adios2_FOUND}) + if (adios2_FOUND AND QUEST_ENABLE_MPI AND NOT TARGET adios2::cxx_mpi) + message(STATUS "Found ADIOS2 lacks MPI support (no adios2::cxx_mpi target); fetching an MPI-enabled build instead") + set(quest_use_found_adios2 FALSE) + endif() + + if(NOT quest_use_found_adios2) + message(STATUS "fetching ADIOS2 via FetchContent") include(FetchContent) FetchContent_Declare( @@ -589,7 +596,8 @@ if (ENABLE_CHECKPOINTING) FetchContent_MakeAvailable(adios2) else() - # force failure (see Oliver's Catch2 trick) + # re-run non-QUIET so configuration fails with a clear error if the package + # somehow became unavailable between the two calls find_package(adios2 REQUIRED) endif() @@ -616,7 +624,7 @@ set(QUEST_COMPILE_OMP ${QUEST_ENABLE_OMP}) set(QUEST_COMPILE_MPI ${QUEST_ENABLE_MPI}) set(QUEST_COMPILE_SUBCOMM ${QUEST_ENABLE_SUBCOMM}) set(QUEST_COMPILE_CUQUANTUM ${QUEST_ENABLE_CUQUANTUM}) -set(QUEST_COMPILE_CHECKPOINTING ${ENABLE_CHECKPOINTING}) +set(QUEST_COMPILE_CHECKPOINTING ${QUEST_ENABLE_CHECKPOINTING}) set(QUEST_INCLUDE_DEPRECATED_FUNCTIONS ${QUEST_ENABLE_DEPRECATED_API}) diff --git a/docs/compile.md b/docs/compile.md index 56157ce72..664ac56a0 100644 --- a/docs/compile.md +++ b/docs/compile.md @@ -696,14 +696,17 @@ Note that distributed executables are launched in 
a distinct way to the other de ------------------ + + + ## Checkpointing QuEST can optionally _checkpoint_ a `Qureg` to disk; writing its state to a file with `saveQuregToFile()`, to later be restored into a new `Qureg` with `createQuregFromFile()`. This is useful for long-running jobs which risk timeout or failure - an evolving `Qureg` can be periodically saved and resumed in a subsequent process. The file records only the `Qureg` dimension (the number of qubits, and whether it is a density matrix) and its amplitudes; never the incidental deployment configuration. A `Qureg` saved by one deployment (say, distributed over `8` nodes) can therefore be restored by any other (say, a single GPU-accelerated node). -Checkpointing is built upon [ADIOS2](https://github.com/ornladios/ADIOS2) and is _disabled_ by default. To enable it, install ADIOS2 and specify `ENABLE_CHECKPOINTING` at configuration: +Checkpointing is built upon [ADIOS2](https://github.com/ornladios/ADIOS2) and is _disabled_ by default. To enable it, install ADIOS2 and specify `QUEST_ENABLE_CHECKPOINTING` at configuration: ```bash # configure -cmake .. -D ENABLE_CHECKPOINTING=ON +cmake .. -D QUEST_ENABLE_CHECKPOINTING=ON # build cmake --build . --parallel @@ -712,7 +715,7 @@ cmake --build . --parallel > [!IMPORTANT] > ADIOS2 must be discoverable by CMake. If it was installed to a non-standard location (such as `~/.local`), pass its prefix via `CMAKE_PREFIX_PATH`: > ```bash -> cmake .. -D ENABLE_CHECKPOINTING=ON -D CMAKE_PREFIX_PATH=$HOME/.local +> cmake .. -D QUEST_ENABLE_CHECKPOINTING=ON -D CMAKE_PREFIX_PATH=$HOME/.local > ``` Calling `saveQuregToFile()` or `createQuregFromFile()` in a build _without_ checkpointing enabled throws a validation error. 
diff --git a/quest/include/qureg.h b/quest/include/qureg.h index 042bf5676..b0e33aa1d 100644 --- a/quest/include/qureg.h +++ b/quest/include/qureg.h @@ -493,7 +493,7 @@ void getDensityQuregAmps(qcomp** outAmps, Qureg qureg, qindex startRow, qindex s * @defgroup qureg_checkpoint Checkpointing * @brief Functions for saving a Qureg to file and restoring it later. * @details These functions are only available when QuEST is compiled with - * checkpointing support (CMake variable @c ENABLE_CHECKPOINTING=ON), + * checkpointing support (CMake variable @c QUEST_ENABLE_CHECKPOINTING=ON), * which additionally requires the ADIOS2 library. Calling them in a * build without checkpointing support throws a validation error. * @{ diff --git a/quest/src/api/environment.cpp b/quest/src/api/environment.cpp index 2685d1494..700ece439 100644 --- a/quest/src/api/environment.cpp +++ b/quest/src/api/environment.cpp @@ -207,11 +207,7 @@ void printPrecisionInfo() { // reports whether QuEST was compiled with Qureg checkpointing support (ADIOS2) static bool isCheckpointingCompiled() { -#if QUEST_COMPILE_CHECKPOINTING - return true; -#else - return false; -#endif + return (bool) QUEST_COMPILE_CHECKPOINTING; } diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp index b82614708..70be4fd62 100644 --- a/quest/src/api/qureg.cpp +++ b/quest/src/api/qureg.cpp @@ -33,15 +33,17 @@ #endif #endif +#if QUEST_COMPILE_CHECKPOINTING // In distributed builds, ADIOS2 must be given QuEST's communicator so that each // node's call collectively writes/reads its own slice of the shared file. Without // it, ADIOS2 runs serially per rank and the per-node slices never form one file. 
-#if QUEST_COMPILE_CHECKPOINTING +static adios2::ADIOS makeAdios() { #if QUEST_COMPILE_MPI -#define QUEST_MAKE_ADIOS() adios2::ADIOS(MPI_COMM_WORLD) + return adios2::ADIOS(MPI_COMM_WORLD); #else -#define QUEST_MAKE_ADIOS() adios2::ADIOS() + return adios2::ADIOS(); #endif +} #endif using std::string; @@ -585,13 +587,17 @@ vector> getDensityQuregAmps(Qureg qureg, qindex startRow, qindex s /* * CHECKPOINTING * - * which is compiled only when ENABLE_CHECKPOINTING=ON (requiring ADIOS2). + * which is compiled only when QUEST_ENABLE_CHECKPOINTING=ON (requiring ADIOS2). * The API functions are always defined so that the validation layer can throw * a clear error in non-checkpointing builds, rather than failing to link. + * + * These are defined with C linkage (matching their extern "C" declarations in + * qureg.h) so they remain callable from C consumers; the signatures pass no + * qcomp by value and so stay C-ABI-safe. */ -void saveQuregToFile(Qureg qureg, const char* fn) { +extern "C" void saveQuregToFile(Qureg qureg, const char* fn) { validate_quregCheckpointingIsCompiled(__func__); #if QUEST_COMPILE_CHECKPOINTING @@ -600,7 +606,7 @@ void saveQuregToFile(Qureg qureg, const char* fn) { // ensure the CPU amplitudes reflect any GPU-resident state before writing syncQuregFromGpu(qureg); - adios2::ADIOS adios = QUEST_MAKE_ADIOS(); + adios2::ADIOS adios = makeAdios(); adios2::IO io = adios.DeclareIO("QuESTQuregSave"); adios2::Engine engine = io.Open(fn, adios2::Mode::Write); @@ -636,11 +642,11 @@ void saveQuregToFile(Qureg qureg, const char* fn) { } -Qureg createQuregFromFile(const char* fn) { +extern "C" Qureg createQuregFromFile(const char* fn) { validate_quregCheckpointingIsCompiled(__func__); #if QUEST_COMPILE_CHECKPOINTING - adios2::ADIOS adios = QUEST_MAKE_ADIOS(); + adios2::ADIOS adios = makeAdios(); adios2::IO io = adios.DeclareIO("QuESTQuregLoad"); adios2::Engine engine = io.Open(fn, adios2::Mode::Read); diff --git a/quest/src/core/validation.cpp 
b/quest/src/core/validation.cpp index c0e010bc0..aa1d0b4ec 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -279,7 +279,7 @@ namespace report { "Expected a statevector Qureg but received a density matrix."; string QUREG_CHECKPOINTING_NOT_COMPILED = - "Qureg checkpointing (saveQuregToFile and createQuregFromFile) requires QuEST to be compiled with checkpointing support. Reconfigure with the CMake option -DENABLE_CHECKPOINTING=ON, which additionally requires the ADIOS2 library."; + "Qureg checkpointing (saveQuregToFile and createQuregFromFile) requires QuEST to be compiled with checkpointing support. Reconfigure with the CMake option -DQUEST_ENABLE_CHECKPOINTING=ON, which additionally requires the ADIOS2 library."; string QUREG_FILE_PRECISION_MISMATCH = "The checkpoint file was written with a qreal precision of ${FILE_BYTES} bytes, but this QuEST build uses ${EXEC_BYTES} bytes. A Qureg can only be restored by a QuEST build using the same floating-point precision (QUEST_FLOAT_PRECISION) as the build which saved it."; diff --git a/tests/unit/checkpoint.cpp b/tests/unit/checkpoint.cpp index 561b0467d..121cbd5fb 100644 --- a/tests/unit/checkpoint.cpp +++ b/tests/unit/checkpoint.cpp @@ -2,7 +2,7 @@ * Unit tests of Qureg checkpointing (saveQuregToFile / createQuregFromFile). * * These tests are only compiled when QuEST is built with the CMake option - * -DENABLE_CHECKPOINTING=ON (which additionally requires the ADIOS2 library). + * -DQUEST_ENABLE_CHECKPOINTING=ON (which additionally requires the ADIOS2 library). 
* * @author Ashmit JaiSarita Gupta * @@ -16,12 +16,24 @@ #include +#include "tests/utils/macros.hpp" +#include "tests/utils/cache.hpp" + #include #include #include #include #include + + +/* + * file constants and helpers + */ + +#define TEST_CATEGORY \ + LABEL_UNIT_TAG "[checkpoint]" + namespace { const char* SV_FILE = "test_checkpoint_statevector.bp"; @@ -42,60 +54,84 @@ namespace { m = std::max(m, std::abs(getDensityQuregAmp(a, r, c) - getDensityQuregAmp(b, r, c))); return m; } + + // distributed-safe cleanup: a barrier guarantees every node has finished + // reading the shared file, only rank 0 deletes it (concurrent removal races), + // and a second barrier stops the next write racing a half-removed directory. + void removeCheckpointFile(const char* fn) { + syncQuESTEnv(); + if (getQuESTEnv().rank == 0) + std::filesystem::remove_all(fn); + syncQuESTEnv(); + } } -TEST_CASE( "saveQuregToFile and createQuregFromFile", "[checkpoint]" ) { - SECTION( "statevector round-trip preserves dimension and amplitudes" ) { - Qureg q = createQureg(6); - initRandomPureState(q); +/** TESTS + * + * @ingroup unitcheckpoint + * @{ + */ - saveQuregToFile(q, SV_FILE); - Qureg r = createQuregFromFile(SV_FILE); +TEST_CASE( "saveQuregToFile and createQuregFromFile", TEST_CATEGORY ) { - CHECK( r.numQubits == q.numQubits ); - CHECK( r.isDensityMatrix == q.isDensityMatrix ); - CHECK( maxStatevectorAmpDiff(q, r) < 1e-12 ); + SECTION( LABEL_CORRECTNESS ) { - destroyQureg(q); - destroyQureg(r); + // iterate the cached Quregs so the save path is exercised under every + // deployment combination (serial, OMP, MPI, GPU and their mixtures); + // each restored Qureg chooses its own deployment independently + SECTION( LABEL_STATEVEC ) { - // In distributed runs every node opened the same shared file, so only one - // may delete it; a barrier first guarantees all nodes have finished - // reading, and a barrier after keeps the next section's collective write - // from racing a half-removed directory. 
- syncQuESTEnv(); - if (getQuESTEnv().rank == 0) - std::filesystem::remove_all(SV_FILE); - syncQuESTEnv(); - } + for (auto& [label, q] : getCachedStatevecs()) { + DYNAMIC_SECTION( label ) { - SECTION( "density-matrix round-trip preserves dimension and amplitudes" ) { + initRandomPureState(q); - Qureg q = createDensityQureg(4); - initZeroState(q); - for (int t = 0; t < q.numQubits; t++) - applyHadamard(q, t); - applyT(q, 0); - applyControlledPauliX(q, 0, 1); + saveQuregToFile(q, SV_FILE); + Qureg r = createQuregFromFile(SV_FILE); - saveQuregToFile(q, DM_FILE); - Qureg r = createQuregFromFile(DM_FILE); + CHECK( r.numQubits == q.numQubits ); + CHECK( r.isDensityMatrix == q.isDensityMatrix ); + CHECK( maxStatevectorAmpDiff(q, r) < 1e-12 ); - CHECK( r.numQubits == q.numQubits ); - CHECK( r.isDensityMatrix == q.isDensityMatrix ); - CHECK( maxDensityMatrixAmpDiff(q, r) < 1e-12 ); + destroyQureg(r); + removeCheckpointFile(SV_FILE); + } + } + } - destroyQureg(q); - destroyQureg(r); + SECTION( LABEL_DENSMATR ) { - // see the statevector section: one node deletes, barriers bracket cleanup - syncQuESTEnv(); - if (getQuESTEnv().rank == 0) - std::filesystem::remove_all(DM_FILE); - syncQuESTEnv(); + for (auto& [label, q] : getCachedDensmatrs()) { + DYNAMIC_SECTION( label ) { + + initRandomPureState(q); // works even for density matrices + + saveQuregToFile(q, DM_FILE); + Qureg r = createQuregFromFile(DM_FILE); + + CHECK( r.numQubits == q.numQubits ); + CHECK( r.isDensityMatrix == q.isDensityMatrix ); + CHECK( maxDensityMatrixAmpDiff(q, r) < 1e-12 ); + + destroyQureg(r); + removeCheckpointFile(DM_FILE); + } + } + } + } + + SECTION( LABEL_VALIDATION ) { + + // The only checkpointing-specific validation - calling the API when QuEST + // was compiled without checkpointing - is unreachable here, since this + // file only compiles under QUEST_COMPILE_CHECKPOINTING. ADIOS2's own + // runtime errors (e.g. a missing file) are not QuEST validation errors. 
+ SUCCEED( ); } } +/** @} (end defgroup) */ + #endif // QUEST_COMPILE_CHECKPOINTING From 24096866d9b0bda88d7c7cbd3e9aa86b25a9a21f Mon Sep 17 00:00:00 2001 From: Ashmit JaiSarita Gupta Date: Tue, 9 Jun 2026 12:50:37 +0530 Subject: [PATCH 07/10] fix: disable ADIOS2 streaming engines to stop Linux CI OOM The FetchContent ADIOS2 build was OOM-killed (exit 143) on the Linux CI runners while compiling the EVPath/atl/ffs/dill/enet third-party stack pulled in by the DataMan/SSC/MHS/SST network engines. Checkpointing only uses the local BP5 file engine, so disable all streaming/staging engines (plus MGARD and Blosc2). Verified the slimmed ADIOS2 still builds and the [checkpoint] round-trip passes serially and under MPI. --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index d70ac04fd..b99161fc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -585,8 +585,14 @@ if (QUEST_ENABLE_CHECKPOINTING) set(ADIOS2_USE_HDF5 OFF CACHE BOOL "" FORCE) set(ADIOS2_USE_ZeroMQ OFF CACHE BOOL "" FORCE) set(ADIOS2_USE_SST OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_DataMan OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SSC OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_MHS OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_DAOS OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_MGARD OFF CACHE BOOL "" FORCE) set(ADIOS2_USE_BZip2 OFF CACHE BOOL "" FORCE) set(ADIOS2_USE_Blosc OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Blosc2 OFF CACHE BOOL "" FORCE) set(ADIOS2_USE_SZ OFF CACHE BOOL "" FORCE) set(ADIOS2_USE_ZFP OFF CACHE BOOL "" FORCE) set(ADIOS2_USE_PNG OFF CACHE BOOL "" FORCE) From be3de773df662248a9197bf8b729ff97d5e3157c Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Wed, 10 Jun 2026 23:55:48 -0400 Subject: [PATCH 08/10] Free space before ADIOS2 installation --- .github/workflows/compile.yml | 4 ++-- .github/workflows/test_free.yml | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml 
index 3c0b95420..82e23b17e 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -159,9 +159,9 @@ jobs: # perform the job steps: - # free space for big-chungus ROCm compiler + # free space for big-chungus ROCm compiler, and ADIOS2 installation - name: Free disk space - if: ${{ matrix.hip == 'ON' }} + if: ${{ matrix.hip == 'ON' || matrix.adios2 == 'ON' }} uses: jlumbroso/free-disk-space@main with: tool-cache: false diff --git a/.github/workflows/test_free.yml b/.github/workflows/test_free.yml index 01311140a..edb5c1fc5 100644 --- a/.github/workflows/test_free.yml +++ b/.github/workflows/test_free.yml @@ -59,6 +59,12 @@ jobs: - name: Get QuEST uses: actions/checkout@main + # free space for big-chungus ADIOS2 installation + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + # compile serial unit tests, optionally include deprecated test - name: Configure CMake run: > From b849ba411b4531b17d3ab4485f32b7ef35e8e3c8 Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Thu, 11 Jun 2026 00:00:34 -0400 Subject: [PATCH 09/10] restrict pre-ADIOS2 memory free to linux since that was the only OS seeing the timeout anyhow --- .github/workflows/compile.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml index 82e23b17e..f320a0cf8 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -159,9 +159,9 @@ jobs: # perform the job steps: - # free space for big-chungus ROCm compiler, and ADIOS2 installation + # free space for big-chungus ROCm compiler, and ADIOS2 installation (only times out on Linux) - name: Free disk space - if: ${{ matrix.hip == 'ON' || matrix.adios2 == 'ON' }} + if: ${{ (matrix.hip == 'ON' || matrix.adios2 == 'ON') && matrix.os == 'ubuntu-latest' }} uses: jlumbroso/free-disk-space@main with: tool-cache: false From 122e35e633eda5c615003f9c07210ee194b9076a Mon Sep 17 00:00:00 2001 From: Tyson 
Jones Date: Thu, 11 Jun 2026 00:10:50 -0400 Subject: [PATCH 10/10] Force serial compilation to shrink memory to attemptedly avoid ADIOS2 OOM --- .github/workflows/compile.yml | 9 +++++---- .github/workflows/test_free.yml | 10 ++-------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml index f320a0cf8..75a0111cb 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -159,9 +159,9 @@ jobs: # perform the job steps: - # free space for big-chungus ROCm compiler, and ADIOS2 installation (only times out on Linux) + # free space for big-chungus ROCm compiler - name: Free disk space - if: ${{ (matrix.hip == 'ON' || matrix.adios2 == 'ON') && matrix.os == 'ubuntu-latest' }} + if: ${{ matrix.hip == 'ON' }} uses: jlumbroso/free-disk-space@main with: tool-cache: false @@ -257,9 +257,10 @@ jobs: -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} -DCMAKE_CXX_FLAGS=${{ matrix.mpi == 'ON' && matrix.cuda == 'ON' && '-fno-lto' || '' }} - # force 'Release' build (needed by MSVC to enable optimisations) + # force 'Release' build (needed by MSVC to enable optimisations), + # temporarily forcing serial compilation to avoid ADIOS2 OOM error - name: Compile - run: cmake --build ${{ env.build_dir }} --config Release --parallel + run: cmake --build ${{ env.build_dir }} --config Release --parallel 1 # run all compiled isolated examples to test for link-time errors, # continuing if any fail (since some deliberately fail) diff --git a/.github/workflows/test_free.yml b/.github/workflows/test_free.yml index edb5c1fc5..0f12cae6b 100644 --- a/.github/workflows/test_free.yml +++ b/.github/workflows/test_free.yml @@ -59,12 +59,6 @@ jobs: - name: Get QuEST uses: actions/checkout@main - # free space for big-chungus ADIOS2 installation - - name: Free disk space - uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - # compile serial unit tests, optionally include deprecated test - name: Configure 
CMake run: > @@ -76,9 +70,9 @@ jobs: -DQUEST_FLOAT_PRECISION=${{ matrix.precision }} -DQUEST_ENABLE_CHECKPOINTING=ON - # force 'Release' build (needed by MSVC to enable optimisations) + # force 'Release' build (needed by MSVC to enable optimisations), and force serial (to avoid ADIOS2 OOM) - name: Compile - run: cmake --build ${{ env.build_dir }} --config Release --parallel + run: cmake --build ${{ env.build_dir }} --config Release --parallel 1 # run v4 unit tests in random order, excluding the integration tests, # using the default environment variables (e.g. test all permutations)