From 588b653c4024bf2cfc7911ca37df83e5adfd29bb Mon Sep 17 00:00:00 2001
From: tzh476
Date: Thu, 11 Jun 2026 20:23:13 +0800
Subject: [PATCH] test: add bmi2 bitwise benchmark baseline

Change-Id: Iaa22a3db798903f6852a166ab4a039fd040b959b
---
 examples/automated/CMakeLists.txt             |   6 +
 examples/automated/benchmark_bmi2_bitwise.cpp | 139 ++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 examples/automated/benchmark_bmi2_bitwise.cpp

diff --git a/examples/automated/CMakeLists.txt b/examples/automated/CMakeLists.txt
index 5880c2ac0..2fbf257cb 100644
--- a/examples/automated/CMakeLists.txt
+++ b/examples/automated/CMakeLists.txt
@@ -1,3 +1,9 @@
 # @author Tyson Jones
 
 add_all_local_examples()
+
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-mbmi2" QUEST_COMPILER_SUPPORTS_MBMI2)
+if (QUEST_COMPILER_SUPPORTS_MBMI2)
+    target_compile_options(benchmark_bmi2_bitwise_cpp PRIVATE -mbmi2)
+endif()
diff --git a/examples/automated/benchmark_bmi2_bitwise.cpp b/examples/automated/benchmark_bmi2_bitwise.cpp
new file mode 100644
index 000000000..396f7e2fb
--- /dev/null
+++ b/examples/automated/benchmark_bmi2_bitwise.cpp
@@ -0,0 +1,139 @@
+/** @file
+ * Quick benchmark for BMI2-assisted bit-index helpers.
+ *
+ * Times getValueOfBits() and insertBitsWithMaskedValues() on pseudo-random
+ * inputs and prints the fastest observed nanoseconds-per-call for each case.
+ *
+ * @author tzh476
+ */
+
+#include "quest/src/core/bitwise.hpp"
+
+#include <algorithm>
+#include <array>
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+// Every repetition folds its result into this volatile, so the timed loops
+// have an observable side effect and cannot be optimised away.
+static volatile qindex sinkValue = 0;
+
+/** Returns a mask with bit indices[i] set for each i where bit i of pattern is set. */
+template <size_t N>
+qindex makeMask(const std::array<int,N>& indices, qindex pattern) {
+    qindex mask = 0;
+    for (size_t i=0; i<N; i++)
+        if ((pattern >> i) & 1)
+            mask |= QINDEX_ONE << indices[i];
+    return mask;
+}
+
+/** Times getValueOfBits() on the given bit indices.
+ *
+ * Runs numReps repetitions of numIterations calls, prints the fastest
+ * repetition as "<name> <time> ns/call" and returns that time in nanoseconds.
+ * Each call's argument is an entry of inputs plus the running accumulator,
+ * masked by ampMask, so successive calls form a dependency chain.
+ * inputs.size() must be a power of two because entries are selected with
+ * (i & (inputs.size() - 1)).
+ */
+template <size_t N>
+double benchGet(const std::string& name, const std::array<int,N>& indices, const std::vector<qindex>& inputs, qindex ampMask) {
+    constexpr qindex numIterations = 5000000;
+    constexpr int numReps = 5;
+
+    size_t inputMask = inputs.size() - 1;
+    double best = std::numeric_limits<double>::max();
+
+    for (int r=0; r<numReps; r++) {
+        qindex acc = static_cast<qindex>(0x13579BDF);
+        auto start = std::chrono::steady_clock::now();
+
+        for (qindex i=0; i<numIterations; i++) {
+            qindex n = (inputs[static_cast<size_t>(i) & inputMask] + acc) & ampMask;
+            acc ^= getValueOfBits(n, indices.data(), static_cast<int>(N)) + (i & 7);
+        }
+
+        auto end = std::chrono::steady_clock::now();
+        sinkValue ^= acc;
+
+        double nsPerCall = std::chrono::duration<double, std::nano>(end - start).count() / static_cast<double>(numIterations);
+        best = std::min(best, nsPerCall);
+    }
+
+    std::cout << std::left << std::setw(30) << name << " " << std::fixed << std::setprecision(3) << best << " ns/call\n";
+    return best;
+}
+
+/** Times insertBitsWithMaskedValues() on the given bit indices.
+ *
+ * Same measurement scheme as benchGet(): best of numReps repetitions, printed
+ * as "<name> <time> ns/call" and returned in nanoseconds. Inputs are masked by
+ * valueMask before the call and insertedMask is forwarded as the final argument.
+ * inputs.size() must be a power of two.
+ */
+template <size_t N>
+double benchInsert(const std::string& name, const std::array<int,N>& indices, const std::vector<qindex>& inputs, qindex valueMask, qindex insertedMask) {
+    constexpr qindex numIterations = 5000000;
+    constexpr int numReps = 5;
+
+    size_t inputMask = inputs.size() - 1;
+    double best = std::numeric_limits<double>::max();
+
+    for (int r=0; r<numReps; r++) {
+        qindex acc = static_cast<qindex>(0x2468ACE0);
+        auto start = std::chrono::steady_clock::now();
+
+        for (qindex i=0; i<numIterations; i++) {
+            qindex n = (inputs[static_cast<size_t>(i) & inputMask] + acc) & valueMask;
+            acc ^= insertBitsWithMaskedValues(n, indices.data(), static_cast<int>(N), insertedMask) + (i & 15);
+        }
+
+        auto end = std::chrono::steady_clock::now();
+        sinkValue ^= acc;
+
+        double nsPerCall = std::chrono::duration<double, std::nano>(end - start).count() / static_cast<double>(numIterations);
+        best = std::min(best, nsPerCall);
+    }
+
+    std::cout << std::left << std::setw(30) << name << " " << std::fixed << std::setprecision(3) << best << " ns/call\n";
+    return best;
+}
+
+int main() {
+#if defined(QUEST_USE_BMI2_INTRINSICS)
+    std::cout << "BMI2 intrinsics: enabled\n";
+#else
+    std::cout << "BMI2 intrinsics: disabled\n";
+#endif
+
+    // fill the input table from a 64-bit LCG; the state is held in an unsigned
+    // type so the multiply wraps instead of overflowing should qindex be signed
+    std::vector<qindex> inputs(1 << 15);
+    unsigned long long state = 0x123456789ABCDEFULL;
+    for (qindex& input : inputs) {
+        state = state * 0x5851F42D4C957F2DULL + 0x14057B7EF767814FULL;
+        input = static_cast<qindex>(state);
+    }
+
+    qindex nineQubitMask = (QINDEX_ONE << 9) - QINDEX_ONE;
+    const std::array<int,2> inds2 = {2, 7};
+    const std::array<int,5> inds5 = {0, 2, 4, 6, 8};
+    const std::array<int,6> inds6 = {0, 1, 3, 5, 7, 8};
+
+    benchGet("getValueOfBits 2 bits", inds2, inputs, nineQubitMask);
+    benchGet("getValueOfBits 5 bits", inds5, inputs, nineQubitMask);
+    benchGet("getValueOfBits 6 bits", inds6, inputs, nineQubitMask);
+
+    // each insert case masks its inputs to (9 - N) bits, N being the number of inserted indices
+    benchInsert("insertBitsWithMask 2 bits", inds2, inputs, (QINDEX_ONE << 7) - QINDEX_ONE, makeMask(inds2, 0b01));
+    benchInsert("insertBitsWithMask 5 bits", inds5, inputs, (QINDEX_ONE << 4) - QINDEX_ONE, makeMask(inds5, 0b10101));
+    benchInsert("insertBitsWithMask 6 bits", inds6, inputs, (QINDEX_ONE << 3) - QINDEX_ONE, makeMask(inds6, 0b101011));
+
+    std::cout << "sink: " << sinkValue << "\n";
+    return 0;
+}