Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion quest/src/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,14 @@ target_sources(QuEST
PRIVATE
cpu_config.cpp
cpu_subroutines.cpp
)
)

# issue #598: optionally use compensated (Kahan) summation in applyCompMatr's
# dense-matrix CPU subroutine. Off by default (naive summation); enable with
# -DQUEST_COMPENSATE_DENSEMATR_SUM=ON
# to trade ~2-7x runtime for ~25x better accuracy on many-target matrices.
option(QUEST_COMPENSATE_DENSEMATR_SUM "Use compensated (Kahan) summation in applyCompMatr's CPU subroutine (issue #598)" OFF)
if (QUEST_COMPENSATE_DENSEMATR_SUM)
target_compile_definitions(QuEST PRIVATE QUEST_COMPENSATED_DENSEMATR_SUM=1)
message(STATUS "Compensated (Kahan) dense-matrix summation is turned ON.")
endif()
38 changes: 25 additions & 13 deletions quest/src/cpu/cpu_subroutines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,8 +607,14 @@ void cpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, ConstList64 ctrls, Co

// i = nth local index where ctrls are active and targs form value k
qindex i = setBits(i0, targs.data(), numTargBits, k); // loop may be unrolled
amps[i] = getCpuQcomp(0, 0);


// accumulate the row-vector inner-product (matr[k] . cache) in a
// thread-private scalar, writing to memory once at the end
cpu_qcomp sum = getCpuQcomp(0, 0);
#if defined(QUEST_COMPENSATED_DENSEMATR_SUM)
cpu_qcomp comp = getCpuQcomp(0, 0); // running Kahan compensation
#endif

// loop may be unrolled
for (qindex j=0; j<numTargAmps; j++) {

Expand All @@ -624,18 +630,24 @@ void cpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, ConstList64 ctrls, Co
if constexpr (ApplyConj)
elem = conj(elem);

amps[i] += elem * cache[j];

/// @todo
/// qureg.cpuAmps[i] is being serially updated by only this thread,
/// so is a candidate for Kahan summation for improved numerical
/// stability. Explore whether this is time-free and worthwhile!
///
/// BEWARE that Kahan summation may be incompatible with
/// the commutator tricks used in base_qcomp's (ancestor
/// of cpu_qcomp) arithmetic operator overloads. Check
/// base_qcomp.hpp before implementing compensation.
cpu_qcomp term = elem * cache[j];

// this serial reduction over a dynamic (up to ~2^16) number of
// terms is liable to catastrophic cancellation; optionally
// compensate (#598). base_qcomp's operators are plain IEEE
// arithmetic (no associativity tricks), so the compensation
// is honoured here; enable via -DQUEST_COMPENSATE_DENSEMATR_SUM.
#if defined(QUEST_COMPENSATED_DENSEMATR_SUM)
cpu_qcomp y = term - comp;
cpu_qcomp t = sum + y;
comp = (t - sum) - y;
sum = t;
#else
sum += term;
#endif
}

amps[i] = sum;
}
}
}
Expand Down