From 51a110a080b30878d0f1f13e4bb2b8d25129af54 Mon Sep 17 00:00:00 2001 From: Jens Keiner Date: Thu, 25 Sep 2025 16:01:36 +0200 Subject: [PATCH] Add multi-threaded benchmarks. --- .github/workflows/build-linux.yml | 4 +-- benchmarks/Makefile.am | 21 +++++++++-- benchmarks/bench_nfft_direct.cpp | 60 ++++++++++++++++++++++++------- benchmarks/util.h | 2 +- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index c573f5bd..0dc24acb 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -19,7 +19,7 @@ jobs: compiler: ["gcc"] window: ["kaiserbessel", "gaussian", "bspline", "sinc"] # TODO: Add dirac. precision_opt: ["", "--enable-float", "--enable-long-double"] - openmp: [0, 1] + openmp: [1] build_octave: [0] # TODO: Re-activate Octave build and tests. build_julia: [1] include: @@ -132,7 +132,7 @@ jobs: if: steps.cache-codspeed.outputs.cache-hit != 'true' run: | cd codspeed-cpp/google_benchmark - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DCODSPEED_MODE=instrumentation -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=OFF. + cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DCODSPEED_MODE=instrumentation -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=OFF . - name: Build CodSpeed integration library if: steps.cache-codspeed.outputs.cache-hit != 'true' diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index e0a748ce..2e144864 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -7,21 +7,38 @@ bench_nfft_direct_SOURCES = bench_nfft_direct.cpp util.h bench_nfft_direct_CXXFLAGS = @nfft_benchmarks_CXXFLAGS@ @CXXFLAGS@ bench_nfft_direct_LDFLAGS = @nfft_benchmarks_LDFLAGS@ @fftw3_LDFLAGS@ @LDFLAGS@ bench_nfft_direct_LDADD = @nfft_benchmarks_LIBS@ $(top_builddir)/libnfft3@PREC_SUFFIX@.la @fftw3_LIBS@ @LIBS@ +if ENABLE_OPENMP +NFFT_BENCHMARKS_DIRECT_OMP = bench_nfft_direct_omp +bench_nfft_direct_omp_SOURCES = bench_nfft_direct.cpp util.h +bench_nfft_direct_omp_CXXFLAGS = @nfft_benchmarks_CXXFLAGS@ $(OPENMP_CFLAGS) @CXXFLAGS@ +bench_nfft_direct_omp_LDFLAGS = @nfft_benchmarks_LDFLAGS@ @fftw3_LDFLAGS@ $(OPENMP_CFLAGS) @LDFLAGS@ +bench_nfft_direct_omp_LDADD = @nfft_benchmarks_LIBS@ $(top_builddir)/libnfft3@PREC_SUFFIX@_omp.la @fftw3_LIBS_omp@ @fftw3_LIBS@ $(OPENMP_LIBS) +endif else NFFT_BENCHMARKS_DIRECT = + NFFT_BENCHMARKS_DIRECT_OMP = endif else NFFT_BENCHMARKS_DIRECT = + NFFT_BENCHMARKS_DIRECT_OMP = endif # Aggregate all benchmarks NFFT_BENCHMARKS = $(NFFT_BENCHMARKS_DIRECT) -noinst_PROGRAMS = $(NFFT_BENCHMARKS) +if ENABLE_OPENMP +NFFT_BENCHMARKS_OMP = $(NFFT_BENCHMARKS_DIRECT_OMP) +endif + +noinst_PROGRAMS = $(NFFT_BENCHMARKS) $(NFFT_BENCHMARKS_OMP) # Run all built benchmarks -bench: $(NFFT_BENCHMARKS) +bench: $(NFFT_BENCHMARKS) $(NFFT_BENCHMARKS_OMP) for bench in $(NFFT_BENCHMARKS); do \ echo "Running $$bench..."; \ ./$$bench; \ done + for bench in $(NFFT_BENCHMARKS_OMP); do \ + echo "Running $$bench..."; \ + ./$$bench; \ + done diff --git a/benchmarks/bench_nfft_direct.cpp b/benchmarks/bench_nfft_direct.cpp index f78e8b88..a29f7ceb 100644 --- a/benchmarks/bench_nfft_direct.cpp +++ b/benchmarks/bench_nfft_direct.cpp @@ -27,8 +27,30 @@ #include "util.h" +#ifdef _OPENMP + #define SUFFIX "_omp" +#else + #define SUFFIX "" +#endif + +static void DoSetup(const benchmark::State& state) { + #ifdef _OPENMP + #ifdef HAVE_FFTW_THREADS + FFTW(init_threads)(); + #endif + #endif +} + +static void DoTeardown(const benchmark::State& state) { + #ifdef _OPENMP + #ifdef HAVE_FFTW_THREADS + FFTW(cleanup_threads)(); + #endif + #endif +} + // Helper function to initialize random data -static void NFFT(init_random_data)(NFFT(plan)* plan) { +static void init_random_data(NFFT(plan)* plan) { NFFT(vrand_shifted_unit_double)(plan->x, plan->d * plan->M_total); NFFT(vrand_unit_complex)(plan->f_hat, plan->N_total); NFFT(vrand_unit_complex)(plan->f, plan->M_total); @@ -41,7 +63,7 @@ static void nfft_forward_direct_1d(benchmark::State& state) { NFFT(plan) plan; NFFT(init_1d)(&plan, N, M); - NFFT(init_random_data)(&plan); + init_random_data(&plan); for (auto _ : state) { NFFT(trafo_direct)(&plan); @@ -58,7 +80,7 @@ static void nfft_adjoint_direct_1d(benchmark::State& state) { NFFT(plan) plan; NFFT(init_1d)(&plan, N, M); - NFFT(init_random_data)(&plan); + init_random_data(&plan); for (auto _ : state) { NFFT(adjoint_direct)(&plan); @@ -76,7 +98,7 @@ static void nfft_forward_direct_2d(benchmark::State& state) { NFFT(plan) plan; NFFT(init_2d)(&plan, N1, N2, M); - NFFT(init_random_data)(&plan); + init_random_data(&plan); for (auto _ : state) { NFFT(trafo_direct)(&plan); @@ -94,7 +116,7 @@ static void nfft_adjoint_direct_2d(benchmark::State& state) { NFFT(plan) plan; NFFT(init_2d)(&plan, N1, N2, M); - NFFT(init_random_data)(&plan); + init_random_data(&plan); for (auto _ : state) { NFFT(adjoint_direct)(&plan); @@ -113,7 +135,7 @@ static void nfft_forward_direct_3d(benchmark::State& state) { NFFT(plan) plan; NFFT(init_3d)(&plan, N1, N2, N3, M); - NFFT(init_random_data)(&plan); + init_random_data(&plan); for (auto _ : state) { NFFT(trafo_direct)(&plan); @@ -132,7 +154,7 @@ static void nfft_adjoint_direct_3d(benchmark::State& state) { NFFT(plan) plan; NFFT(init_3d)(&plan, N1, N2, N3, M); - NFFT(init_random_data)(&plan); + init_random_data(&plan); for (auto _ : state) { NFFT(adjoint_direct)(&plan); @@ -143,44 +165,56 @@ static void nfft_adjoint_direct_3d(benchmark::State& state) { } // Register benchmarks for direct transforms -BENCH(nfft_forward_direct_1d) +BENCH(nfft_forward_direct_1d, SUFFIX) ->Args({32, 100}) ->Args({64, 200}) ->Args({128, 400}) ->Args({256, 800}) ->Args({512, 1600}) + ->Setup(DoSetup) + ->Teardown(DoTeardown) ->Complexity(); -BENCH(nfft_adjoint_direct_1d) +BENCH(nfft_adjoint_direct_1d, SUFFIX) ->Args({32, 100}) ->Args({64, 200}) ->Args({128, 400}) ->Args({256, 800}) ->Args({512, 1600}) + ->Setup(DoSetup) + ->Teardown(DoTeardown) ->Complexity(); -BENCH(nfft_forward_direct_2d) +BENCH(nfft_forward_direct_2d, SUFFIX) ->Args({16, 16, 500}) ->Args({32, 32, 1000}) ->Args({64, 64, 2000}) + ->Setup(DoSetup) + ->Teardown(DoTeardown) ->Complexity(); -BENCH(nfft_adjoint_direct_2d) +BENCH(nfft_adjoint_direct_2d, SUFFIX) ->Args({16, 16, 500}) ->Args({32, 32, 1000}) ->Args({64, 64, 2000}) + ->Setup(DoSetup) + ->Teardown(DoTeardown) ->Complexity(); -BENCH(nfft_forward_direct_3d) +BENCH(nfft_forward_direct_3d, SUFFIX) ->Args({4, 4, 4, 250}) ->Args({8, 8, 8, 500}) ->Args({16, 16, 16, 1000}) + ->Setup(DoSetup) + ->Teardown(DoTeardown) ->Complexity(); -BENCH(nfft_adjoint_direct_3d) +BENCH(nfft_adjoint_direct_3d, SUFFIX) ->Args({4, 4, 4, 250}) ->Args({8, 8, 8, 500}) ->Args({16, 16, 16, 1000}) + ->Setup(DoSetup) + ->Teardown(DoTeardown) ->Complexity(); // Main function. diff --git a/benchmarks/util.h b/benchmarks/util.h index 8aaf73db..ad0eb26f 100644 --- a/benchmarks/util.h +++ b/benchmarks/util.h @@ -22,6 +22,6 @@ #include "config.h" // Macro to register benchmark with optional prefix -#define BENCH(function) BENCHMARK(function)->Name("benchmarks/" __FILE__ "::" BENCHMARKS_PREFIX #function) +#define BENCH(function, suffix) BENCHMARK(function)->Name("benchmarks/" __FILE__ "::" BENCHMARKS_PREFIX #function suffix) #endif // NFFT_BENCHMARKS_UTIL_H \ No newline at end of file