Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,9 @@
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"GPRAT_WITH_CUDA": "OFF"
"GPRAT_WITH_CUDA": "OFF",
"GPRAT_APEX_STEPS": "OFF",
"GPRAT_APEX_CHOLESKY": "OFF"
}
},
{
Expand All @@ -169,23 +171,29 @@
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"GPRAT_WITH_CUDA": "OFF"
"GPRAT_WITH_CUDA": "OFF",
"GPRAT_APEX_STEPS": "OFF",
"GPRAT_APEX_CHOLESKY": "OFF"
}
},
{
"name": "dev-linux-gpu",
"binaryDir": "${sourceDir}/build/dev-linux-gpu",
"inherits": ["dev-linux"],
"cacheVariables": {
"GPRAT_WITH_CUDA": "ON"
"GPRAT_WITH_CUDA": "ON",
"GPRAT_APEX_STEPS": "OFF",
"GPRAT_APEX_CHOLESKY": "OFF"
}
},
{
"name": "release-linux-gpu",
"binaryDir": "${sourceDir}/build/release-linux-gpu",
"inherits": ["release-linux"],
"cacheVariables": {
"GPRAT_WITH_CUDA": "ON"
"GPRAT_WITH_CUDA": "ON",
"GPRAT_APEX_STEPS": "OFF",
"GPRAT_APEX_CHOLESKY": "OFF"
}
}
],
Expand Down
20 changes: 11 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ code.

## Dependencies

GPRat depends on [HPX](https://hpx-docs.stellar-group.org/latest/html/index.html) for asynchronous task-based parallelization.
GPRat depends on [HPX](https://hpx-docs.stellar-group.org/latest/html/index.html) for asynchronous task-based parallelization.
Furthermore, for CPU-only BLAS computation GPRat requires [OpenBLAS](http://www.openmathlib.org/OpenBLAS/) or [MKL](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html).
A [CUDA](https://developer.nvidia.com/cuda-toolkit) installation is required for GPU-only BLAS computations.

Expand Down Expand Up @@ -40,14 +40,16 @@ The configurations suffixed with `-gpu` build the library with CUDA.
GPRat can be built with or without Python bindings.
The following options can be set to include / exclude parts of the project:

| Option name | Description | Default value |
|-----------------------------|------------------------------------------------|-----------------|
| GPRAT_BUILD_CORE | Enable/Disable building of the core library | ON |
| GPRAT_BUILD_BINDINGS | Enable/Disable building of the Python bindings | ON |
| GPRAT_ENABLE_FORMAT_TARGETS | Enable/Disable code formatting helper targets | ON if top-level |
| GPRAT_ENABLE_EXAMPLES | Enable/Disable example projects | ON if top-level |
| GPRAT_USE_MKL | Enable/Disable usage of MKL library | OFF |
| GPRAT_WITH_CUDA | Enable/disable compilation with CUDA support | OFF |
| Option name | Description | Default value |
|--------------------------------|--------------------------------------------------------------------------------------|-----------------|
| GPRAT_BUILD_CORE | Enable/Disable building of the core library | ON |
| GPRAT_BUILD_BINDINGS | Enable/Disable building of the Python bindings | ON |
| GPRAT_ENABLE_FORMAT_TARGETS | Enable/Disable code formatting helper targets | ON if top-level |
| GPRAT_ENABLE_EXAMPLES | Enable/Disable example projects | ON if top-level |
| GPRAT_USE_MKL | Enable/Disable usage of MKL library | OFF |
| GPRAT_WITH_CUDA | Enable/disable compilation with CUDA support | OFF |
| GPRAT_APEX_STEPS | Enable/disable compilation for steps duration measurement with APEX | OFF |
| GPRAT_APEX_CHOLESKY | Enable/disable compilation for measuring Cholesky assembly and computation with APEX | OFF |

Respective scripts can be found in this directory.

Expand Down
18 changes: 18 additions & 0 deletions bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ option(GPRAT_WITH_CUDA "Enable GPU support with CUDA, cuSolver, cuBLAS" OFF)
# Pass variable to C++ code
add_compile_definitions(GPRAT_WITH_CUDA=$<BOOL:${GPRAT_WITH_CUDA}>)

# APEX profiling switches; both are OFF unless the user enables them.
#
# GPRAT_APEX_STEPS: measure the duration of steps with APEX.
option(GPRAT_APEX_STEPS "Enable measuring duration of steps with APEX" OFF)

# GPRAT_APEX_CHOLESKY: measure, inside the cholesky function, the assembly of
# the covariance matrix and the right looking cholesky with APEX. The cholesky
# function also returns a large matrix, but that adds significant overhead to
# the whole call and says little about the optimized assembly and right looking
# cholesky steps, which are the parts reused by other GP operations.
option(GPRAT_APEX_CHOLESKY
       "Enable measuring duration of cholesky assembly and computation with APEX"
       OFF)

# Forward both switches to the C++ code as 0/1 preprocessor definitions.
add_compile_definitions(
  GPRAT_APEX_STEPS=$<BOOL:${GPRAT_APEX_STEPS}>
  GPRAT_APEX_CHOLESKY=$<BOOL:${GPRAT_APEX_CHOLESKY}>)

file(GLOB SOURCE_FILES CONFIGURE_DEPENDS "*.cpp")
file(GLOB HEADER_FILES CONFIGURE_DEPENDS "*.hpp")

Expand Down
18 changes: 17 additions & 1 deletion compile_gprat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ else
USE_MKL=OFF
fi

# Select APEX profiling option from the fourth positional argument.
# Both switches start OFF; "steps" turns on GPRAT_APEX_STEPS, "cholesky" turns
# on GPRAT_APEX_CHOLESKY, and any other value leaves both OFF.
GPRAT_APEX_STEPS=OFF
GPRAT_APEX_CHOLESKY=OFF
case "$4" in
    steps)
        GPRAT_APEX_STEPS=ON
        ;;
    cholesky)
        GPRAT_APEX_CHOLESKY=ON
        ;;
esac

if command -v spack &> /dev/null; then
echo "Spack command found, checking for environments..."

Expand Down Expand Up @@ -101,7 +113,9 @@ if [[ $PRESET == "release-linux" || $PRESET == "dev-linux" ]]; then
-DHPX_IGNORE_BOOST_COMPATIBILITY=ON \
-DHPX_DIR=$HPX_CMAKE \
-DGPRAT_ENABLE_FORMAT_TARGETS=OFF \
-DGPRAT_ENABLE_MKL=$USE_MKL
-DGPRAT_ENABLE_MKL=$USE_MKL \
-DGPRAT_APEX_STEPS=${GPRAT_APEX_STEPS} \
-DGPRAT_APEX_CHOLESKY=${GPRAT_APEX_CHOLESKY}
elif [[ $PRESET == "release-linux-gpu" || $PRESET == "dev-linux-gpu" ]]; then
CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | awk -F '.' '{print $1$2}')

Expand All @@ -111,6 +125,8 @@ elif [[ $PRESET == "release-linux-gpu" || $PRESET == "dev-linux-gpu" ]]; then
-DHPX_IGNORE_BOOST_COMPATIBILITY=ON \
-DGPRAT_ENABLE_FORMAT_TARGETS=OFF \
-DGPRAT_ENABLE_MKL=$USE_MKL \
-DGPRAT_APEX_STEPS=${GPRAT_APEX_STEPS} \
-DGPRAT_APEX_CHOLESKY=${GPRAT_APEX_CHOLESKY} \
-DCMAKE_C_COMPILER=$(which clang) \
-DCMAKE_CXX_COMPILER=$(which clang++) \
-DCMAKE_CUDA_COMPILER=$(which clang++) \
Expand Down
18 changes: 18 additions & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ option(GPRAT_WITH_CUDA "Enable GPU support with CUDA, cuSolver, cuBLAS" OFF)
# Pass variable to C++ code
add_compile_definitions(GPRAT_WITH_CUDA=$<BOOL:${GPRAT_WITH_CUDA}>)

# APEX profiling switches; both are OFF unless the user enables them.
#
# GPRAT_APEX_STEPS: measure the duration of steps with APEX.
option(GPRAT_APEX_STEPS "Enable measuring duration of steps with APEX" OFF)

# GPRAT_APEX_CHOLESKY: measure, inside the cholesky function, the assembly of
# the covariance matrix and the right looking cholesky with APEX. The cholesky
# function also returns a large matrix, but that adds significant overhead to
# the whole call and says little about the optimized assembly and right looking
# cholesky steps, which are the parts reused by other GP operations.
option(GPRAT_APEX_CHOLESKY
       "Enable measuring duration of cholesky assembly and computation with APEX"
       OFF)

# Forward both switches to the C++ code as 0/1 preprocessor definitions.
add_compile_definitions(
  GPRAT_APEX_STEPS=$<BOOL:${GPRAT_APEX_STEPS}>
  GPRAT_APEX_CHOLESKY=$<BOOL:${GPRAT_APEX_CHOLESKY}>)

set(SOURCE_FILES
src/gprat_c.cpp
src/utils_c.cpp
Expand Down
56 changes: 56 additions & 0 deletions core/include/apex_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef APEX_STEPS_H
#define APEX_STEPS_H

#include <apex_api.hpp>
#include <hpx/future.hpp>

#include <chrono>

/// @brief Callable alias yielding the current time point of std::chrono::high_resolution_clock.
inline auto now = &std::chrono::high_resolution_clock::now;

/**
 * @brief Returns the time elapsed between start_time and the moment of the call, in nanoseconds.
 *
 * @param start_time Time point to measure from, e.g. a value previously obtained from now()
 * @return Elapsed time as a whole-nanosecond count converted to double
 */
inline double diff(const std::chrono::high_resolution_clock::time_point &start_time)
{
    const auto elapsed = now() - start_time;
    const auto elapsed_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
    return static_cast<double>(elapsed_ns.count());
}

/**
 * @brief Starts a timing measurement by declaring a variable newTimer in the current scope.
 *
 * Expands to a declaration of newTimer initialized with the time point returned by now(). The expansion has no
 * trailing semicolon, so the caller supplies it. The variable is meant to be passed later as oldTimer to
 * GPRAT_STOP_TIMER.
 *
 * @param newTimer Identifier of the new timer variable to be declared
 */
#define GPRAT_START_TIMER(newTimer) auto newTimer = now()

/**
 * @brief Blocks execution until all provided HPX futures are ready and samples the duration of timer oldTimer with
 * label oldLabel.
 *
 * Expands to hpx::wait_all(...) on the variadic arguments followed by apex::sample_value(oldLabel, diff(oldTimer)).
 * Both calls are wrapped in do { ... } while (0) so the macro acts as a single statement: used as the body of an
 * unbraced if/else or loop, the wait and the sample stay together. The caller supplies the trailing semicolon.
 *
 * @param oldTimer Identifier of the existing timer variable to be sampled
 * @param oldLabel String label associated with the measured duration
 * @param ... Variadic arguments representing HPX futures to wait on
 */
#define GPRAT_STOP_TIMER(oldTimer, oldLabel, ...) \
    do \
    { \
        hpx::wait_all(__VA_ARGS__); \
        apex::sample_value(oldLabel, diff(oldTimer)); \
    } while (0)

// GPRAT_START_STEP and GPRAT_END_STEP are the step-level counterparts of GPRAT_START_TIMER and GPRAT_STOP_TIMER.
// Their definition depends on GPRAT_APEX_STEPS: when it is OFF they expand to nothing, when it is ON they forward
// their arguments unchanged to the timer macros.
#if !GPRAT_APEX_STEPS

// GPRAT_APEX_STEPS=OFF: both macros accept any arguments and expand to nothing
#define GPRAT_START_STEP(...)
#define GPRAT_END_STEP(...)

#else

/// @see GPRAT_START_TIMER
#define GPRAT_START_STEP(newTimer) GPRAT_START_TIMER(newTimer)

/// @see GPRAT_STOP_TIMER
#define GPRAT_END_STEP(oldTimer, oldLabel, ...) GPRAT_STOP_TIMER(oldTimer, oldLabel, __VA_ARGS__)

#endif // !GPRAT_APEX_STEPS

// NOTE: We could also create similar macros, e.g. for GPRAT_APEX_CHOLESKY. However, since GPRAT_APEX_CHOLESKY is only
// used in one place, this would unnecessarily bloat this header file.

#endif // APEX_STEPS_H
Loading