99
1010jobs :
1111 benchmark :
12- runs-on : ${{ matrix.runner }}
12+ runs-on : cern-nextgen-mi300x
1313 container : registry.cern.ch/alisw/slc9-gpu-builder@sha256:ea3443f9dfbc770e4b4bce0d1a9ecc0b7a7c16e9f76e416b796d170877220820
14- strategy :
15- fail-fast : false
16- matrix :
17- name : [cpu, nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
18- include :
19- - name : cpu
20- runner : cern-nextgen-mi300x
21- cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=0
22- profiler_runs : 42
23- standalone_runs : 42
24- cpu_gpu : " -c"
25- - name : nvidia-h100
26- runner : cern-nextgen-h100
27- cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
28- profiler_runs : 21
29- standalone_runs : 42
30- cpu_gpu : " -g --memSize 20000000000"
31- - name : nvidia-l40s
32- runner : cern-nextgen-l40s
33- cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
34- profiler_runs : 42
35- standalone_runs : 42
36- cpu_gpu : " -g --memSize 20000000000"
37- - name : amd-mi300x
38- runner : cern-nextgen-mi300x
39- cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
40- profiler_runs : 42
41- standalone_runs : 42
42- cpu_gpu : " -g --memSize 20000000000"
43- - name : amd-w7900
44- runner : cern-nextgen-w7900
45- cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
46- profiler_runs : 42
47- standalone_runs : 42
48- cpu_gpu : " -g --memSize 20000000000"
4914 env :
5015 WORK_DIR : /cvmfs/alice.cern.ch
5116 ALIBUILD_ARCH_PREFIX : el9-x86_64/Packages
5217 MODULEPATH : /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
5318 STANDALONE_DIR : /root/standalone
54- BENCHMARK_CSV : standalone_${{ matrix.name }} .csv
55- PROFILER_CSV : profiler_${{ matrix.name }} .csv
56- TIMING_CA : ./ca -e 50kHz ${{ matrix.cpu_gpu }} --seed 0 --sync --runsInit 0 --PROCresetTimers 1 --PROCdebugMarkdown 1 --debug 1 # Add --runs 42 for benchmark runs
19+ BENCHMARK_CSV : standalone_cpu .csv
20+ PROFILER_CSV : profiler_cpu .csv
21+ TIMING_CA : ./ca -e 50kHz -c --seed 0 --sync --runsInit 0 --PROCresetTimers 1 --PROCdebugMarkdown 1 --debug 1 # Add --runs 42 for benchmark runs
5722 LD_LIBRARY_PATH : /usr/local/cuda-13.0/compat
5823
59- name : ${{ matrix.name }}
24+ name : cpu
6025 steps :
6126 - name : Checkout Repository
6227 uses : actions/checkout@v6
@@ -80,84 +45,27 @@ jobs:
8045 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
8146
8247 mkdir -p ${STANDALONE_DIR}
83- cmake -B ${STANDALONE_DIR}/build ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
48+ cmake -B ${STANDALONE_DIR}/build -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
8449 cmake --build ${STANDALONE_DIR}/build --target install -j 8
85- env :
86- DETERMINISTIC_MODE : GPU
8750
8851 - name : Test Track Reconstruction
8952 run : |
9053 source /etc/profile.d/modules.sh
9154 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
9255 cd ${STANDALONE_DIR}
93- ${STANDALONE_DIR}/ca -e o2-simple ${{ matrix.cpu_gpu }} --seed 0 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
56+ ${STANDALONE_DIR}/ca -e o2-simple -c --seed 0 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
9457 cmp ${STANDALONE_DIR}/*.out
95- rm -rf ${STANDALONE_DIR}/*.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build
96-
97- - name : Build Non-Deterministic
98- run : *build
99- env :
100- DETERMINISTIC_MODE : OFF
10158
10259 - name : Benchmark Track Reconstruction
10360 run : |
10461 source /etc/profile.d/modules.sh
10562 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
10663 cd ${STANDALONE_DIR}
107- ${TIMING_CA} --runs ${{ matrix.standalone_runs }} --PROCdebugCSV /root/${BENCHMARK_CSV}
64+ ${TIMING_CA} --runs 42 --PROCdebugCSV /root/${BENCHMARK_CSV}
10865 python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_standalone.py --discard 0 --input /root/${BENCHMARK_CSV} --output /root/summary_${BENCHMARK_CSV}
109-
110- - name : Profiler - Nsight Compute
111- if : ${{ matrix.name == 'nvidia-h100' }}
112- run : |
113- dnf install -y cuda-nsight-compute-13-1
114- source /etc/profile.d/modules.sh
115- module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
116- cd ${STANDALONE_DIR}
117- ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs ${{ matrix.profiler_runs }} # Generates ${{ matrix.name }}.ncu-rep
118- ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
119- rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
120- python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}
121-
122- - name : Profiler - Nsight Systems
123- if : ${{ matrix.name == 'nvidia-l40s' }}
124- run : |
125- dnf config-manager --add-repo "https://developer.download.nvidia.com/devtools/repos/rhel$(source /etc/os-release; echo ${VERSION_ID%%.*})/$(rpm --eval '%{_arch}' | sed s/aarch/arm/)/"
126- dnf install --nogpgcheck -y nsight-systems-cli-2026.2.1
127- source /etc/profile.d/modules.sh
128- module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
129- cd ${STANDALONE_DIR}
130- nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.profiler_runs }} # Generates ${{ matrix.name }}.nsys-rep
131- nsys stats --report cuda_gpu_kern_sum --timeunit usec --force-export=true --format csv ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
132- rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
133- python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}
134-
135- - name : Profiler - rocprofv2
136- if : ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
137- run : |
138- source /etc/profile.d/modules.sh
139- module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
140- cd ${STANDALONE_DIR}
141- rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.standalone_runs }} # Generates results_${{ matrix.name }}.csv
142- rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
143- mv /root/results_${{ matrix.name }}.csv /root/${PROFILER_CSV}
144- python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}
14566
14667 - name : Upload Artifact
14768 uses : actions/upload-artifact@v6
14869 with :
149- name : ${{ matrix.name }} -artifact
70+ name : cpu -artifact
15071 path : " /root/*.csv"
151-
152- - name : Display table on GitHub web
153- run : |
154- source /etc/profile.d/modules.sh
155- module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
156- mkdir -p ${STANDALONE_DIR}/baseline
157- curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/summary_${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/summary_${PROFILER_CSV}
158- curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/summary_${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/summary_${BENCHMARK_CSV}
159- python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --runs ${{ matrix.profiler_runs }} --baseline ${STANDALONE_DIR}/baseline/summary_${PROFILER_CSV} --current /root/summary_${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
160- echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
161- python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --runs ${{ matrix.standalone_runs }} --baseline ${STANDALONE_DIR}/baseline/summary_${BENCHMARK_CSV} --current /root/summary_${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
162- rm -rf ${STANDALONE_DIR}/baseline
163- if : ${{ matrix.name != 'cpu' }}
0 commit comments