Skip to content

Commit 1b73af9

Browse files
committed
Remove external lib-loading to avoid glibc errors
1 parent fce6045 commit 1b73af9

1 file changed

Lines changed: 13 additions & 73 deletions

File tree

prodtests/full-system-test/gen_single_gpu_rtc_benchmark.sh

Lines changed: 13 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
# Generate or run a single-GPU reconstruction benchmark workflow using dpl-workflow.sh.
33
#
44
# Main benchmark mode:
5-
# BENCHMARK_RUN=1 FILEWORKDIR=/path/to/raw_tf_dir ./gen_single_gpu_rtc_benchmark.sh
5+
# RUN_BENCHMARK=1 GPUTYPE=HIP FILEWORKDIR=/path/to/raw_tf_dir ./gen_single_gpu_rtc_benchmark.sh
66

77
set -euo pipefail
88

@@ -35,7 +35,7 @@ esac
3535

3636
export DPL_REPORT_PROCESSING="${DPL_REPORT_PROCESSING:-1}"
3737
export WORKFLOW_PARAMETERS="${WORKFLOW_PARAMETERS:-GPU,CTF}"
38-
export NGPUS=1
38+
export NGPUS="${NGPUS:-1}"
3939
export O2_GPU_DOUBLE_PIPELINE="${O2_GPU_DOUBLE_PIPELINE:-1}"
4040
export O2_GPU_RTC="${O2_GPU_RTC:-1}"
4141
export SYNCMODE="${SYNCMODE:-1}"
@@ -57,6 +57,7 @@ export RUN_BENCHMARK="${RUN_BENCHMARK:-0}"
5757
: "${BENCH_TAG:=${BENCH_TAG:-$(hostname -s)}}"
5858
BENCH_STAMP="$(date +%Y%m%d_%H%M%S)"
5959
: "${OUTDIR:=${BENCHMARK_OUTDIR:-$PWD/single_gpu_rtc_bench_${BENCH_TAG}_${BENCH_STAMP}}}"
60+
6061
mkdir -p "$OUTDIR"
6162
RUNDIR="$OUTDIR/run"
6263
mkdir -p "$RUNDIR"
@@ -79,73 +80,6 @@ if [[ "${BENCH_DISABLE_INPUT_COPY:-1}" == "1" ]]; then
7980
unset INPUT_FILE_COPY_CMD || true
8081
fi
8182

82-
# ----------------------------------------------------------------------------------------------------------------------
83-
# Library path fixes for common EPN/dev-node issues.
84-
85-
: "${BENCH_USE_SYSTEM_FONT_LIBS:=1}"
86-
: "${BENCH_AUTO_ROCM_LIBS:=0}"
87-
88-
prepend_ld_path() {
89-
local dir="$1"
90-
[[ -d "$dir" ]] || return 0
91-
case ":${LD_LIBRARY_PATH:-}:" in
92-
*":$dir:"*) ;;
93-
*) export LD_LIBRARY_PATH="$dir:${LD_LIBRARY_PATH:-}" ;;
94-
esac
95-
}
96-
97-
if [[ "0$BENCH_USE_SYSTEM_FONT_LIBS" == "01" ]]; then
98-
prepend_ld_path /usr/lib64
99-
prepend_ld_path /lib64
100-
fi
101-
102-
# ROCm library injection is only useful for HIP runs. Keep it off by default for CUDA/NVIDIA containers,
103-
# because mixed AMD/NVIDIA hosts can otherwise leak ROCm libraries into LD_LIBRARY_PATH.
104-
if [[ "${GPUTYPE:-}" == "HIP" && $BENCH_AUTO_ROCM_LIBS == 1 ]]; then
105-
if [[ -n "${ROCM_PATH:-}" ]]; then
106-
prepend_ld_path "$ROCM_PATH/lib64"
107-
prepend_ld_path "$ROCM_PATH/lib"
108-
fi
109-
for d in /opt/rocm/lib /opt/rocm/lib64 /usr/lib64/rocm /usr/lib/rocm/lib; do
110-
prepend_ld_path "$d"
111-
done
112-
fi
113-
114-
if [[ -n "${BENCH_EXTRA_LD_LIBRARY_PATH:-}" ]]; then
115-
export LD_LIBRARY_PATH="$BENCH_EXTRA_LD_LIBRARY_PATH:${LD_LIBRARY_PATH:-}"
116-
fi
117-
118-
# Check CUDA runtime/device visibility before starting the full workflow.
119-
if [[ "$GPUTYPE" == "CUDA" ]]; then
120-
if ! command -v nvidia-smi >/dev/null 2>&1; then
121-
echo "WARNING: GPUTYPE=CUDA but nvidia-smi is not in PATH." >&2
122-
echo "If this is an Apptainer/Singularity container, run it with --nv." >&2
123-
else
124-
nvidia-smi -L >/dev/null 2>&1 || {
125-
echo "FATAL: GPUTYPE=CUDA but nvidia-smi cannot see an NVIDIA GPU." >&2
126-
echo "If this is an Apptainer/Singularity container, run it with --nv." >&2
127-
exit 1
128-
}
129-
fi
130-
131-
if ! ldconfig -p 2>/dev/null | grep -q 'libcuda.so.1' && \
132-
! find ${LD_LIBRARY_PATH//:/ } -maxdepth 1 -name 'libcuda.so.1*' 2>/dev/null | grep -q .; then
133-
echo "WARNING: GPUTYPE=CUDA but libcuda.so.1 is not visible via ldconfig or LD_LIBRARY_PATH." >&2
134-
echo "This usually means the container was not started with --nv, or the host NVIDIA driver is not mounted." >&2
135-
fi
136-
fi
137-
138-
# Check HIP runtime visibility before starting the full workflow.
139-
if [[ "$GPUTYPE" == "HIP" ]]; then
140-
if ! ldconfig -p 2>/dev/null | grep -q 'libamdhip64.so.6' && \
141-
! find ${LD_LIBRARY_PATH//:/ } -maxdepth 1 -name 'libamdhip64.so.6*' 2>/dev/null | grep -q .; then
142-
echo "FATAL: GPUTYPE=HIP but libamdhip64.so.6 is not visible." >&2
143-
echo "Current LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}" >&2
144-
echo "Set ROCM_PATH=/opt/rocm or BENCH_EXTRA_LD_LIBRARY_PATH=/path/to/rocm/lib" >&2
145-
exit 1
146-
fi
147-
fi
148-
14983
# A single-GPU benchmark must not enter EPN sync mode, because the workflow intentionally sets NGPUS=4 there.
15084
if [[ "${EPNSYNCMODE:-0}" == "1" ]]; then
15185
echo "FATAL: EPNSYNCMODE=1 is incompatible with the single-GPU RTC benchmark." >&2
@@ -165,22 +99,28 @@ echo "# NGPUS=$NGPUS GPUTYPE=$GPUTYPE"
16599
echo "# O2_GPU_DOUBLE_PIPELINE=$O2_GPU_DOUBLE_PIPELINE O2_GPU_RTC=$O2_GPU_RTC"
166100
echo "# NTIMEFRAMES=$NTIMEFRAMES TFLOOP=$TFLOOP"
167101
echo "# FILEWORKDIR=${FILEWORKDIR:-} INPUT_FILE_LIST=${INPUT_FILE_LIST:-}"
168-
echo "# LD_LIBRARY_PATH font-lib workaround: BENCH_USE_SYSTEM_FONT_LIBS=$BENCH_USE_SYSTEM_FONT_LIBS"
169-
echo "# ROCm library auto-detect: BENCH_AUTO_ROCM_LIBS=$BENCH_AUTO_ROCM_LIBS (active only when GPUTYPE=HIP)"
102+
echo "# LD_LIBRARY_PATH is not modified by this script"
170103
echo
171104

105+
# ----------------------------------------------------------------------------------------------------------------------
106+
# Generate workflow with the caller-provided environment.
107+
172108
export WORKFLOWMODE="print"
173109
cmdfile="$OUTDIR/workflow_${BENCH_TAG}_${BENCH_STAMP}.sh"
110+
174111
echo "# Generating workflow only; command file: $cmdfile"
112+
175113
(
176114
cd "$RUNDIR"
177115
"$O2_DPL_WORKFLOW"
178116
) > "$cmdfile"
179117

180118
if [[ "$RUN_BENCHMARK" == "1" ]]; then
181-
export WORKFLOWMODE="${WORKFLOWMODE:-run}"
119+
export WORKFLOWMODE="run"
120+
182121
log="$OUTDIR/reco_${BENCH_TAG}_${BENCH_STAMP}.log"
183122
env | sort > "$OUTDIR/env_${BENCH_TAG}_${BENCH_STAMP}.txt"
123+
184124
echo "# Running benchmark; log: $log"
185125

186126
set +e
@@ -214,4 +154,4 @@ if [[ "$RUN_BENCHMARK" == "1" ]]; then
214154
fi
215155

216156
exit "$status"
217-
fi
157+
fi

0 commit comments

Comments
 (0)