Skip to content

Commit 92ba4d4

Browse files
committed
Handling crashes and exit codes
1 parent 207d153 commit 92ba4d4

1 file changed

Lines changed: 23 additions & 0 deletions

File tree

prodtests/full-system-test/analyze_gpu_benchmarks.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,21 @@
2828
r"(?P<timeslice>\d+)"
2929
)
3030

31+
# Matches an "[ERROR] Workflow crashed ... code was set to <N>" log line and
# captures the decimal number following "code was set to" as group "code".
WORKFLOW_CRASH_RE = re.compile(
    r"\[ERROR\]\s*Workflow crashed\b.*?\bcode was set to\s*(?P<code>\d+)"
)


# The return annotation is quoted so it is never evaluated at definition time
# and therefore does not depend on the interpreter supporting builtin generics.
def find_workflow_crash_error_code(logfile: Path) -> "list[int]":
    """Collect the codes reported by workflow-crash lines in *logfile*.

    The file is scanned line by line; at most one code is taken per line
    (the first match of ``WORKFLOW_CRASH_RE`` on that line).

    Args:
        logfile: Path of the text log to scan.

    Returns:
        The captured codes as integers, in the order they appear in the
        file. The list is empty when no line matches.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the host
    # locale; undecodable bytes are replaced rather than aborting the scan.
    with logfile.open("r", encoding="utf-8", errors="replace") as f:
        return [
            int(match.group("code"))
            for match in map(WORKFLOW_CRASH_RE.search, f)
            if match
        ]
45+
3146

3247
def parse_hms_to_seconds(hms: str) -> float:
3348
hhmmss, *frac = hms.split(".")
@@ -472,6 +487,13 @@ def main():
472487
durations_by_timeslice, starts_by_timeslice, ends_by_timeslice = read_timeslice_durations(
473488
args.logfile
474489
)
490+
491+
workflow_crash_error_codes = find_workflow_crash_error_code(args.logfile)
492+
if workflow_crash_error_codes:
493+
print(
494+
f"{RED}{BOLD}Workflow crash error code(s) detected:{RESET} "
495+
f"{RED}{workflow_crash_error_codes}{RESET}"
496+
)
475497

476498
excluded_timeslices, processing_sequences, wall_time_mean = analyze_processing_sequences(
477499
starts_by_timeslice,
@@ -643,6 +665,7 @@ def save_summary_output(output_file: Path, lines):
643665

644666
summary_lines = [
645667
f"Input file: {args.logfile}",
668+
f"Workflow crash error codes: {workflow_crash_error_codes if workflow_crash_error_codes else 'none'}",
646669
f"Complete timeslices found: {n_total}",
647670
f"Timeslices used after dropping first/last two: {n_used}",
648671
f"First used timeslice: {trimmed_timeslices[0]}",

0 commit comments

Comments
 (0)