From 6df183dead48907ca65ff2dbdd0ed8463d13b4a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20R=C3=B6blitz?= Date: Sat, 8 Nov 2025 12:42:10 +0100 Subject: [PATCH 01/16] new attempt at pinning npm install inside container build --- containers/Dockerfile.smee-client | 22 +++++++---- containers/package-lock.json | 62 +++++++++++++++++++++++++++++++ containers/package.json | 9 +++++ 3 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 containers/package-lock.json create mode 100644 containers/package.json diff --git a/containers/Dockerfile.smee-client b/containers/Dockerfile.smee-client index 28c5d21a..7b226467 100644 --- a/containers/Dockerfile.smee-client +++ b/containers/Dockerfile.smee-client @@ -1,12 +1,18 @@ -ARG smee_client_version=4.4.1 -# ARG smee_client_version_commit=b837fa85fd05853731160e21356ffd30c8c3e791 # v4.4.1 - -# pinning base image to specific hash (corresponding to lts-alpine) +# pin base image to specific hash (corresponding to lts-alpine) FROM node@sha256:f36fed0b2129a8492535e2853c64fbdbd2d29dc1219ee3217023ca48aebd3787 -ARG smee_client_version -# ARG smee_client_version_commit -# Then install -RUN npm install --global smee-client@${smee_client_version} +# create app dir for locked installation +WORKDIR /app + +# copy lockfile and manifest +COPY containers/package.json containers/package-lock.json ./ + +# install exactly what's in the lockfile (change version in package.json and update +# lockfile via 'npm install --package-lock-only') +RUN npm ci --omit=dev + +# expose CLI by symlinking +RUN ln -sf /app/node_modules/.bin/smee /usr/local/bin/smee + ENTRYPOINT ["smee"] CMD ["--help"] diff --git a/containers/package-lock.json b/containers/package-lock.json new file mode 100644 index 00000000..22aa5a14 --- /dev/null +++ b/containers/package-lock.json @@ -0,0 +1,62 @@ +{ + "name": "smee-wrapper", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "smee-wrapper", + "version": "1.0.0", + 
"license": "GPL-2.0-only", + "dependencies": { + "smee-client": "4.4.1" + } + }, + "node_modules/eventsource": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-4.0.0.tgz", + "integrity": "sha512-fvIkb9qZzdMxgZrEQDyll+9oJsyaVvY92I2Re+qK0qEJ+w5s0X3dtz+M0VAPOjP1gtU3iqWyjQ0G3nvd5CLZ2g==", + "license": "MIT", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/smee-client": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/smee-client/-/smee-client-4.4.1.tgz", + "integrity": "sha512-o2px88YVTmKWpaF3sg0Qpuk5MEVpM7BrImjNy/PYf3ENiD51dnPWtXnYHAM6h5/+06ug7z7BBu3werlIEwVkdw==", + "license": "ISC", + "dependencies": { + "eventsource": "^4.0.0", + "undici": "^7.0.0" + }, + "bin": { + "smee": "bin/smee.js" + }, + "engines": { + "node": "^20.18 || >= 22" + } + }, + "node_modules/undici": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.16.0.tgz", + "integrity": "sha512-QEg3HPMll0o3t2ourKwOeUAZ159Kn9mx5pnzHRQO8+Wixmh88YdZRiIwat0iNzNNXn0yoEtXJqFpyW7eM8BV7g==", + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + } + } +} diff --git a/containers/package.json b/containers/package.json new file mode 100644 index 00000000..8b33566e --- /dev/null +++ b/containers/package.json @@ -0,0 +1,9 @@ +{ + "name": "smee-wrapper", + "private": true, + "version": "1.0.0", + "license": "GPL-2.0-only", + "dependencies": { + "smee-client": "4.4.1" + } +} From 97d66258252f6457b156590d3df2af5a8a89da3d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 2 Dec 2025 21:58:55 +0100 Subject: [PATCH 02/16] 
use requests library instead of 'curl' and obtain all comments --- tasks/build.py | 298 +++++++++++++++++++++++++------------------------ 1 file changed, 154 insertions(+), 144 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 165ab544..f0c8aa19 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -24,6 +24,7 @@ import json import os import re +import requests import shutil import string import sys @@ -1207,154 +1208,163 @@ def request_bot_build_issue_comments(repo_name, pr_number): # for loop because github has max 100 items per request. # if the pr has more than 100 comments we need to use per_page # argument at the moment the for loop is for a max of 400 comments could bump this up - for x in range(1, 5): - curl_cmd = f'curl -L https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments?per_page=100&page={x}' - curl_output, curl_error, curl_exit_code = run_cmd(curl_cmd, "fetch all comments") - - comments = json.loads(curl_output) - - for comment in comments: - # iterate through the comments to find the one where the status of the build was in - submitted_job_comments_section = cfg[config.SECTION_SUBMITTED_JOB_COMMENTS] - accelerator_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR] - instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] - instance_repo_re = template_to_regex(instance_repo_fmt) - comment_body = comment['body'].split('\n') - instance_repo_match = re.match(instance_repo_re, comment_body[0]) - # Check if this body starts with an initial comment from the bot (first item is always the instance + repo - # it is building for) - # Then, check that it has at least 4 lines so that we can safely index up to that number - if instance_repo_match and len(comment_body) >= 4: - # Set some defaults - repo_id = "" - on_arch = "" - for_arch = "" - date = "" - status = "" - url = "" - result = "" - - log(f"{fn}(): found bot build response in issue, 
processing...") - - # First, extract the repo_id - log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}") - repo_id = instance_repo_match.group('repo_id') - - # Then, try to match the architecture we build on. - # First try this including accelerator, to see if one was defined - on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] - on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt)) - on_arch_re_with_accel = template_to_regex(on_arch_fmt_with_accel) - on_arch_match = re.match(on_arch_re_with_accel, comment_body[1]) + + url = f'https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments' + all_comments = [] + + try: + while url: + response = requests.get(url, params={'per_page': 100}) + respone.raise_for_status() + + all_comments.extend(response.json()) + # get next URL from Link header in response (we are done if that is empty) + url = response.links.get('next', {}).get('url') + + except Exception as err: + log(f"{fn}(): obtaining comments for PR {pr_number} in repo {repo_name!r} failed: {err}") + return status_table + + for comment in all_comments: + # iterate through the comments to find the one where the status of the build was in + submitted_job_comments_section = cfg[config.SECTION_SUBMITTED_JOB_COMMENTS] + accelerator_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR] + instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] + instance_repo_re = template_to_regex(instance_repo_fmt) + comment_body = comment['body'].split('\n') + instance_repo_match = re.match(instance_repo_re, comment_body[0]) + # Check if this body starts with an initial comment from the bot (first item is always the instance + repo + # it is building for) + # Then, check that it has at least 4 lines so that we can safely index up to that number + if instance_repo_match and 
len(comment_body) >= 4: + # Set some defaults + repo_id = "" + on_arch = "" + for_arch = "" + date = "" + status = "" + url = "" + result = "" + + log(f"{fn}(): found bot build response in issue, processing...") + + # First, extract the repo_id + log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}") + repo_id = instance_repo_match.group('repo_id') + + # Then, try to match the architecture we build on. + # First try this including accelerator, to see if one was defined + on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] + on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt)) + on_arch_re_with_accel = template_to_regex(on_arch_fmt_with_accel) + on_arch_match = re.match(on_arch_re_with_accel, comment_body[1]) + if on_arch_match: + # Pattern with accelerator matched, append to status_table + log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}, " + f"with accelerator {on_arch_match.group('accelerator')}") + on_arch = f"`{on_arch_match.group('on_arch')}`, `{on_arch_match.group('accelerator')}`" + else: + # Pattern with accelerator did not match, retry without accelerator + on_arch_re = template_to_regex(on_arch_fmt) + on_arch_match = re.match(on_arch_re, comment_body[1]) if on_arch_match: - # Pattern with accelerator matched, append to status_table - log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}, " - f"with accelerator {on_arch_match.group('accelerator')}") - on_arch = f"`{on_arch_match.group('on_arch')}`, `{on_arch_match.group('accelerator')}`" + # Pattern without accelerator matched, append to status_table + log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}") + on_arch = f"`{on_arch_match.group('on_arch')}`" else: - # Pattern with accelerator did not match, retry without accelerator - on_arch_re = template_to_regex(on_arch_fmt) - on_arch_match = re.match(on_arch_re, 
comment_body[1]) - if on_arch_match: - # Pattern without accelerator matched, append to status_table - log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}") - on_arch = f"`{on_arch_match.group('on_arch')}`" - else: - # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' - msg = "Could not match regular expression for extracting the architecture to build on.\n" - msg += "String to be matched:\n" - msg += f"{comment_body[1]}\n" - msg += "First regex attempted:\n" - msg += f"{on_arch_re_with_accel.pattern}\n" - msg += "Second regex attempted:\n" - msg += f"{on_arch_re.pattern}\n" - raise ValueError(msg) - - # Now, do the same for the architecture we build for. I.e. first, try to match including accelerator - for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] - for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt)) - for_arch_re_with_accel = template_to_regex(for_arch_fmt_with_accel) - for_arch_match = re.match(for_arch_re_with_accel, comment_body[2]) + # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' + msg = "Could not match regular expression for extracting the architecture to build on.\n" + msg += "String to be matched:\n" + msg += f"{comment_body[1]}\n" + msg += "First regex attempted:\n" + msg += f"{on_arch_re_with_accel.pattern}\n" + msg += "Second regex attempted:\n" + msg += f"{on_arch_re.pattern}\n" + raise ValueError(msg) + + # Now, do the same for the architecture we build for. I.e. 
first, try to match including accelerator + for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] + for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt)) + for_arch_re_with_accel = template_to_regex(for_arch_fmt_with_accel) + for_arch_match = re.match(for_arch_re_with_accel, comment_body[2]) + if for_arch_match: + # Pattern with accelerator matched, append to status_table + log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, " + f"with accelerator {for_arch_match.group('accelerator')}") + for_arch = f"`{for_arch_match.group('for_arch')}`, `{for_arch_match.group('accelerator')}`" + else: + # Pattern with accelerator did not match, retry without accelerator + for_arch_re = template_to_regex(for_arch_fmt) + for_arch_match = re.match(for_arch_re, comment_body[2]) if for_arch_match: - # Pattern with accelerator matched, append to status_table - log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, " - f"with accelerator {for_arch_match.group('accelerator')}") - for_arch = f"`{for_arch_match.group('for_arch')}`, `{for_arch_match.group('accelerator')}`" + # Pattern without accelerator matched, append to status_table + log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}") + for_arch = f"`{for_arch_match.group('for_arch')}`" else: - # Pattern with accelerator did not match, retry without accelerator - for_arch_re = template_to_regex(for_arch_fmt) - for_arch_match = re.match(for_arch_re, comment_body[2]) - if for_arch_match: - # Pattern without accelerator matched, append to status_table - log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}") - for_arch = f"`{for_arch_match.group('for_arch')}`" - else: - # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' - msg = "Could not match regular expression for extracting the architecture to 
build for.\n" - msg += "String to be matched:\n" - msg += f"{comment_body[2]}\n" - msg += "First regex attempted:\n" - msg += f"{for_arch_re_with_accel.pattern}\n" - msg += "Second regex attempted:\n" - msg += f"{for_arch_re.pattern}\n" - raise ValueError(msg) - - # get date, status, url and result from the markdown table - comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1] - - # Convert markdown table to a dictionary - lines = comment_table.split('\n') - rows = [] - keys = [] - for i, row in enumerate(lines): - values = {} - if i == 0: - for key in row.split('|'): - keys.append(key.strip()) - elif i == 1: - continue + # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' + msg = "Could not match regular expression for extracting the architecture to build for.\n" + msg += "String to be matched:\n" + msg += f"{comment_body[2]}\n" + msg += "First regex attempted:\n" + msg += f"{for_arch_re_with_accel.pattern}\n" + msg += "Second regex attempted:\n" + msg += f"{for_arch_re.pattern}\n" + raise ValueError(msg) + + # get date, status, url and result from the markdown table + comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1] + + # Convert markdown table to a dictionary + lines = comment_table.split('\n') + rows = [] + keys = [] + for i, row in enumerate(lines): + values = {} + if i == 0: + for key in row.split('|'): + keys.append(key.strip()) + elif i == 1: + continue + else: + for j, value in enumerate(row.split('|')): + if j > 0 and j < len(keys) - 1: + values[keys[j]] = value.strip() + rows.append(values) + + # add date, status, url to status_table if + for row in rows: + if row['job status'] == 'finished': + date = row['date'] + status = row['job status'] + url = comment['html_url'] + if 'FAILURE' in row['comment']: + result = ':cry: FAILURE' + elif 'SUCCESS' in row['comment']: + result = ':grin: SUCCESS' + elif 'UNKNOWN' in row['comment']: + result = 
':shrug: UNKNOWN' else: - for j, value in enumerate(row.split('|')): - if j > 0 and j < len(keys) - 1: - values[keys[j]] = value.strip() - rows.append(values) - - # add date, status, url to status_table if - for row in rows: - if row['job status'] == 'finished': - date = row['date'] - status = row['job status'] - url = comment['html_url'] - if 'FAILURE' in row['comment']: - result = ':cry: FAILURE' - elif 'SUCCESS' in row['comment']: - result = ':grin: SUCCESS' - elif 'UNKNOWN' in row['comment']: - result = ':shrug: UNKNOWN' - else: - result = row['comment'] - elif row['job status'] in ['submitted', 'received', 'running']: - # Make sure that if the job is not finished yet, we also put something useful in these fields - # It is useful to know a job is submitted, running, etc - date = row['date'] - status = row['job status'] - url = comment['html_url'] result = row['comment'] - else: - # Don't do anything for the test line for now - we might add an extra entry to the status - # table later to reflect the test result - continue - - # Add all entries to status_table. We do this at the end of this loop so that the operation is - # more or less 'atomic', i.e. 
all vectors in the status_table dict have the same length - status_table['for repo'].append(repo_id) - status_table['on arch'].append(on_arch) - status_table['for arch'].append(for_arch) - status_table['date'].append(date) - status_table['status'].append(status) - status_table['url'].append(url) - status_table['result'].append(result) - - if len(comments) != 100: - break + elif row['job status'] in ['submitted', 'received', 'running']: + # Make sure that if the job is not finished yet, we also put something useful in these fields + # It is useful to know a job is submitted, running, etc + date = row['date'] + status = row['job status'] + url = comment['html_url'] + result = row['comment'] + else: + # Don't do anything for the test line for now - we might add an extra entry to the status + # table later to reflect the test result + continue + + # Add all entries to status_table. We do this at the end of this loop so that the operation is + # more or less 'atomic', i.e. all vectors in the status_table dict have the same length + status_table['for repo'].append(repo_id) + status_table['on arch'].append(on_arch) + status_table['for arch'].append(for_arch) + status_table['date'].append(date) + status_table['status'].append(status) + status_table['url'].append(url) + status_table['result'].append(result) + return status_table From 5c22855a6ffa1736c9fe22552e57b49b03d72754 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 2 Dec 2025 22:06:38 +0100 Subject: [PATCH 03/16] fix spelling typo --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index f0c8aa19..4bdbbda0 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1215,7 +1215,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): try: while url: response = requests.get(url, params={'per_page': 100}) - respone.raise_for_status() + response.raise_for_status() all_comments.extend(response.json()) # get next URL from Link header in response 
(we are done if that is empty) From e443ab2cf65afab977e8ff74b804b931ce524fae Mon Sep 17 00:00:00 2001 From: "Sondre B. Risanger" <168830227+sondrebr@users.noreply.github.com> Date: Wed, 21 Jan 2026 16:17:16 +0100 Subject: [PATCH 04/16] Add `bot: cancel` command --- README.md | 6 ++ app.cfg.example | 3 + eessi_bot_event_handler.py | 66 ++++++++++++++- tasks/build.py | 143 ++++++++++++++++++++++++++++++++- tests/test_bot_job123.metadata | 1 + tests/test_task_build.py | 26 +++--- tools/config.py | 1 + tools/filter.py | 2 +- tools/job_metadata.py | 5 +- 9 files changed, 234 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 8aa1b931..87226dac 100644 --- a/README.md +++ b/README.md @@ -548,6 +548,12 @@ submit_command = /usr/bin/sbatch `submit_command` is the full path to the Slurm job submission command used for submitting batch jobs. You may want to verify if `sbatch` is provided at that path or determine its actual location (using `which sbatch`). +```ini +cancel_command = /usr/bin/scancel +``` + +`cancel_command` is the full path to the Slurm command used for cancelling batch jobs. You may want to verify if `scancel` is provided at that path or determine its actual location (using `which scancel`). + ```ini build_permission = -NOT_ALLOWED_GH_ACCOUNT_NAME- [...] ``` diff --git a/app.cfg.example b/app.cfg.example index 0b393a4c..a5b1441c 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -155,6 +155,9 @@ slurm_params = --hold # full path to the job submission command submit_command = /usr/bin/sbatch +# full path to the job cancellation command +cancel_command = /usr/bin/scancel + # defines which GitHub accounts have the permission to trigger # build jobs, i.e., for which accounts the bot acts on `bot: build ...` # commands. If the value is left empty, everyone can trigger build jobs. 
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 04529a7e..0a6cc03c 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -29,8 +29,8 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from connections import github -from tasks.build import check_build_permission, get_node_types, request_bot_build_issue_comments, \ - submit_build_jobs +from tasks.build import cancel_jobs, check_build_permission, get_job_ids, get_work_dirs, \ + get_node_types, request_bot_build_issue_comments, submit_build_jobs from tasks.deploy import deploy_built_artefacts, determine_job_dirs from tasks.clean_up import move_to_trash_bin from tools import config @@ -53,6 +53,7 @@ config.BUILDENV_SETTING_BUILD_JOB_SCRIPT, # required config.BUILDENV_SETTING_BUILD_LOGS_DIR, # optional+recommended config.BUILDENV_SETTING_BUILD_PERMISSION, # optional+recommended + config.BUILDENV_SETTING_CANCEL_COMMAND, # required config.BUILDENV_SETTING_CONTAINER_CACHEDIR, # optional+recommended # config.BUILDENV_SETTING_CLONE_GIT_REPO_VIA, # optional # config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional @@ -102,6 +103,7 @@ # the poll interval setting is required for the alternative job handover # protocol (delayed_begin) config.SECTION_JOB_MANAGER: [ + config.JOB_MANAGER_SETTING_POLL_COMMAND, # required config.JOB_MANAGER_SETTING_POLL_INTERVAL], # required config.SECTION_REPO_TARGETS: [ config.REPO_TARGETS_SETTING_REPOS_CFG_DIR], # required @@ -507,7 +509,7 @@ def handle_bot_command_help(self, event_info, bot_command): help_msg += "\n - Commands must be sent with a **new** comment (edits of existing comments are ignored)." help_msg += "\n - A comment may contain multiple commands, one per line." 
help_msg += "\n - Every command begins at the start of a line and has the syntax `bot: COMMAND [ARGUMENTS]*`" - help_msg += "\n - Currently supported COMMANDs are: `help`, `build`, `show_config`, `status`" + help_msg += "\n - Currently supported COMMANDs are: `help`, `build`, `show_config`, `status`, `cancel`" help_msg += "\n" help_msg += "\n For more information, see https://www.eessi.io/docs/bot" return help_msg @@ -679,6 +681,64 @@ def handle_bot_command_status(self, event_info, bot_command): else: return "\n - failed to create status comment" + def handle_bot_command_cancel(self, event_info, bot_command): + """ + Handles bot command 'cancel' by parsing 'jobid:' arguments and + cancelling the jobs. + + Args: + event_info (dict): event received by event_handler + bot_command (EESSIBotCommand): command to be handled + + Returns: + comment (string): list of cancelled jobs if any, error message if not + """ + self.log("processing bot command 'cancel'") + + request_body = event_info["raw_request_body"] + repo_name = request_body["repository"]["full_name"] + pr_number = request_body["issue"]["number"] + user = request_body["comment"]["user"]["login"] + + gh = github.get_instance() + pr = gh.get_repo(repo_name).get_pull(pr_number) + + # Jobs can only be cancelled by the user who submitted the job + # -> No need to proceed if user cannot submit jobs + if not check_build_permission(pr, event_info): + self.log(f"User '{user}' does not have build permission - skipping cancellation.") + return f"\n - User '{user}' cannot submit build jobs." + + # Get valid 'jobid:' arguments + job_ids = get_job_ids(bot_command.action_filters) + if len(job_ids) == 0: + self.log("Got no valid job IDs") + return "\n - No valid job IDs were given." + + # Get working directories of jobs + work_dirs = get_work_dirs(job_ids, self.cfg) + if len(work_dirs) == 0: + self.log("None of the given jobs are cancellable") + return "\n - No cancellable jobs were given." 
+ + # Log skipped jobs + jobs = [] + for job_id in job_ids: + if job_id in work_dirs: + jobs.append((job_id, work_dirs.get(job_id))) + else: + log(f"Skipping job {job_id} - not found") + + # Cancel jobs + cancelled_jobs = cancel_jobs(jobs, user, pr, self.cfg) + if len(cancelled_jobs) == 0: + return "\n - No jobs were cancelled." + else: + comment = "" + for job_id in cancelled_jobs: + comment += f"\n - cancelled job `{job_id}`" + return comment + def start(self, app, port=3000): """ Logs startup information to shell and log file and starts the app using diff --git a/tasks/build.py b/tasks/build.py index 165ab544..1af052e8 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -34,7 +34,7 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from tools import config, cvmfs_repository, job_metadata, pr_comments, run_cmd import tools.filter as tools_filter -from tools.pr_comments import ChatLevels, create_comment +from tools.pr_comments import ChatLevels, create_comment, update_comment from tools.build_params import BUILD_PARAM_ARCH, BUILD_PARAM_ACCEL # defaults (used if not specified via, eg, 'app.cfg') @@ -51,7 +51,9 @@ # other constants EXPORT_VARS_FILE = 'export_vars.sh' -Job = namedtuple('Job', ('working_dir', 'arch_target', 'repo_id', 'slurm_opts', 'year_month', 'pr_id', 'accelerator')) + +Job = namedtuple('Job', + ('working_dir', 'arch_target', 'repo_id', 'slurm_opts', 'year_month', 'pr_id', 'accelerator', 'owner')) # global repo_cfg repo_cfg = {} @@ -108,6 +110,10 @@ def get_build_env_cfg(cfg): log(f"{fn}(): submit_command '{submit_command}'") config_data[config.BUILDENV_SETTING_SUBMIT_COMMAND] = submit_command + cancel_command = buildenv.get(config.BUILDENV_SETTING_CANCEL_COMMAND) + log(f"{fn}(): cancel_command '{cancel_command}'") + config_data[config.BUILDENV_SETTING_CANCEL_COMMAND] = cancel_command + job_handover_protocol = buildenv.get(config.BUILDENV_SETTING_JOB_HANDOVER_PROTOCOL) slurm_params = 
buildenv.get(config.BUILDENV_SETTING_SLURM_PARAMS) if job_handover_protocol == config.JOB_HANDOVER_PROTOCOL_HOLD_RELEASE: @@ -582,6 +588,8 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): base_branch_name = pr.base.ref log(f"{fn}(): pr.base.repo.ref '{base_branch_name}'") + job_owner = event_info['raw_request_body']['sender']['login'] + # create run dir (base directory for potentially several jobs) # TODO may still be too early (before we get to any actual job being # prepared below when calling 'download_pr') @@ -689,7 +697,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): # enlist jobs to proceed job = Job(job_dir, partition_info['cpu_subdir'], repo_id, partition_info['slurm_params'], year_month, - pr_id, accelerator) + pr_id, accelerator, job_owner) jobs.append(job) log(f"{fn}(): {len(jobs)} jobs to proceed after applying white list") @@ -1358,3 +1366,132 @@ def request_bot_build_issue_comments(repo_name, pr_number): if len(comments) != 100: break return status_table + + +def get_job_ids(action_filter): + """ + Gets and validates 'jobid:' arguments. + + Args: + action_filter (EESSIBotActionFilter): Instance containing 'jobid:' arguments + + Returns: + job_ids (list): valid 'jobid:' arguments + """ + fn = sys._getframe().f_code.co_name + + # Get 'jobid:' arguments + job_filter = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_JOB) + if not job_filter: + log(f"{fn}(): bot: cancel needs at least one 'jobid:' argument.") + return [] + + # Validate job IDs + job_ids = [] + for job_id in job_filter: + try: + if int(job_id) > 0: + job_ids.append(job_id) + else: + log(f"{fn}(): Invalid job ID: '{job_id}'") + except Exception as e: + log(f"{fn}(): Invalid job ID: {e}") + + return job_ids + + +def get_work_dirs(job_ids, cfg): + """ + Gets working directories of build jobs. + + Args: + job_ids (list): list of job_ids to check. 
+ cfg (ConfigParser): Instance containing full configuration from app.cfg + + Returns: + work_dirs (dict): dict mapping each job_id to its work_dir + """ + + buildenv = get_build_env_cfg(cfg) + poll_command = cfg[config.SECTION_JOB_MANAGER][config.JOB_MANAGER_SETTING_POLL_COMMAND] + job_name = buildenv[config.BUILDENV_SETTING_JOB_NAME] + + user = os.getenv("USER", None) + if user is None: + raise Exception("Environment variable $USER is not set.") + + # squeue only the given job IDs + cs_jobs = ",".join(job_ids) + command_line = f"{poll_command} --noheader --Format=JobId:0@,WorkDir:0 --user={user} --job={cs_jobs}" + if job_name: + command_line += f" --name={job_name}" + out, err, exit_code = run_cmd(command_line, "Get WorkDirs of jobs") + + # All output lines are formatted as '{job_id}@{work_dir}' + work_dirs = {} + for line in out.split("\n"): + job = [field.strip() for field in line.split("@")] + if len(job) != 2: + continue + work_dirs[job[0]] = job[1] + + return work_dirs + + +def cancel_jobs(jobs, user, pr, cfg): + """ + Cancels a list of build jobs. 
+ + Args: + jobs (list): (job_id, work_dir) tuples of the jobs to cancel + user (str): The user who sent the 'bot: cancel' command + pr (github.PullRequest.PullRequest): instance representing the pull request + cfg (ConfigParser): Instance containing full configuration from app.cfg + + Returns: + cancelled_jobs (list): job_ids of successfully cancelled jobs + """ + fn = sys._getframe().f_code.co_name + + buildenv = get_build_env_cfg(cfg) + cancel_command = buildenv[config.BUILDENV_SETTING_CANCEL_COMMAND] + + cancelled_jobs = [] + for job_id, work_dir in jobs: + # Get job owner and PR comment ID from metadata + metadata_path = os.path.join(work_dir, f"_bot_job{job_id}.metadata") + metadata = job_metadata.get_section_from_file( + filepath=metadata_path, + section=job_metadata.JOB_PR_SECTION, + ) + job_owner = metadata.get(job_metadata.JOB_PR_JOB_OWNER) + pr_comment_id = metadata.get(job_metadata.JOB_PR_PR_COMMENT_ID) + + # Only the job owner should be able to cancel a job + if job_owner != user: + log(f"{fn}(): User {user} did not start job {job_id} - skipping cancellation") + continue + log(f"{fn}(): Job {job_id} was started by user {user} - cancelling job") + + # Cancel job + command_line = f"{cancel_command} --verbose {job_id}" + out, err, exit_code = run_cmd(command_line, f"cancel job {job_id}", raise_on_error=False) + + # Check if command was successful + if exit_code != 0: + log(f"{fn}(): scancel resulted in a non-zero exit code for job {job_id}.") + continue + if any([line.startswith("scancel: error: ") for line in err.split("\n")]): + log(f"{fn}(): Unable to cancel job {job_id}.") + continue + + log(f"{fn}(): Cancelled job {job_id}") + + # Update job status table + dt = datetime.now(timezone.utc) + update = f"\n|{dt.strftime('%b %d %X %Z %Y')}|finished|job id `{job_id}` was cancelled|" + update_comment(int(pr_comment_id), pr, update) + + cancelled_jobs.append(job_id) + + return cancelled_jobs diff --git a/tests/test_bot_job123.metadata 
b/tests/test_bot_job123.metadata index 29f8965d..62010c4d 100644 --- a/tests/test_bot_job123.metadata +++ b/tests/test_bot_job123.metadata @@ -2,4 +2,5 @@ repo = test_repo pr_number = 999 pr_comment_id = 77 +job_owner = user01 diff --git a/tests/test_task_build.py b/tests/test_task_build.py index af49ac9b..fcb9f428 100644 --- a/tests/test_task_build.py +++ b/tests/test_task_build.py @@ -287,7 +287,7 @@ def test_create_pr_comment_succeeds(monkeypatch, mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic", "user01") build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" @@ -318,7 +318,7 @@ def test_create_pr_comment_succeeds_none(monkeypatch, mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic", "user01") build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" @@ -345,7 +345,7 @@ def test_create_pr_comment_raises_once_then_succeeds(monkeypatch, mocked_github, print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic", "user01") build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" @@ -372,7 +372,7 @@ def test_create_pr_comment_always_raises(monkeypatch, mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", 
"EESSI", "--speed-up", ym, pr_number, "fpga/magic") + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic", "user01") build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" @@ -400,7 +400,7 @@ def test_create_pr_comment_three_raises(monkeypatch, mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic", "user01") build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" @@ -423,7 +423,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # create some test data ym = datetime.today().strftime('%Y.%m') pr_number = 999 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") + job = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic", "user01") job_id = "123" @@ -441,6 +441,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # repo = test_repo # pr_number = 999 # pr_comment_id = 77 + # job_owner = user01 test_file = "tests/test_bot_job123.metadata" assert filecmp.cmp(expected_file_path, test_file, shallow=False) @@ -450,18 +451,21 @@ def test_create_read_metadata_file(mocked_github, tmpdir): assert metadata["PR"]["repo"] == "test_repo" assert metadata["PR"]["pr_number"] == "999" assert metadata["PR"]["pr_comment_id"] == "77" - assert sorted(metadata["PR"].keys()) == ["pr_comment_id", "pr_number", "repo"] + assert metadata["PR"]["job_owner"] == "user01" + assert sorted(metadata["PR"].keys()) == ["job_owner", "pr_comment_id", "pr_number", "repo"] # use directory that does not exist dir_does_not_exist = os.path.join(tmpdir, "dir_does_not_exist") - job2 = Job(dir_does_not_exist, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") 
+ job2 = Job(dir_does_not_exist, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic", + "user01") job_id2 = "222" with pytest.raises(FileNotFoundError): create_metadata_file(job2, job_id2, pr_comment) # use directory without write permission dir_without_write_perm = os.path.join("/") - job3 = Job(dir_without_write_perm, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") + job3 = Job(dir_without_write_perm, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic", + "user01") job_id3 = "333" with pytest.raises(OSError): create_metadata_file(job3, job_id3, pr_comment) @@ -471,7 +475,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # use undefined values for parameters # job_id = None - job4 = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") + job4 = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic", "user01") job_id4 = None create_metadata_file(job4, job_id4, pr_comment) @@ -486,7 +490,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # use undefined values for parameters # job.working_dir = None - job5 = Job(None, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") + job5 = Job(None, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic", "user01") job_id5 = "555" with pytest.raises(TypeError): create_metadata_file(job5, job_id5, pr_comment) diff --git a/tools/config.py b/tools/config.py index 7f814ea4..70dfb472 100644 --- a/tools/config.py +++ b/tools/config.py @@ -44,6 +44,7 @@ BUILDENV_SETTING_BUILD_JOB_SCRIPT = 'build_job_script' BUILDENV_SETTING_BUILD_LOGS_DIR = 'build_logs_dir' BUILDENV_SETTING_BUILD_PERMISSION = 'build_permission' +BUILDENV_SETTING_CANCEL_COMMAND = 'cancel_command' BUILDENV_SETTING_CLONE_GIT_REPO_VIA = 'clone_git_repo_via' BUILDENV_SETTING_CONTAINER_CACHEDIR = 'container_cachedir' BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 
'cvmfs_customizations' diff --git a/tools/filter.py b/tools/filter.py index ddc58352..5027053c 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -27,7 +27,7 @@ FILTER_COMPONENT_ARCH = 'architecture' FILTER_COMPONENT_EXPORT = 'exportvariable' FILTER_COMPONENT_INST = 'instance' -FILTER_COMPONENT_JOB = 'job' +FILTER_COMPONENT_JOB = 'jobid' FILTER_COMPONENT_REPO = 'repository' FILTER_COMPONENTS = [FILTER_COMPONENT_ACCEL, FILTER_COMPONENT_ARCH, diff --git a/tools/job_metadata.py b/tools/job_metadata.py index f5ee21ce..e4031faf 100644 --- a/tools/job_metadata.py +++ b/tools/job_metadata.py @@ -63,6 +63,7 @@ JOB_PR_REPO = "repo" JOB_PR_PR_NUMBER = "pr_number" JOB_PR_PR_COMMENT_ID = "pr_comment_id" +JOB_PR_JOB_OWNER = "job_owner" # JWD/_bot_jobJOBID.result JOB_RESULT_SECTION = "RESULT" @@ -99,12 +100,14 @@ def create_metadata_file(job, job_id, pr_comment): repo_name = pr_comment.repo_name pr_number = pr_comment.pr_number pr_comment_id = pr_comment.pr_comment_id + job_owner = job.owner # create _bot_job.metadata file in the job's working directory bot_jobfile = configparser.ConfigParser() bot_jobfile[JOB_PR_SECTION] = {'repo': repo_name, 'pr_number': pr_number, - 'pr_comment_id': pr_comment_id} + 'pr_comment_id': pr_comment_id, + 'job_owner': job_owner} bot_jobfile_path = os.path.join(job.working_dir, f'_bot_job{job_id}.metadata') with open(bot_jobfile_path, 'w') as bjf: bot_jobfile.write(bjf) From 4947dce2bc49342ec86f01956bf38e5225075919 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 22 Jan 2026 07:14:47 +0100 Subject: [PATCH 05/16] use configurable GH API timeout + token for higher rate limits --- README.md | 6 ++++++ app.cfg.example | 3 +++ eessi_bot_event_handler.py | 1 + eessi_bot_job_manager.py | 1 + tasks/build.py | 15 ++++++++++++++- tools/config.py | 1 + 6 files changed, 26 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8aa1b931..60a153df 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,12 @@ The example file 
(`app.cfg.example`) includes notes on what you have to adjust t The section `[github]` contains information for connecting to GitHub: +```ini +api_timeout = 10 +``` + +Time limit (in seconds) for requests to GitHub's REST API. + ```ini app_id = 123456 ``` diff --git a/app.cfg.example b/app.cfg.example index 0b393a4c..cac62451 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -18,6 +18,9 @@ # Also see documentation at https://github.com/EESSI/eessi-bot-software-layer/blob/main/README.md#step5.5 [github] +# API timeout, time limit (in seconds) for requests to GitHub's REST API +api_timeout = 10 + # replace '123456' with the ID of your GitHub App; see https://github.com/settings/apps app_id = 123456 diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 04529a7e..4a761b77 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -95,6 +95,7 @@ config.SECTION_EVENT_HANDLER: [ config.EVENT_HANDLER_SETTING_LOG_PATH], # required config.SECTION_GITHUB: [ + config.GITHUB_SETTING_API_TIMEOUT, # required config.GITHUB_SETTING_APP_ID, # required config.GITHUB_SETTING_APP_NAME, # required config.GITHUB_SETTING_INSTALLATION_ID, # required diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index fd67b913..85fba369 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -57,6 +57,7 @@ config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required config.SECTION_GITHUB: [ + # config.GITHUB_SETTING_API_TIMEOUT, # unused config.GITHUB_SETTING_APP_ID, # required # config.GITHUB_SETTING_APP_NAME, # unused config.GITHUB_SETTING_INSTALLATION_ID, # required diff --git a/tasks/build.py b/tasks/build.py index 4bdbbda0..742cc384 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -33,6 +33,7 @@ from pyghee.utils import error, log # Local application imports (anything from EESSI/eessi-bot-software-layer) +from connections import github from tools import config, 
cvmfs_repository, job_metadata, pr_comments, run_cmd import tools.filter as tools_filter from tools.pr_comments import ChatLevels, create_comment @@ -1204,6 +1205,8 @@ def request_bot_build_issue_comments(repo_name, pr_number): status_table = {'on arch': [], 'for arch': [], 'for repo': [], 'date': [], 'status': [], 'url': [], 'result': []} cfg = config.read_config() + github_section = cfg.get(config.SECTION_GITHUB) + api_timeout = cfg.get(config.GITHUB_SETTING_API_TIMEOUT, 10) # for loop because github has max 100 items per request. # if the pr has more than 100 comments we need to use per_page @@ -1212,9 +1215,19 @@ def request_bot_build_issue_comments(repo_name, pr_number): url = f'https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments' all_comments = [] + # call get_instance() to obtain a (new) token (accessible via token()) + # get_instance ensures that the token is renewed if the current one is no + # longer valid or valid for less than 30 minutes + _ = github.get_instance() try: while url: - response = requests.get(url, params={'per_page': 100}) + headers = { + 'Authorization': f'Bearer {token()}', + 'Accept': 'application/vnd.github+json', + 'X-GitHub-Api-Version': '2022-11-28' + } + + response = requests.get(url, params={'per_page': 100}, timeout=api_timeout) response.raise_for_status() all_comments.extend(response.json()) diff --git a/tools/config.py b/tools/config.py index 7f814ea4..27066820 100644 --- a/tools/config.py +++ b/tools/config.py @@ -95,6 +95,7 @@ FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT = 'job_test_unknown_fmt' SECTION_GITHUB = 'github' +GITHUB_SETTING_API_TIMEOUT = 'api_timeout' GITHUB_SETTING_APP_ID = 'app_id' GITHUB_SETTING_APP_NAME = 'app_name' GITHUB_SETTING_INSTALLATION_ID = 'installation_id' From a56a9ffa3a75811ae7d469de58b7743754c3b211 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 22 Jan 2026 07:38:18 +0100 Subject: [PATCH 06/16] fix hound issue --- tasks/build.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index 742cc384..f6dd4f23 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1222,7 +1222,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): try: while url: headers = { - 'Authorization': f'Bearer {token()}', + 'Authorization': f'Bearer {github.token()}', 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } From cec0e684a994be58a405432270ad977b1e374c43 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sat, 24 Jan 2026 20:20:41 +0100 Subject: [PATCH 07/16] fix access to token, add headers and log rate limit information --- tasks/build.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index f6dd4f23..fef9f126 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -28,6 +28,7 @@ import shutil import string import sys +import time # Third party imports (anything installed into the local Python environment) from pyghee.utils import error, log @@ -1205,8 +1206,9 @@ def request_bot_build_issue_comments(repo_name, pr_number): status_table = {'on arch': [], 'for arch': [], 'for repo': [], 'date': [], 'status': [], 'url': [], 'result': []} cfg = config.read_config() - github_section = cfg.get(config.SECTION_GITHUB) - api_timeout = cfg.get(config.GITHUB_SETTING_API_TIMEOUT, 10) + github_section = cfg[config.SECTION_GITHUB] + api_timeout = int(github_section.get(config.GITHUB_SETTING_API_TIMEOUT, 10)) + # for loop because github has max 100 items per request. 
# if the pr has more than 100 comments we need to use per_page @@ -1215,24 +1217,33 @@ def request_bot_build_issue_comments(repo_name, pr_number): url = f'https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments' all_comments = [] - # call get_instance() to obtain a (new) token (accessible via token()) + # call get_instance() to obtain a (new) token (accessible via github.token().token) # get_instance ensures that the token is renewed if the current one is no # longer valid or valid for less than 30 minutes _ = github.get_instance() try: while url: headers = { - 'Authorization': f'Bearer {github.token()}', + 'Authorization': f'Bearer {github.token().token}', 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } - response = requests.get(url, params={'per_page': 100}, timeout=api_timeout) + response = requests.get(url, headers=headers, params={'per_page': 100}, timeout=api_timeout) response.raise_for_status() all_comments.extend(response.json()) # get next URL from Link header in response (we are done if that is empty) url = response.links.get('next', {}).get('url') + log(f"{fn}(): more comments? 
{url!r}") + reset_time = int(response.headers.get('X-RateLimit-Reset')) + utc_time = datetime.fromtimestamp(reset_time, tz=timezone.utc) + time_left = int(reset_time - time.time()) + log(f"{fn}(): limits with token '{github.token().token[:4]}...':\n" + f" rate limit.: {response.headers.get('X-RateLimit-Limit')}\n" + f" remaining..: {response.headers.get('X-RateLimit-Remaining')}\n" + f" reset limit: {utc_time.strftime('%b %d %I:%M:%S %p UTC %Y')} (in {time_left} seconds)\n" + ) except Exception as err: log(f"{fn}(): obtaining comments for PR {pr_number} in repo {repo_name!r} failed: {err}") From c9ff8d9bfbebdc2792f817a8b45fb882d5ba58c2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sat, 24 Jan 2026 20:22:44 +0100 Subject: [PATCH 08/16] remove extra empty line --- tasks/build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index fef9f126..9115ad06 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1209,7 +1209,6 @@ def request_bot_build_issue_comments(repo_name, pr_number): github_section = cfg[config.SECTION_GITHUB] api_timeout = int(github_section.get(config.GITHUB_SETTING_API_TIMEOUT, 10)) - # for loop because github has max 100 items per request. 
# if the pr has more than 100 comments we need to use per_page # argument at the moment the for loop is for a max of 400 comments could bump this up From dab39f32c57bfc1f2a8bf2ce44c88f4f7a86b4e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sondre=20Bergsv=C3=A5g=20Risanger?= <168830227+sondrebr@users.noreply.github.com> Date: Mon, 26 Jan 2026 13:44:43 +0100 Subject: [PATCH 09/16] Fix f-string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Röblitz --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index 1af052e8..97cba3ad 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1489,7 +1489,7 @@ def cancel_jobs(jobs, user, pr, cfg): # Update job status table dt = datetime.now(timezone.utc) - update = f"\n|{dt.strftime("%b %d %X %Z %Y")}|finished|job id `{job_id}` was cancelled|" + update = f"\n|{dt.strftime('%b %d %X %Z %Y')}|finished|job id `{job_id}` was cancelled|" update_comment(int(pr_comment_id), pr, update) cancelled_jobs.append(job_id) From b446d01d35b4ced3416521ba61b7d875b7aadc97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sondre=20Bergsv=C3=A5g=20Risanger?= <168830227+sondrebr@users.noreply.github.com> Date: Mon, 26 Jan 2026 16:09:16 +0100 Subject: [PATCH 10/16] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Röblitz --- tasks/build.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 97cba3ad..7f4bb984 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1383,7 +1383,7 @@ def get_job_ids(action_filter): # Get 'jobid:' arguments job_filter = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_JOB) if not job_filter: - log(f"{fn}(): bot: cancel needs at least one 'jobid:' argument.") + log(f"{fn}(): 'bot: cancel' command needs at least one 'jobid:' argument.") 
return [] # Validate job IDs @@ -1394,8 +1394,8 @@ def get_job_ids(action_filter): job_ids.append(job_id) else: log(f"{fn}(): Invalid job ID: '{job_id}'") - except Exception as e: - log(f"{fn}(): Invalid job ID: {e}") + except Exception as err: + log(f"{fn}(): Invalid job ID: {err}") return job_ids From d75ec778887d25c6d7f7ebe756e0da7c4bcc1fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sondre=20Bergsv=C3=A5g=20Risanger?= <168830227+sondrebr@users.noreply.github.com> Date: Mon, 26 Jan 2026 16:16:37 +0100 Subject: [PATCH 11/16] Sort imported symbols alphabetically --- eessi_bot_event_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 0a6cc03c..5641caf0 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -29,8 +29,8 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from connections import github -from tasks.build import cancel_jobs, check_build_permission, get_job_ids, get_work_dirs, \ - get_node_types, request_bot_build_issue_comments, submit_build_jobs +from tasks.build import cancel_jobs, check_build_permission, get_job_ids, get_node_types, \ + get_work_dirs, request_bot_build_issue_comments, submit_build_jobs from tasks.deploy import deploy_built_artefacts, determine_job_dirs from tasks.clean_up import move_to_trash_bin from tools import config From 9db5d3280a385b3e2892049e001a7dd62ebc6327 Mon Sep 17 00:00:00 2001 From: "Sondre B. 
Risanger" <168830227+sondrebr@users.noreply.github.com> Date: Mon, 26 Jan 2026 16:33:35 +0100 Subject: [PATCH 12/16] Rename constant - Renamed `FILTER_COMPONENT_JOB` to `FILTER_COMPONENT_JOBID` --- tasks/build.py | 2 +- tools/filter.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 7f4bb984..61bc4248 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1381,7 +1381,7 @@ def get_job_ids(action_filter): fn = sys._getframe().f_code.co_name # Get 'jobid:' arguments - job_filter = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_JOB) + job_filter = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_JOBID) if not job_filter: log(f"{fn}(): 'bot: cancel' command needs at least one 'jobid:' argument.") return [] diff --git a/tools/filter.py b/tools/filter.py index 5027053c..54e0f5e3 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -27,13 +27,13 @@ FILTER_COMPONENT_ARCH = 'architecture' FILTER_COMPONENT_EXPORT = 'exportvariable' FILTER_COMPONENT_INST = 'instance' -FILTER_COMPONENT_JOB = 'jobid' +FILTER_COMPONENT_JOBID = 'jobid' FILTER_COMPONENT_REPO = 'repository' FILTER_COMPONENTS = [FILTER_COMPONENT_ACCEL, FILTER_COMPONENT_ARCH, FILTER_COMPONENT_EXPORT, FILTER_COMPONENT_INST, - FILTER_COMPONENT_JOB, + FILTER_COMPONENT_JOBID, FILTER_COMPONENT_REPO ] From f452c5d2a6296e0f10098ae1361c8fabe74bddbe Mon Sep 17 00:00:00 2001 From: "Sondre B. 
Risanger" <168830227+sondrebr@users.noreply.github.com> Date: Tue, 27 Jan 2026 09:28:44 +0100 Subject: [PATCH 13/16] Filter only by job IDs when getting work dirs --- tasks/build.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 61bc4248..df0c9eca 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1411,20 +1411,11 @@ def get_work_dirs(job_ids, cfg): Returns: work_dirs (dict): dict mapping each job_id to its work_dir """ - - buildenv = get_build_env_cfg(cfg) poll_command = cfg[config.SECTION_JOB_MANAGER][config.JOB_MANAGER_SETTING_POLL_COMMAND] - job_name = buildenv[config.BUILDENV_SETTING_JOB_NAME] - - user = os.getenv("USER", None) - if user is None: - raise Exception("Environment variable $USER is not set.") # squeue only the given job IDs cs_jobs = ",".join(job_ids) - command_line = f"{poll_command} --noheader --Format=JobId:0@,WorkDir:0 --user={user} --job={cs_jobs}" - if job_name: - command_line += f" --name={job_name}" + command_line = f"{poll_command} --noheader --Format=JobId:0@,WorkDir:0 --job={cs_jobs}" out, err, exit_code = run_cmd(command_line, "Get WorkDirs of jobs") # All output lines are formatted as '{job_id}@{work_dir}' From 30398f165f940a4539d4ffdbb3af5b809ea484ea Mon Sep 17 00:00:00 2001 From: "Sondre B. Risanger" <168830227+sondrebr@users.noreply.github.com> Date: Tue, 27 Jan 2026 10:08:22 +0100 Subject: [PATCH 14/16] Pass work_dirs to cancel_jobs as-is --- eessi_bot_event_handler.py | 7 ++----- tasks/build.py | 6 +++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 5641caf0..59678b1c 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -722,15 +722,12 @@ def handle_bot_command_cancel(self, event_info, bot_command): return "\n - No cancellable jobs were given." 
# Log skipped jobs - jobs = [] for job_id in job_ids: - if job_id in work_dirs: - jobs.append((job_id, work_dirs.get(job_id))) - else: + if job_id not in work_dirs.keys(): log(f"Skipping job {job_id} - not found") # Cancel jobs - cancelled_jobs = cancel_jobs(jobs, user, pr, self.cfg) + cancelled_jobs = cancel_jobs(work_dirs, user, pr, self.cfg) if len(cancelled_jobs) == 0: return "\n - No jobs were cancelled." else: diff --git a/tasks/build.py b/tasks/build.py index df0c9eca..0522da6a 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1431,10 +1431,10 @@ def get_work_dirs(job_ids, cfg): def cancel_jobs(jobs, user, pr, cfg): """ - Cancels a list of build jobs. + Cancels the given build jobs. Args: - jobs (list): (job_id, work_dir) tuples of the jobs to cancel + jobs (dict): dictionary mapping each job_id to cancel to its work_dir user (str): The user who sent the 'bot: cancel' command pr (github.PullRequest.PullRequest): instance representing the pull request cfg (ConfigParser): Instance containing full configuration from app.cfg @@ -1448,7 +1448,7 @@ def cancel_jobs(jobs, user, pr, cfg): cancel_command = buildenv[config.BUILDENV_SETTING_CANCEL_COMMAND] cancelled_jobs = [] - for job_id, work_dir in jobs: + for job_id, work_dir in jobs.items(): # Get job owner and PR comment ID from metadata metadata_path = os.path.join(work_dir, f"_bot_job{job_id}.metadata") metadata = job_metadata.get_section_from_file( From 2785cd66fad81b3823264accdb7858c6bff703c7 Mon Sep 17 00:00:00 2001 From: "Sondre B. Risanger" <168830227+sondrebr@users.noreply.github.com> Date: Wed, 28 Jan 2026 09:35:38 +0100 Subject: [PATCH 15/16] Update bot comments --- README.md | 4 ++-- app.cfg.example | 2 +- eessi_bot_event_handler.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 87226dac..58bb369f 100644 --- a/README.md +++ b/README.md @@ -566,11 +566,11 @@ name on GitHub. 
Thus, one could not - by accident - give build permissions to an unknown account. ```ini -no_build_permission_comment = The `bot: build ...` command has been used by user `{build_labeler}`, but this person does not have permission to trigger builds. +no_build_permission_comment = GH account `{build_labeler}` is not authorized to trigger or cancel build jobs. ``` `no_build_permission_comment` defines a comment (template) that is used when -the account trying to trigger build jobs has no permission to do so. +the account trying to trigger or cancel build jobs has no permission to do so. ```ini allow_update_submit_opts = false diff --git a/app.cfg.example b/app.cfg.example index a5b1441c..f4981d6a 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -164,7 +164,7 @@ cancel_command = /usr/bin/scancel build_permission = -NOT_ALLOWED_GH_ACCOUNT_NAME- # template for comment when user who set a label has no permission to trigger build jobs -no_build_permission_comment = Label `bot:build` has been set by user `{build_labeler}`, but this person does not have permission to trigger builds +no_build_permission_comment = GH account `{build_labeler}` is not authorized to trigger or cancel build jobs. # whether or not to allow updating the submit options via custom module det_submit_opts # Should only be enabled (true) with care because this will result in code from the target diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 59678b1c..31ba82ae 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -707,7 +707,7 @@ def handle_bot_command_cancel(self, event_info, bot_command): # -> No need to proceed if user cannot submit jobs if not check_build_permission(pr, event_info): self.log(f"User '{user}' does not have build permission - skipping cancellation.") - return f"\n - User '{user}' cannot submit build jobs." + return f"\n - User `{user}` cannot submit or cancel build jobs." 
# Get valid 'jobid:' arguments job_ids = get_job_ids(bot_command.action_filters) From a5af596631cc4cdb2821aa49161a8ea92339b30b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20R=C3=B6blitz?= Date: Wed, 28 Jan 2026 12:58:38 +0100 Subject: [PATCH 16/16] release notes for v0.11.0 --- RELEASE_NOTES | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 3a764054..832a5798 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,6 +1,28 @@ This file contains a description of the major changes to the EESSI build-and-deploy bot. For more detailed information, please see the git log. +v0.11.0 (28 January 2026) +-------------------------- + +This is a minor release of the EESSI build-and-deploy bot. + +Bug fixes: +* consider all builds for `bot: status [last_build]` command (#357) + * this also replaces one `curl` invocation with a call made via the `requests` library + +Improvements: +* adds support for new command `bot: cancel jobid:[JOBID] ...` (#359) + * only the owner of a job can cancel it + * multiple jobs can be cancelled by specifying multiple `jobid:[JOBID]` + arguments separated by space + +Changes to 'app.cfg' settings (see README.md and app.cfg.example for details): +* CHANGED (required) 'no_build_permission_comment' in section '[buildenv]' + Note! sites using the old value may see misleading comments added by the bot, + but the bot will work without the change. +* NEW (required) 'cancel_command' in section '[buildenv]' + + v0.10.0 (13 November 2025) --------------------------