From 97d66258252f6457b156590d3df2af5a8a89da3d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 2 Dec 2025 21:58:55 +0100 Subject: [PATCH 1/6] use requests library instead of 'curl' and obtain all comments --- tasks/build.py | 298 +++++++++++++++++++++++++------------------------ 1 file changed, 154 insertions(+), 144 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 165ab544..f0c8aa19 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -24,6 +24,7 @@ import json import os import re +import requests import shutil import string import sys @@ -1207,154 +1208,163 @@ def request_bot_build_issue_comments(repo_name, pr_number): # for loop because github has max 100 items per request. # if the pr has more than 100 comments we need to use per_page # argument at the moment the for loop is for a max of 400 comments could bump this up - for x in range(1, 5): - curl_cmd = f'curl -L https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments?per_page=100&page={x}' - curl_output, curl_error, curl_exit_code = run_cmd(curl_cmd, "fetch all comments") - - comments = json.loads(curl_output) - - for comment in comments: - # iterate through the comments to find the one where the status of the build was in - submitted_job_comments_section = cfg[config.SECTION_SUBMITTED_JOB_COMMENTS] - accelerator_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR] - instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] - instance_repo_re = template_to_regex(instance_repo_fmt) - comment_body = comment['body'].split('\n') - instance_repo_match = re.match(instance_repo_re, comment_body[0]) - # Check if this body starts with an initial comment from the bot (first item is always the instance + repo - # it is building for) - # Then, check that it has at least 4 lines so that we can safely index up to that number - if instance_repo_match and len(comment_body) >= 4: - # Set some defaults - 
repo_id = "" - on_arch = "" - for_arch = "" - date = "" - status = "" - url = "" - result = "" - - log(f"{fn}(): found bot build response in issue, processing...") - - # First, extract the repo_id - log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}") - repo_id = instance_repo_match.group('repo_id') - - # Then, try to match the architecture we build on. - # First try this including accelerator, to see if one was defined - on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] - on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt)) - on_arch_re_with_accel = template_to_regex(on_arch_fmt_with_accel) - on_arch_match = re.match(on_arch_re_with_accel, comment_body[1]) + + url = f'https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments' + all_comments = [] + + try: + while url: + response = requests.get(url, params={'per_page': 100}) + respone.raise_for_status() + + all_comments.extend(response.json()) + # get next URL from Link header in response (we are done if that is empty) + url = response.links.get('next', {}).get('url') + + except Exception as err: + log(f"{fn}(): obtaining comments for PR {pr_number} in repo {repo_name!r} failed: {err}") + return status_table + + for comment in all_comments: + # iterate through the comments to find the one where the status of the build was in + submitted_job_comments_section = cfg[config.SECTION_SUBMITTED_JOB_COMMENTS] + accelerator_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR] + instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] + instance_repo_re = template_to_regex(instance_repo_fmt) + comment_body = comment['body'].split('\n') + instance_repo_match = re.match(instance_repo_re, comment_body[0]) + # Check if this body starts with an initial comment from the bot (first item is always the instance + 
repo + # it is building for) + # Then, check that it has at least 4 lines so that we can safely index up to that number + if instance_repo_match and len(comment_body) >= 4: + # Set some defaults + repo_id = "" + on_arch = "" + for_arch = "" + date = "" + status = "" + url = "" + result = "" + + log(f"{fn}(): found bot build response in issue, processing...") + + # First, extract the repo_id + log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}") + repo_id = instance_repo_match.group('repo_id') + + # Then, try to match the architecture we build on. + # First try this including accelerator, to see if one was defined + on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] + on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt)) + on_arch_re_with_accel = template_to_regex(on_arch_fmt_with_accel) + on_arch_match = re.match(on_arch_re_with_accel, comment_body[1]) + if on_arch_match: + # Pattern with accelerator matched, append to status_table + log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}, " + f"with accelerator {on_arch_match.group('accelerator')}") + on_arch = f"`{on_arch_match.group('on_arch')}`, `{on_arch_match.group('accelerator')}`" + else: + # Pattern with accelerator did not match, retry without accelerator + on_arch_re = template_to_regex(on_arch_fmt) + on_arch_match = re.match(on_arch_re, comment_body[1]) if on_arch_match: - # Pattern with accelerator matched, append to status_table - log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}, " - f"with accelerator {on_arch_match.group('accelerator')}") - on_arch = f"`{on_arch_match.group('on_arch')}`, `{on_arch_match.group('accelerator')}`" + # Pattern without accelerator matched, append to status_table + log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}") + on_arch = f"`{on_arch_match.group('on_arch')}`" else: - # 
Pattern with accelerator did not match, retry without accelerator - on_arch_re = template_to_regex(on_arch_fmt) - on_arch_match = re.match(on_arch_re, comment_body[1]) - if on_arch_match: - # Pattern without accelerator matched, append to status_table - log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}") - on_arch = f"`{on_arch_match.group('on_arch')}`" - else: - # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' - msg = "Could not match regular expression for extracting the architecture to build on.\n" - msg += "String to be matched:\n" - msg += f"{comment_body[1]}\n" - msg += "First regex attempted:\n" - msg += f"{on_arch_re_with_accel.pattern}\n" - msg += "Second regex attempted:\n" - msg += f"{on_arch_re.pattern}\n" - raise ValueError(msg) - - # Now, do the same for the architecture we build for. I.e. first, try to match including accelerator - for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] - for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt)) - for_arch_re_with_accel = template_to_regex(for_arch_fmt_with_accel) - for_arch_match = re.match(for_arch_re_with_accel, comment_body[2]) + # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' + msg = "Could not match regular expression for extracting the architecture to build on.\n" + msg += "String to be matched:\n" + msg += f"{comment_body[1]}\n" + msg += "First regex attempted:\n" + msg += f"{on_arch_re_with_accel.pattern}\n" + msg += "Second regex attempted:\n" + msg += f"{on_arch_re.pattern}\n" + raise ValueError(msg) + + # Now, do the same for the architecture we build for. I.e. 
first, try to match including accelerator + for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] + for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt)) + for_arch_re_with_accel = template_to_regex(for_arch_fmt_with_accel) + for_arch_match = re.match(for_arch_re_with_accel, comment_body[2]) + if for_arch_match: + # Pattern with accelerator matched, append to status_table + log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, " + f"with accelerator {for_arch_match.group('accelerator')}") + for_arch = f"`{for_arch_match.group('for_arch')}`, `{for_arch_match.group('accelerator')}`" + else: + # Pattern with accelerator did not match, retry without accelerator + for_arch_re = template_to_regex(for_arch_fmt) + for_arch_match = re.match(for_arch_re, comment_body[2]) if for_arch_match: - # Pattern with accelerator matched, append to status_table - log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, " - f"with accelerator {for_arch_match.group('accelerator')}") - for_arch = f"`{for_arch_match.group('for_arch')}`, `{for_arch_match.group('accelerator')}`" + # Pattern without accelerator matched, append to status_table + log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}") + for_arch = f"`{for_arch_match.group('for_arch')}`" else: - # Pattern with accelerator did not match, retry without accelerator - for_arch_re = template_to_regex(for_arch_fmt) - for_arch_match = re.match(for_arch_re, comment_body[2]) - if for_arch_match: - # Pattern without accelerator matched, append to status_table - log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}") - for_arch = f"`{for_arch_match.group('for_arch')}`" - else: - # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' - msg = "Could not match regular expression for extracting the architecture to 
build for.\n" - msg += "String to be matched:\n" - msg += f"{comment_body[2]}\n" - msg += "First regex attempted:\n" - msg += f"{for_arch_re_with_accel.pattern}\n" - msg += "Second regex attempted:\n" - msg += f"{for_arch_re.pattern}\n" - raise ValueError(msg) - - # get date, status, url and result from the markdown table - comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1] - - # Convert markdown table to a dictionary - lines = comment_table.split('\n') - rows = [] - keys = [] - for i, row in enumerate(lines): - values = {} - if i == 0: - for key in row.split('|'): - keys.append(key.strip()) - elif i == 1: - continue + # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' + msg = "Could not match regular expression for extracting the architecture to build for.\n" + msg += "String to be matched:\n" + msg += f"{comment_body[2]}\n" + msg += "First regex attempted:\n" + msg += f"{for_arch_re_with_accel.pattern}\n" + msg += "Second regex attempted:\n" + msg += f"{for_arch_re.pattern}\n" + raise ValueError(msg) + + # get date, status, url and result from the markdown table + comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1] + + # Convert markdown table to a dictionary + lines = comment_table.split('\n') + rows = [] + keys = [] + for i, row in enumerate(lines): + values = {} + if i == 0: + for key in row.split('|'): + keys.append(key.strip()) + elif i == 1: + continue + else: + for j, value in enumerate(row.split('|')): + if j > 0 and j < len(keys) - 1: + values[keys[j]] = value.strip() + rows.append(values) + + # add date, status, url to status_table if + for row in rows: + if row['job status'] == 'finished': + date = row['date'] + status = row['job status'] + url = comment['html_url'] + if 'FAILURE' in row['comment']: + result = ':cry: FAILURE' + elif 'SUCCESS' in row['comment']: + result = ':grin: SUCCESS' + elif 'UNKNOWN' in row['comment']: + result = 
':shrug: UNKNOWN' else: - for j, value in enumerate(row.split('|')): - if j > 0 and j < len(keys) - 1: - values[keys[j]] = value.strip() - rows.append(values) - - # add date, status, url to status_table if - for row in rows: - if row['job status'] == 'finished': - date = row['date'] - status = row['job status'] - url = comment['html_url'] - if 'FAILURE' in row['comment']: - result = ':cry: FAILURE' - elif 'SUCCESS' in row['comment']: - result = ':grin: SUCCESS' - elif 'UNKNOWN' in row['comment']: - result = ':shrug: UNKNOWN' - else: - result = row['comment'] - elif row['job status'] in ['submitted', 'received', 'running']: - # Make sure that if the job is not finished yet, we also put something useful in these fields - # It is useful to know a job is submitted, running, etc - date = row['date'] - status = row['job status'] - url = comment['html_url'] result = row['comment'] - else: - # Don't do anything for the test line for now - we might add an extra entry to the status - # table later to reflect the test result - continue - - # Add all entries to status_table. We do this at the end of this loop so that the operation is - # more or less 'atomic', i.e. 
all vectors in the status_table dict have the same length - status_table['for repo'].append(repo_id) - status_table['on arch'].append(on_arch) - status_table['for arch'].append(for_arch) - status_table['date'].append(date) - status_table['status'].append(status) - status_table['url'].append(url) - status_table['result'].append(result) - - if len(comments) != 100: - break + elif row['job status'] in ['submitted', 'received', 'running']: + # Make sure that if the job is not finished yet, we also put something useful in these fields + # It is useful to know a job is submitted, running, etc + date = row['date'] + status = row['job status'] + url = comment['html_url'] + result = row['comment'] + else: + # Don't do anything for the test line for now - we might add an extra entry to the status + # table later to reflect the test result + continue + + # Add all entries to status_table. We do this at the end of this loop so that the operation is + # more or less 'atomic', i.e. all vectors in the status_table dict have the same length + status_table['for repo'].append(repo_id) + status_table['on arch'].append(on_arch) + status_table['for arch'].append(for_arch) + status_table['date'].append(date) + status_table['status'].append(status) + status_table['url'].append(url) + status_table['result'].append(result) + return status_table From 5c22855a6ffa1736c9fe22552e57b49b03d72754 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 2 Dec 2025 22:06:38 +0100 Subject: [PATCH 2/6] fix spelling typo --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index f0c8aa19..4bdbbda0 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1215,7 +1215,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): try: while url: response = requests.get(url, params={'per_page': 100}) - respone.raise_for_status() + response.raise_for_status() all_comments.extend(response.json()) # get next URL from Link header in response (we 
are done if that is empty) From 4947dce2bc49342ec86f01956bf38e5225075919 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 22 Jan 2026 07:14:47 +0100 Subject: [PATCH 3/6] use configurable GH API timeout + token for higher rate limits --- README.md | 6 ++++++ app.cfg.example | 3 +++ eessi_bot_event_handler.py | 1 + eessi_bot_job_manager.py | 1 + tasks/build.py | 15 ++++++++++++++- tools/config.py | 1 + 6 files changed, 26 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8aa1b931..60a153df 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,12 @@ The example file (`app.cfg.example`) includes notes on what you have to adjust t The section `[github]` contains information for connecting to GitHub: +```ini +api_timeout = 10 +``` + +Time limit (in seconds) for requests to GitHub's REST API. + ```ini app_id = 123456 ``` diff --git a/app.cfg.example b/app.cfg.example index 0b393a4c..cac62451 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -18,6 +18,9 @@ # Also see documentation at https://github.com/EESSI/eessi-bot-software-layer/blob/main/README.md#step5.5 [github] +# API timeout, time limit (in seconds) for requests to GitHub's REST API +api_timeout = 10 + # replace '123456' with the ID of your GitHub App; see https://github.com/settings/apps app_id = 123456 diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 04529a7e..4a761b77 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -95,6 +95,7 @@ config.SECTION_EVENT_HANDLER: [ config.EVENT_HANDLER_SETTING_LOG_PATH], # required config.SECTION_GITHUB: [ + config.GITHUB_SETTING_API_TIMEOUT, # required config.GITHUB_SETTING_APP_ID, # required config.GITHUB_SETTING_APP_NAME, # required config.GITHUB_SETTING_INSTALLATION_ID, # required diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index fd67b913..85fba369 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -57,6 +57,7 @@ config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # 
required config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required config.SECTION_GITHUB: [ + # config.GITHUB_SETTING_API_TIMEOUT, # unused config.GITHUB_SETTING_APP_ID, # required # config.GITHUB_SETTING_APP_NAME, # unused config.GITHUB_SETTING_INSTALLATION_ID, # required diff --git a/tasks/build.py b/tasks/build.py index 4bdbbda0..742cc384 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -33,6 +33,7 @@ from pyghee.utils import error, log # Local application imports (anything from EESSI/eessi-bot-software-layer) +from connections import github from tools import config, cvmfs_repository, job_metadata, pr_comments, run_cmd import tools.filter as tools_filter from tools.pr_comments import ChatLevels, create_comment @@ -1204,6 +1205,8 @@ def request_bot_build_issue_comments(repo_name, pr_number): status_table = {'on arch': [], 'for arch': [], 'for repo': [], 'date': [], 'status': [], 'url': [], 'result': []} cfg = config.read_config() + github_section = cfg.get(config.SECTION_GITHUB) + api_timeout = cfg.get(config.GITHUB_SETTING_API_TIMEOUT, 10) # for loop because github has max 100 items per request. 
# if the pr has more than 100 comments we need to use per_page @@ -1212,9 +1215,19 @@ def request_bot_build_issue_comments(repo_name, pr_number): url = f'https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments' all_comments = [] + # call get_instance() to obtain a (new) token (accessible via token()) + # get_instance ensures that the token is renewed if the current one is no + # longer valid or valid for less than 30 minutes + _ = github.get_instance() try: while url: - response = requests.get(url, params={'per_page': 100}) + headers = { + 'Authorization': f'Bearer {token()}', + 'Accept': 'application/vnd.github+json', + 'X-GitHub-Api-Version': '2022-11-28' + } + + response = requests.get(url, params={'per_page': 100}, timeout=api_timeout) response.raise_for_status() all_comments.extend(response.json()) diff --git a/tools/config.py b/tools/config.py index 7f814ea4..27066820 100644 --- a/tools/config.py +++ b/tools/config.py @@ -95,6 +95,7 @@ FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT = 'job_test_unknown_fmt' SECTION_GITHUB = 'github' +GITHUB_SETTING_API_TIMEOUT = 'api_timeout' GITHUB_SETTING_APP_ID = 'app_id' GITHUB_SETTING_APP_NAME = 'app_name' GITHUB_SETTING_INSTALLATION_ID = 'installation_id' From a56a9ffa3a75811ae7d469de58b7743754c3b211 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 22 Jan 2026 07:38:18 +0100 Subject: [PATCH 4/6] fix hound issue --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index 742cc384..f6dd4f23 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1222,7 +1222,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): try: while url: headers = { - 'Authorization': f'Bearer {token()}', + 'Authorization': f'Bearer {github.token()}', 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } From cec0e684a994be58a405432270ad977b1e374c43 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sat, 24 Jan 2026 20:20:41 
+0100 Subject: [PATCH 5/6] fix access to token, add headers and log rate limit information --- tasks/build.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index f6dd4f23..fef9f126 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -28,6 +28,7 @@ import shutil import string import sys +import time # Third party imports (anything installed into the local Python environment) from pyghee.utils import error, log @@ -1205,8 +1206,9 @@ def request_bot_build_issue_comments(repo_name, pr_number): status_table = {'on arch': [], 'for arch': [], 'for repo': [], 'date': [], 'status': [], 'url': [], 'result': []} cfg = config.read_config() - github_section = cfg.get(config.SECTION_GITHUB) - api_timeout = cfg.get(config.GITHUB_SETTING_API_TIMEOUT, 10) + github_section = cfg[config.SECTION_GITHUB] + api_timeout = int(github_section.get(config.GITHUB_SETTING_API_TIMEOUT, 10)) + # for loop because github has max 100 items per request. 
# if the pr has more than 100 comments we need to use per_page @@ -1215,24 +1217,33 @@ def request_bot_build_issue_comments(repo_name, pr_number): url = f'https://api.github.com/repos/{repo_name}/issues/{pr_number}/comments' all_comments = [] - # call get_instance() to obtain a (new) token (accessible via token()) + # call get_instance() to obtain a (new) token (accessible via github.token().token) # get_instance ensures that the token is renewed if the current one is no # longer valid or valid for less than 30 minutes _ = github.get_instance() try: while url: headers = { - 'Authorization': f'Bearer {github.token()}', + 'Authorization': f'Bearer {github.token().token}', 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } - response = requests.get(url, params={'per_page': 100}, timeout=api_timeout) + response = requests.get(url, headers=headers, params={'per_page': 100}, timeout=api_timeout) response.raise_for_status() all_comments.extend(response.json()) # get next URL from Link header in response (we are done if that is empty) url = response.links.get('next', {}).get('url') + log(f"{fn}(): more comments? 
{url!r}") + reset_time = int(response.headers.get('X-RateLimit-Reset')) + utc_time = datetime.fromtimestamp(reset_time, tz=timezone.utc) + time_left = int(reset_time - time.time()) + log(f"{fn}(): limits with token '{github.token().token[:4]}...':\n" + f" rate limit.: {response.headers.get('X-RateLimit-Limit')}\n" + f" remaining..: {response.headers.get('X-RateLimit-Remaining')}\n" + f" reset limit: {utc_time.strftime('%b %d %I:%M:%S %p UTC %Y')} (in {time_left} seconds)\n" + ) except Exception as err: log(f"{fn}(): obtaining comments for PR {pr_number} in repo {repo_name!r} failed: {err}") From c9ff8d9bfbebdc2792f817a8b45fb882d5ba58c2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sat, 24 Jan 2026 20:22:44 +0100 Subject: [PATCH 6/6] remove extra empty line --- tasks/build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index fef9f126..9115ad06 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1209,7 +1209,6 @@ def request_bot_build_issue_comments(repo_name, pr_number): github_section = cfg[config.SECTION_GITHUB] api_timeout = int(github_section.get(config.GITHUB_SETTING_API_TIMEOUT, 10)) - # for loop because github has max 100 items per request. # if the pr has more than 100 comments we need to use per_page # argument at the moment the for loop is for a max of 400 comments could bump this up