Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 30 additions & 12 deletions src/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import math
import os
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
import time
from datetime import datetime, timezone
from threading import Thread

Expand Down Expand Up @@ -193,31 +193,50 @@ def update_fb():
helpers.write_json_files(file_path=file_path, data=data)


def _get_stats_with_timeout(repo, headers: dict, timeout=60, retry_after=2):
    """
    Fetch weekly commit activity for a repo, capping total wait time.

    The GitHub statistics endpoint is polled through ``helpers.s`` until it stops
    answering 202 or the deadline derived from ``timeout`` has passed.

    Parameters
    ----------
    repo :
        PyGithub Repository object. Only ``repo.owner.login`` and ``repo.name`` are read.
    headers : dict
        HTTP headers including the GitHub authorisation token.
    timeout : int
        Maximum seconds to wait before giving up (GitHub may return 202 while
        computing stats).
    retry_after : int
        Seconds to wait between 202 responses. The wait never extends past the
        deadline, and a negative value is treated as 0.

    Returns
    -------
    list or None
        Weekly commit-activity data decoded from the JSON response, or None when
        the deadline passes or the endpoint answers 204.

    Raises
    ------
    Exception
        Any exception raised by ``helpers.s.get`` or ``response.raise_for_status()``
        propagates to the caller; nothing is caught here.
    """
    url = f'https://api.github.com/repos/{repo.owner.login}/{repo.name}/stats/commit_activity'
    deadline = time.monotonic() + timeout

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            log.warning(f'Timeout fetching commit activity for {repo.name}, skipping.')
            return None

        # a single request may never outlive the overall deadline
        response = helpers.s.get(
            url=url,
            headers=headers,
            timeout=min(helpers.DEFAULT_TIMEOUT, remaining),
        )
        if response.status_code == 202:
            # stats are still being computed; pause, but never past the deadline
            pause = min(retry_after, deadline - time.monotonic())
            # clamp at 0 because time.sleep() raises ValueError for negative values
            time.sleep(max(0, pause))
            continue
        if response.status_code == 204:
            # no content available for this repository
            return None

        response.raise_for_status()
        return response.json()


def _seed_star_history(repo, total: int, initial_samples: int) -> list[dict]:
"""
Expand Down Expand Up @@ -447,11 +466,10 @@ def _process_github_repo(repo, headers: dict, graphql_url: str) -> None:
helpers.write_json_files(file_path=file_path, data=languages)

# commit activity (last year, weekly buckets)
commit_activity = _get_stats_with_timeout(repo)
commit_activity = _get_stats_with_timeout(repo, headers)
if commit_activity:
commits = [week.raw_data for week in commit_activity]
file_path = os.path.join(BASE_DIR, 'github', 'commitActivity', repo.name)
helpers.write_json_files(file_path=file_path, data=commits)
helpers.write_json_files(file_path=file_path, data=commit_activity)

# open pull requests
pulls_data = []
Expand Down
55 changes: 28 additions & 27 deletions tests/unit/test_updater.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# standard imports
import json
from concurrent.futures import TimeoutError as FuturesTimeout
from datetime import datetime, timezone
from types import SimpleNamespace

Expand All @@ -25,6 +24,10 @@ def json(self):
raise self._raises
return self._payload

def raise_for_status(self):
    """Raise ``requests.exceptions.HTTPError`` carrying ``self.text`` when ``status_code`` is 400 or above."""
    if self.status_code >= 400:
        raise requests.exceptions.HTTPError(self.text)


class FakeWeek:
def __init__(self, week, total):
Expand Down Expand Up @@ -229,38 +232,36 @@ def fake_get(url):
assert 'paging' not in writes[0][1]


def test_get_stats_with_timeout_success_retry_and_timeout(monkeypatch):
    """Cover the 202-then-200 retry path, the deadline path and the 204 path."""
    repo = SimpleNamespace(name='x', owner=SimpleNamespace(login='owner'))
    headers = {'Authorization': 'token'}
    calls = []

    def fake_get(url, headers, timeout):
        # first request answers 202 (stats still computing), later ones return data
        calls.append((url, headers, timeout))
        if len(calls) == 1:
            return FakeResponse(status=202)
        return FakeResponse([{'week': 1, 'total': 2}])

    sleeps = []
    monkeypatch.setattr(updater.helpers.s, 'get', fake_get)
    monkeypatch.setattr(updater.time, 'sleep', lambda seconds: sleeps.append(seconds))
    # freeze the clock so the deadline is only reached when timeout=0 is passed
    monkeypatch.setattr(updater.time, 'monotonic', lambda: 0)

    # success after a single 202 retry
    assert updater._get_stats_with_timeout(repo, headers) == [{'week': 1, 'total': 2}]
    assert len(calls) == 2
    assert calls[0][0] == 'https://api.github.com/repos/owner/x/stats/commit_activity'
    assert calls[0][1] == headers
    assert calls[0][2] == updater.helpers.DEFAULT_TIMEOUT
    assert sleeps == [2]

    # an already-expired deadline warns and returns without issuing a request
    warnings = []
    monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg))
    assert updater._get_stats_with_timeout(repo, headers, timeout=0) is None
    assert warnings
    assert len(calls) == 2

    # 204 means there is nothing to report
    monkeypatch.setattr(updater.helpers.s, 'get', lambda url, headers, timeout: FakeResponse(status=204))
    assert updater._get_stats_with_timeout(repo, headers) is None


def test_seed_star_history(monkeypatch):
repo = FakeRepo(stars=250)
Expand Down Expand Up @@ -331,7 +332,7 @@ def test_process_github_repo(monkeypatch, tmp_path):
'save_image_from_url',
lambda **kwargs: writes.append(('img', kwargs['file_path']))
)
monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo: [FakeWeek(1, 1)])
monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo, headers: [{'week': 1, 'total': 1}])
monkeypatch.setattr(updater, '_collect_star_history', lambda repo: [{'date': '2026-01-01', 'stars': 1}])
monkeypatch.setattr(updater, '_fetch_code_scanning_alerts', lambda repo: [])
monkeypatch.setattr(
Expand All @@ -357,7 +358,7 @@ def post_ok(url, json, headers):
def test_process_github_repo_error_and_avatar_skip(monkeypatch, tmp_path):
monkeypatch.setattr(updater, 'BASE_DIR', str(tmp_path / 'gh-pages'))
monkeypatch.setattr(updater.helpers, 'write_json_files', lambda **kwargs: None)
monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo: None)
monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo, headers: None)
monkeypatch.setattr(updater, '_collect_star_history', lambda repo: [])
monkeypatch.setattr(updater, '_fetch_code_scanning_alerts', lambda repo: [])
monkeypatch.setattr(updater, '_build_code_scanning_history', lambda alerts: [])
Expand Down
Loading