From 478ddc0a621529816a59f9c413ebc8260d287da2 Mon Sep 17 00:00:00 2001
From: jmicanek <jmicanek@redhat.com>
Date: Mon, 7 Aug 2023 10:15:48 +0200
Subject: [PATCH] Run skopeo inspect in parallel threads

Repotracker takes a long time to run when inspecting repository tags via skopeo. Since the skopeo process is I/O bound, we can run the skopeo calls in multiple threads and significantly decrease the run time. Note that this is still inferior to using the Quay API directly.
---
 repotracker/container.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/repotracker/container.py b/repotracker/container.py
index 88d5b2e..8baa778 100644
--- a/repotracker/container.py
+++ b/repotracker/container.py
@@ -9,6 +9,9 @@
 import requests
 from repotracker.utils import format_ts, format_time
 
+from concurrent.futures import ThreadPoolExecutor
+from itertools import repeat
+
 log = logging.getLogger(__name__)
 
 
@@ -80,11 +83,13 @@ def inspect_image_repo(repo, token=None):
     # Record info for whatever skopeo wants to tell us when we don't specify a tag.
     default = inspect_tag(repo)
     tags = default["RepoTags"]
-    for tag in tags:
-        try:
-            results[tag] = inspect_tag(repo, tag=tag)
-        except:
-            log.error("Could not query %s:%s", repo, tag, exc_info=True)
+    with ThreadPoolExecutor(max_workers=60) as executor:
+        # submit tasks and process results
+        for result in executor.map(inspect_tag, repeat(repo), tags):
+            try:
+                results[result.get("tag")] = result
+            except:
+                log.error("Could not query %s:%s", repo, result.tag, exc_info=True)
     return results
 
 
@@ -114,7 +119,10 @@ def inspect_tag(repo, tag=None):
         raise RuntimeError(
             "Error inspecting {0}:{1}: {2}".format(repo, tag, proc.stderr)
         )
-    return json.loads(proc.stdout)
+    result = json.loads(proc.stdout)
+    if tag:
+        result["tag"] = tag
+    return result
 
 
 def gen_result(repo, tag, tagdata):