diff --git a/productmd/cli/verify.py b/productmd/cli/verify.py
index 6d95da2..1bfba0e 100644
--- a/productmd/cli/verify.py
+++ b/productmd/cli/verify.py
@@ -120,7 +120,7 @@ def run(args: object) -> None:
# Each batch computes checksums in parallel, then displays results
# before the next batch starts.
use_parallel = parallel_checksums > 1
- batch_size = parallel_checksums if use_parallel else total
+ batch_size = parallel_checksums if use_parallel else max(total, 1)
if use_parallel and show_bar:
sys.stdout.write(f"Verifying with {parallel_checksums} threads...\n")
diff --git a/productmd/convert.py b/productmd/convert.py
index a0ddfce..ecba3c2 100644
--- a/productmd/convert.py
+++ b/productmd/convert.py
@@ -40,11 +40,36 @@
__all__ = (
"LocationEntry",
"MetadataType",
+ "REPO_FIELDS",
"iter_all_locations",
"upgrade_to_v2",
"downgrade_to_v1",
)
+#: Variant path fields that are YUM repository roots containing repodata/.
+#: For these fields the ``repomd.xml`` file can be used as the checksum
+#: target during upgrade, since it in turn references all other repodata
+#: files by checksum. Used by both :mod:`convert` and :mod:`localize`.
+REPO_FIELDS = frozenset({"repository", "debug_repository", "source_repository"})
+
+#: Relative path from a repository root to the repomd.xml index file.
+_REPOMD_RELATIVE = os.path.join("repodata", "repomd.xml")
+
+
+def _checksum_target(entry, compose_path):
+ """Return the absolute file path to checksum for *entry*, or ``None``.
+
+ Repository variant paths (``repository``, ``debug_repository``,
+ ``source_repository``) target ``repodata/repomd.xml`` under the
+ repo root. Non-repo variant paths are directory references and
+ have no checksum target.
+ """
+ if entry.metadata_type == MetadataType.VARIANT_PATH:
+ if entry.field_name not in REPO_FIELDS:
+ return None
+ return os.path.join(compose_path, entry.path, _REPOMD_RELATIVE)
+ return os.path.join(compose_path, entry.path)
+
class MetadataType(str, Enum):
"""Type of metadata entry in a compose.
@@ -330,7 +355,10 @@ def upgrade_to_v2(
:param modules: :class:`~productmd.modules.Modules` instance to upgrade
:param composeinfo: :class:`~productmd.composeinfo.ComposeInfo` instance to upgrade
:param base_url: Base URL prefix for constructing remote URLs
- :param compute_checksums: Compute SHA-256 checksums and sizes from local files
+ :param compute_checksums: Compute SHA-256 checksums and sizes from local files.
+ For repository variant paths (``repository``, ``debug_repository``,
+ ``source_repository``), the checksum is computed on the
+ ``repodata/repomd.xml`` file under the repository root.
:param compose_path: Path to local compose root (required when *compute_checksums* is True)
:param strict_checksums: Raise :class:`FileNotFoundError` instead of warning
when a file cannot be found for checksum computation
@@ -386,15 +414,15 @@ def upgrade_to_v2(
# When checksums are not requested or parallel_checksums <= 1,
# batch_size is set to total so everything runs in one pass.
use_parallel = compute_checksums and compose_path is not None and parallel_checksums > 1
- batch_size = parallel_checksums if use_parallel else total
+ batch_size = parallel_checksums if use_parallel else max(total, 1)
# Validate missing files upfront before starting any threads.
# This ensures strict_checksums errors are raised early.
if compute_checksums and compose_path is not None:
for entry in entries:
- if entry.metadata_type == MetadataType.VARIANT_PATH:
+ file_path = _checksum_target(entry, compose_path)
+ if file_path is None:
continue
- file_path = os.path.join(compose_path, entry.path)
if not os.path.isfile(file_path):
if strict_checksums:
raise FileNotFoundError(f"Cannot compute checksum: file not found: {file_path}")
@@ -415,9 +443,9 @@ def upgrade_to_v2(
tasks_in_group = []
for offset, entry in enumerate(group):
idx = group_start + offset
- if entry.metadata_type == MetadataType.VARIANT_PATH:
+ file_path = _checksum_target(entry, compose_path)
+ if file_path is None:
continue
- file_path = os.path.join(compose_path, entry.path)
if os.path.isfile(file_path):
tasks_in_group.append((idx, file_path))
diff --git a/productmd/localize.py b/productmd/localize.py
index 90bdee9..96cedc2 100644
--- a/productmd/localize.py
+++ b/productmd/localize.py
@@ -47,7 +47,7 @@
import defusedxml.ElementTree as ET
from productmd.common import _get_default_headers
-from productmd.convert import downgrade_to_v1, iter_all_locations
+from productmd.convert import REPO_FIELDS, downgrade_to_v1, iter_all_locations
__all__ = (
@@ -225,9 +225,6 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
#: Default chunk size for streaming downloads (8 KB)
_CHUNK_SIZE = 8192
-#: Variant path fields that are YUM repository roots containing repodata/
-_REPO_FIELDS = frozenset({"repository", "debug_repository", "source_repository"})
-
#: XML namespace used in repomd.xml
_REPOMD_NS = "http://linux.duke.edu/metadata/repo"
@@ -618,7 +615,7 @@ def _collect_download_tasks(
# Variant paths: repository fields need repodata downloading,
# all other fields are directory references (not downloadable).
if entry.metadata_type == "variant_path":
- if entry.field_name in _REPO_FIELDS:
+ if entry.field_name in REPO_FIELDS:
repo_entries.append((entry.location.url, entry.location.local_path, entry.location))
continue
diff --git a/tests/integration/fixtures/compose/Server/aarch64/os/repodata/repomd.xml b/tests/integration/fixtures/compose/Server/aarch64/os/repodata/repomd.xml
new file mode 100644
index 0000000..69bec57
--- /dev/null
+++ b/tests/integration/fixtures/compose/Server/aarch64/os/repodata/repomd.xml
@@ -0,0 +1,9 @@
+
+
+ 1738627200
+
+ d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592
+
+ 0
+
+
diff --git a/tests/integration/fixtures/compose/Server/x86_64/os/repodata/repomd.xml b/tests/integration/fixtures/compose/Server/x86_64/os/repodata/repomd.xml
new file mode 100644
index 0000000..c5e4722
--- /dev/null
+++ b/tests/integration/fixtures/compose/Server/x86_64/os/repodata/repomd.xml
@@ -0,0 +1,9 @@
+
+
+ 1738627200
+
+ e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+
+ 0
+
+
diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py
index 895cdd8..53e5b92 100644
--- a/tests/integration/test_integration.py
+++ b/tests/integration/test_integration.py
@@ -65,9 +65,11 @@ def _download_metadata(dest_dir):
"Server/x86_64/iso/boot.iso",
"Server/x86_64/os/GPL",
"Server/x86_64/os/Packages/b/bash-5.2.26-3.fc41.x86_64.rpm",
+ "Server/x86_64/os/repodata/repomd.xml",
"Server/aarch64/iso/boot.iso",
"Server/aarch64/os/GPL",
"Server/aarch64/os/Packages/b/bash-5.2.26-3.fc41.aarch64.rpm",
+ "Server/aarch64/os/repodata/repomd.xml",
]
@@ -340,6 +342,46 @@ def test_upgrade_single_file_discovers_compose_root(self):
assert img["location"]["checksum"] is not None
assert img["location"]["checksum"].startswith("sha256:")
+ def test_upgrade_computes_repomd_checksums(self):
+ """Test that upgrade with --compute-checksums populates checksums for repo variant paths."""
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ compose_dir = os.path.join(tmp_dir, "compose")
+ _download_compose(compose_dir)
+
+ output_dir = os.path.join(tmp_dir, "v2-repomd")
+ result = _run_productmd(
+ "upgrade",
+ "--output",
+ output_dir,
+ "--base-url",
+ f"{HTTP_BASE_URL}/",
+ "--compute-checksums",
+ compose_dir,
+ )
+
+ assert result.returncode == 0, f"stdout: {result.stdout}\nstderr: {result.stderr}"
+
+ with open(os.path.join(output_dir, "composeinfo.json")) as f:
+ data = json.load(f)
+
+ # Repository variant paths should have checksums computed
+ # from their repodata/repomd.xml files.
+ variant = data["payload"]["variants"]["Server"]
+ repo_paths = variant["paths"]["repository"]
+ for arch in repo_paths:
+ loc = repo_paths[arch]
+ assert isinstance(loc, dict), "v2.0 repo path should be a Location dict"
+ assert loc.get("checksum") is not None, f"repository[{arch}] should have a checksum from repomd.xml"
+ assert loc["checksum"].startswith("sha256:"), f"repository[{arch}] checksum should be SHA-256"
+ assert loc.get("size") is not None, f"repository[{arch}] should have a size from repomd.xml"
+
+ # Non-repo variant paths (os_tree, packages) should NOT have checksums
+ os_tree_paths = variant["paths"]["os_tree"]
+ for arch in os_tree_paths:
+ loc = os_tree_paths[arch]
+ assert isinstance(loc, dict), "v2.0 os_tree path should be a Location dict"
+ assert loc.get("checksum") is None, f"os_tree[{arch}] should NOT have a checksum"
+
class TestDowngradeIntegration:
"""Test downgrading v2.0 metadata to v1.2 via the CLI."""
diff --git a/tests/test_convert.py b/tests/test_convert.py
index d1e5167..abc2419 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -810,3 +810,324 @@ def test_downgrade_then_upgrade(self):
assert e.location is not None
assert e.location.url.startswith("https://new-cdn.example.com/")
assert e.location.local_path == e.path
+
+
+# ---------------------------------------------------------------------------
+# Helpers: composeinfo with repository variant paths
+# ---------------------------------------------------------------------------
+
+
+def _create_composeinfo_with_repos():
+ """Create ComposeInfo with repository variant paths for checksum tests."""
+ ci = ComposeInfo()
+ ci.release.name = "Fedora"
+ ci.release.short = "Fedora"
+ ci.release.version = "41"
+ ci.release.type = "ga"
+ ci.compose.id = "Fedora-41-20260204.0"
+ ci.compose.type = "production"
+ ci.compose.date = "20260204"
+ ci.compose.respin = 0
+
+ variant = Variant(ci)
+ variant.id = "Server"
+ variant.uid = "Server"
+ variant.name = "Fedora Server"
+ variant.type = "variant"
+ variant.arches = set(["x86_64", "aarch64"])
+ variant.paths.os_tree = {
+ "x86_64": "Server/x86_64/os",
+ "aarch64": "Server/aarch64/os",
+ }
+ variant.paths.packages = {
+ "x86_64": "Server/x86_64/os/Packages",
+ "aarch64": "Server/aarch64/os/Packages",
+ }
+ variant.paths.repository = {
+ "x86_64": "Server/x86_64/os",
+ "aarch64": "Server/aarch64/os",
+ }
+ variant.paths.source_repository = {
+ "x86_64": "Server/source/tree",
+ "aarch64": "Server/source/tree",
+ }
+ ci.variants.add(variant)
+ return ci
+
+
+_SAMPLE_REPOMD_XML = b"""\
+
+
+
+ abc123
+
+ 12345
+
+
+"""
+
+
+def _create_repomd_file(compose_dir, repo_path):
+ """Create a repodata/repomd.xml file under a repo path in the compose dir."""
+ repodata_dir = compose_dir / repo_path / "repodata"
+ repodata_dir.mkdir(parents=True, exist_ok=True)
+ repomd_file = repodata_dir / "repomd.xml"
+ repomd_file.write_bytes(_SAMPLE_REPOMD_XML)
+ return repomd_file
+
+
+# ---------------------------------------------------------------------------
+# Tests: repomd.xml checksum computation for repository variant paths
+# ---------------------------------------------------------------------------
+
+
+class TestUpgradeRepomdChecksums:
+ """Tests for repomd.xml checksum computation during upgrade."""
+
+ def test_repo_variant_path_gets_repomd_checksum(self, tmp_path):
+ """Test that repository variant paths get checksum from repomd.xml."""
+ compose_dir = tmp_path / "compose"
+ _create_repomd_file(compose_dir, "Server/x86_64/os")
+ _create_repomd_file(compose_dir, "Server/aarch64/os")
+ _create_repomd_file(compose_dir, "Server/source/tree")
+
+ ci = _create_composeinfo_with_repos()
+ result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ )
+
+ entries = list(iter_all_locations(composeinfo=result["composeinfo"]))
+ repo_entries = [e for e in entries if e.field_name == "repository"]
+ assert len(repo_entries) == 2
+ for e in repo_entries:
+ assert e.location is not None
+ assert e.location.checksum is not None
+ assert e.location.checksum.startswith("sha256:")
+ assert e.location.size is not None
+ assert e.location.size == len(_SAMPLE_REPOMD_XML)
+
+ def test_source_repo_variant_path_gets_checksum(self, tmp_path):
+ """Test that source_repository variant paths also get checksums."""
+ compose_dir = tmp_path / "compose"
+ _create_repomd_file(compose_dir, "Server/x86_64/os")
+ _create_repomd_file(compose_dir, "Server/aarch64/os")
+ _create_repomd_file(compose_dir, "Server/source/tree")
+
+ ci = _create_composeinfo_with_repos()
+ result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ )
+
+ entries = list(iter_all_locations(composeinfo=result["composeinfo"]))
+ src_entries = [e for e in entries if e.field_name == "source_repository"]
+ assert len(src_entries) == 2
+ for e in src_entries:
+ assert e.location is not None
+ assert e.location.checksum is not None
+ assert e.location.checksum.startswith("sha256:")
+
+ def test_non_repo_variant_paths_have_no_checksum(self, tmp_path):
+ """Test that os_tree and packages variant paths still get no checksum."""
+ compose_dir = tmp_path / "compose"
+ _create_repomd_file(compose_dir, "Server/x86_64/os")
+ _create_repomd_file(compose_dir, "Server/aarch64/os")
+ _create_repomd_file(compose_dir, "Server/source/tree")
+
+ ci = _create_composeinfo_with_repos()
+ result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ )
+
+ entries = list(iter_all_locations(composeinfo=result["composeinfo"]))
+ non_repo = [e for e in entries if e.field_name in ("os_tree", "packages")]
+ assert len(non_repo) > 0
+ for e in non_repo:
+ assert e.location is not None
+ # Non-repo variant paths should have no computed checksum
+ assert e.location.checksum is None
+ assert e.location.size is None
+
+ def test_missing_repomd_warns(self, tmp_path):
+ """Test that missing repomd.xml emits a warning."""
+ compose_dir = tmp_path / "compose"
+ compose_dir.mkdir()
+ # Don't create any repomd.xml files
+
+ ci = _create_composeinfo_with_repos()
+ with pytest.warns(UserWarning, match="file not found"):
+ upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ )
+
+ def test_missing_repomd_strict_raises(self, tmp_path):
+ """Test that missing repomd.xml with strict_checksums raises."""
+ compose_dir = tmp_path / "compose"
+ compose_dir.mkdir()
+
+ ci = _create_composeinfo_with_repos()
+ with pytest.raises(FileNotFoundError, match="repomd.xml"):
+ upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ strict_checksums=True,
+ )
+
+ def test_missing_repomd_no_checksum_in_location(self, tmp_path):
+ """Test that missing repomd.xml results in no checksum on Location."""
+ compose_dir = tmp_path / "compose"
+ compose_dir.mkdir()
+
+ ci = _create_composeinfo_with_repos()
+ with pytest.warns(UserWarning):
+ result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ )
+
+ entries = list(iter_all_locations(composeinfo=result["composeinfo"]))
+ repo_entries = [e for e in entries if e.field_name == "repository"]
+ for e in repo_entries:
+ assert e.location is not None
+ # Location is created but without checksum/size
+ assert e.location.checksum is None
+ assert e.location.size is None
+
+ def test_shared_repo_path_both_arches_get_checksum(self, tmp_path):
+ """Test that shared source_repository path gives both arches a checksum."""
+ compose_dir = tmp_path / "compose"
+ _create_repomd_file(compose_dir, "Server/x86_64/os")
+ _create_repomd_file(compose_dir, "Server/aarch64/os")
+ _create_repomd_file(compose_dir, "Server/source/tree")
+
+ ci = _create_composeinfo_with_repos()
+ result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ )
+
+ entries = list(iter_all_locations(composeinfo=result["composeinfo"]))
+ src_entries = [e for e in entries if e.field_name == "source_repository"]
+ # Both arches point to the same path and both get checksums
+ assert len(src_entries) == 2
+ checksums = {e.location.checksum for e in src_entries}
+ assert len(checksums) == 1 # same file -> same checksum
+ assert all(c.startswith("sha256:") for c in checksums)
+
+ def test_progress_callback_includes_repo_paths(self, tmp_path):
+ """Test that progress_callback fires for repo variant paths too."""
+ compose_dir = tmp_path / "compose"
+ _create_repomd_file(compose_dir, "Server/x86_64/os")
+ _create_repomd_file(compose_dir, "Server/aarch64/os")
+ _create_repomd_file(compose_dir, "Server/source/tree")
+
+ ci = _create_composeinfo_with_repos()
+ calls = []
+
+ def on_progress(processed, total, path, checksum):
+ calls.append((processed, total, path, checksum))
+
+ upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ progress_callback=on_progress,
+ )
+
+ # Total entries: 2 os_tree + 2 packages + 2 repository + 2 source_repository = 8
+ assert len(calls) == 8
+ # Find repo entries in the callbacks
+ repo_calls = [c for c in calls if c[3] is not None]
+ # 2 repository + 2 source_repository = 4 with checksums
+ assert len(repo_calls) == 4
+ for _, _, path, checksum in repo_calls:
+ assert checksum.startswith("sha256:")
+
+ @pytest.mark.filterwarnings("ignore::UserWarning")
+ def test_parallel_checksums_with_repos(self, tmp_path):
+ """Test that parallel checksum computation works for repo paths."""
+ compose_dir = tmp_path / "compose"
+ _create_repomd_file(compose_dir, "Server/x86_64/os")
+ _create_repomd_file(compose_dir, "Server/aarch64/os")
+ _create_repomd_file(compose_dir, "Server/source/tree")
+
+ ci = _create_composeinfo_with_repos()
+
+ # Sequential
+ seq_result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ parallel_checksums=1,
+ )
+
+ # Parallel
+ par_result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ parallel_checksums=4,
+ )
+
+ seq_entries = list(iter_all_locations(composeinfo=seq_result["composeinfo"]))
+ par_entries = list(iter_all_locations(composeinfo=par_result["composeinfo"]))
+
+ for se, pe in zip(seq_entries, par_entries):
+ assert se.location.checksum == pe.location.checksum
+ assert se.location.size == pe.location.size
+
+ def test_without_compute_checksums_repos_have_no_checksum(self):
+ """Test that without compute_checksums, repo paths have no checksum."""
+ ci = _create_composeinfo_with_repos()
+ result = upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ )
+
+ entries = list(iter_all_locations(composeinfo=result["composeinfo"]))
+ repo_entries = [e for e in entries if e.field_name == "repository"]
+ for e in repo_entries:
+ assert e.location is not None
+ assert e.location.checksum is None
+ assert e.location.size is None
+
+ def test_originals_not_modified(self, tmp_path):
+ """Test that original composeinfo is not modified by checksum computation."""
+ compose_dir = tmp_path / "compose"
+ _create_repomd_file(compose_dir, "Server/x86_64/os")
+ _create_repomd_file(compose_dir, "Server/aarch64/os")
+ _create_repomd_file(compose_dir, "Server/source/tree")
+
+ ci = _create_composeinfo_with_repos()
+ original_entries = list(iter_all_locations(composeinfo=ci))
+
+ upgrade_to_v2(
+ composeinfo=ci,
+ base_url="https://cdn.example.com/",
+ compute_checksums=True,
+ compose_path=str(compose_dir),
+ )
+
+ # Original should still have no locations
+ for e in original_entries:
+ assert e.location is None