diff --git a/productmd/cli/verify.py b/productmd/cli/verify.py index 6d95da2..1bfba0e 100644 --- a/productmd/cli/verify.py +++ b/productmd/cli/verify.py @@ -120,7 +120,7 @@ def run(args: object) -> None: # Each batch computes checksums in parallel, then displays results # before the next batch starts. use_parallel = parallel_checksums > 1 - batch_size = parallel_checksums if use_parallel else total + batch_size = parallel_checksums if use_parallel else max(total, 1) if use_parallel and show_bar: sys.stdout.write(f"Verifying with {parallel_checksums} threads...\n") diff --git a/productmd/convert.py b/productmd/convert.py index a0ddfce..ecba3c2 100644 --- a/productmd/convert.py +++ b/productmd/convert.py @@ -40,11 +40,36 @@ __all__ = ( "LocationEntry", "MetadataType", + "REPO_FIELDS", "iter_all_locations", "upgrade_to_v2", "downgrade_to_v1", ) +#: Variant path fields that are YUM repository roots containing repodata/. +#: For these fields the ``repomd.xml`` file can be used as the checksum +#: target during upgrade, since it in turn references all other repodata +#: files by checksum. Used by both :mod:`convert` and :mod:`localize`. +REPO_FIELDS = frozenset({"repository", "debug_repository", "source_repository"}) + +#: Relative path from a repository root to the repomd.xml index file. +_REPOMD_RELATIVE = os.path.join("repodata", "repomd.xml") + + +def _checksum_target(entry, compose_path): + """Return the absolute file path to checksum for *entry*, or ``None``. + + Repository variant paths (``repository``, ``debug_repository``, + ``source_repository``) target ``repodata/repomd.xml`` under the + repo root. Non-repo variant paths are directory references and + have no checksum target. + """ + if entry.metadata_type == MetadataType.VARIANT_PATH: + if entry.field_name not in REPO_FIELDS: + return None + return os.path.join(compose_path, entry.path, _REPOMD_RELATIVE) + return os.path.join(compose_path, entry.path) + class MetadataType(str, Enum): """Type of metadata entry in a compose. @@ -330,7 +355,10 @@ def upgrade_to_v2( :param modules: :class:`~productmd.modules.Modules` instance to upgrade :param composeinfo: :class:`~productmd.composeinfo.ComposeInfo` instance to upgrade :param base_url: Base URL prefix for constructing remote URLs - :param compute_checksums: Compute SHA-256 checksums and sizes from local files + :param compute_checksums: Compute SHA-256 checksums and sizes from local files. + For repository variant paths (``repository``, ``debug_repository``, + ``source_repository``), the checksum is computed on the + ``repodata/repomd.xml`` file under the repository root. :param compose_path: Path to local compose root (required when *compute_checksums* is True) :param strict_checksums: Raise :class:`FileNotFoundError` instead of warning when a file cannot be found for checksum computation @@ -386,15 +414,15 @@ def upgrade_to_v2( # When checksums are not requested or parallel_checksums <= 1, # batch_size is set to total so everything runs in one pass. use_parallel = compute_checksums and compose_path is not None and parallel_checksums > 1 - batch_size = parallel_checksums if use_parallel else total + batch_size = parallel_checksums if use_parallel else max(total, 1) # Validate missing files upfront before starting any threads. # This ensures strict_checksums errors are raised early. if compute_checksums and compose_path is not None: for entry in entries: - if entry.metadata_type == MetadataType.VARIANT_PATH: + file_path = _checksum_target(entry, compose_path) + if file_path is None: continue - file_path = os.path.join(compose_path, entry.path) if not os.path.isfile(file_path): if strict_checksums: raise FileNotFoundError(f"Cannot compute checksum: file not found: {file_path}") @@ -415,9 +443,9 @@ def upgrade_to_v2( tasks_in_group = [] for offset, entry in enumerate(group): idx = group_start + offset - if entry.metadata_type == MetadataType.VARIANT_PATH: + file_path = _checksum_target(entry, compose_path) + if file_path is None: continue - file_path = os.path.join(compose_path, entry.path) if os.path.isfile(file_path): tasks_in_group.append((idx, file_path)) diff --git a/productmd/localize.py b/productmd/localize.py index 90bdee9..96cedc2 100644 --- a/productmd/localize.py +++ b/productmd/localize.py @@ -47,7 +47,7 @@ import defusedxml.ElementTree as ET from productmd.common import _get_default_headers -from productmd.convert import downgrade_to_v1, iter_all_locations +from productmd.convert import REPO_FIELDS, downgrade_to_v1, iter_all_locations __all__ = ( @@ -225,9 +225,6 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): #: Default chunk size for streaming downloads (8 KB) _CHUNK_SIZE = 8192 -#: Variant path fields that are YUM repository roots containing repodata/ -_REPO_FIELDS = frozenset({"repository", "debug_repository", "source_repository"}) - #: XML namespace used in repomd.xml _REPOMD_NS = "http://linux.duke.edu/metadata/repo" @@ -618,7 +615,7 @@ def _collect_download_tasks( # Variant paths: repository fields need repodata downloading, # all other fields are directory references (not downloadable). if entry.metadata_type == "variant_path": - if entry.field_name in _REPO_FIELDS: + if entry.field_name in REPO_FIELDS: repo_entries.append((entry.location.url, entry.location.local_path, entry.location)) continue diff --git a/tests/integration/fixtures/compose/Server/aarch64/os/repodata/repomd.xml b/tests/integration/fixtures/compose/Server/aarch64/os/repodata/repomd.xml new file mode 100644 index 0000000..69bec57 --- /dev/null +++ b/tests/integration/fixtures/compose/Server/aarch64/os/repodata/repomd.xml @@ -0,0 +1,9 @@ + + + 1738627200 + + d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592 + + 0 + + diff --git a/tests/integration/fixtures/compose/Server/x86_64/os/repodata/repomd.xml b/tests/integration/fixtures/compose/Server/x86_64/os/repodata/repomd.xml new file mode 100644 index 0000000..c5e4722 --- /dev/null +++ b/tests/integration/fixtures/compose/Server/x86_64/os/repodata/repomd.xml @@ -0,0 +1,9 @@ + + + 1738627200 + + e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + + 0 + + diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index 895cdd8..53e5b92 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -65,9 +65,11 @@ def _download_metadata(dest_dir): "Server/x86_64/iso/boot.iso", "Server/x86_64/os/GPL", "Server/x86_64/os/Packages/b/bash-5.2.26-3.fc41.x86_64.rpm", + "Server/x86_64/os/repodata/repomd.xml", "Server/aarch64/iso/boot.iso", "Server/aarch64/os/GPL", "Server/aarch64/os/Packages/b/bash-5.2.26-3.fc41.aarch64.rpm", + "Server/aarch64/os/repodata/repomd.xml", ] @@ -340,6 +342,46 @@ def test_upgrade_single_file_discovers_compose_root(self): assert img["location"]["checksum"] is not None assert img["location"]["checksum"].startswith("sha256:") + def test_upgrade_computes_repomd_checksums(self): + """Test that upgrade with --compute-checksums populates checksums for repo variant paths.""" + with tempfile.TemporaryDirectory() as tmp_dir: + compose_dir = os.path.join(tmp_dir, "compose") + _download_compose(compose_dir) + + output_dir = os.path.join(tmp_dir, "v2-repomd") + result = _run_productmd( + "upgrade", + "--output", + output_dir, + "--base-url", + f"{HTTP_BASE_URL}/", + "--compute-checksums", + compose_dir, + ) + + assert result.returncode == 0, f"stdout: {result.stdout}\nstderr: {result.stderr}" + + with open(os.path.join(output_dir, "composeinfo.json")) as f: + data = json.load(f) + + # Repository variant paths should have checksums computed + # from their repodata/repomd.xml files. + variant = data["payload"]["variants"]["Server"] + repo_paths = variant["paths"]["repository"] + for arch in repo_paths: + loc = repo_paths[arch] + assert isinstance(loc, dict), "v2.0 repo path should be a Location dict" + assert loc.get("checksum") is not None, f"repository[{arch}] should have a checksum from repomd.xml" + assert loc["checksum"].startswith("sha256:"), f"repository[{arch}] checksum should be SHA-256" + assert loc.get("size") is not None, f"repository[{arch}] should have a size from repomd.xml" + + # Non-repo variant paths (os_tree, packages) should NOT have checksums + os_tree_paths = variant["paths"]["os_tree"] + for arch in os_tree_paths: + loc = os_tree_paths[arch] + assert isinstance(loc, dict), "v2.0 os_tree path should be a Location dict" + assert loc.get("checksum") is None, f"os_tree[{arch}] should NOT have a checksum" + class TestDowngradeIntegration: """Test downgrading v2.0 metadata to v1.2 via the CLI.""" diff --git a/tests/test_convert.py b/tests/test_convert.py index d1e5167..abc2419 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -810,3 +810,324 @@ def test_downgrade_then_upgrade(self): assert e.location is not None assert e.location.url.startswith("https://new-cdn.example.com/") assert e.location.local_path == e.path + + +# --------------------------------------------------------------------------- +# Helpers: composeinfo with repository variant paths +# --------------------------------------------------------------------------- + + +def _create_composeinfo_with_repos(): + """Create ComposeInfo with repository variant paths for checksum tests.""" + ci = ComposeInfo() + ci.release.name = "Fedora" + ci.release.short = "Fedora" + ci.release.version = "41" + ci.release.type = "ga" + ci.compose.id = "Fedora-41-20260204.0" + ci.compose.type = "production" + ci.compose.date = "20260204" + ci.compose.respin = 0 + + variant = Variant(ci) + variant.id = "Server" + variant.uid = "Server" + variant.name = "Fedora Server" + variant.type = "variant" + variant.arches = set(["x86_64", "aarch64"]) + variant.paths.os_tree = { + "x86_64": "Server/x86_64/os", + "aarch64": "Server/aarch64/os", + } + variant.paths.packages = { + "x86_64": "Server/x86_64/os/Packages", + "aarch64": "Server/aarch64/os/Packages", + } + variant.paths.repository = { + "x86_64": "Server/x86_64/os", + "aarch64": "Server/aarch64/os", + } + variant.paths.source_repository = { + "x86_64": "Server/source/tree", + "aarch64": "Server/source/tree", + } + ci.variants.add(variant) + return ci + + +_SAMPLE_REPOMD_XML = b"""\ + + + + abc123 + + 12345 + + +""" + + +def _create_repomd_file(compose_dir, repo_path): + """Create a repodata/repomd.xml file under a repo path in the compose dir.""" + repodata_dir = compose_dir / repo_path / "repodata" + repodata_dir.mkdir(parents=True, exist_ok=True) + repomd_file = repodata_dir / "repomd.xml" + repomd_file.write_bytes(_SAMPLE_REPOMD_XML) + return repomd_file + + +# --------------------------------------------------------------------------- +# Tests: repomd.xml checksum computation for repository variant paths +# --------------------------------------------------------------------------- + + +class TestUpgradeRepomdChecksums: + """Tests for repomd.xml checksum computation during upgrade.""" + + def test_repo_variant_path_gets_repomd_checksum(self, tmp_path): + """Test that repository variant paths get checksum from repomd.xml.""" + compose_dir = tmp_path / "compose" + _create_repomd_file(compose_dir, "Server/x86_64/os") + _create_repomd_file(compose_dir, "Server/aarch64/os") + _create_repomd_file(compose_dir, "Server/source/tree") + + ci = _create_composeinfo_with_repos() + result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + ) + + entries = list(iter_all_locations(composeinfo=result["composeinfo"])) + repo_entries = [e for e in entries if e.field_name == "repository"] + assert len(repo_entries) == 2 + for e in repo_entries: + assert e.location is not None + assert e.location.checksum is not None + assert e.location.checksum.startswith("sha256:") + assert e.location.size is not None + assert e.location.size == len(_SAMPLE_REPOMD_XML) + + def test_source_repo_variant_path_gets_checksum(self, tmp_path): + """Test that source_repository variant paths also get checksums.""" + compose_dir = tmp_path / "compose" + _create_repomd_file(compose_dir, "Server/x86_64/os") + _create_repomd_file(compose_dir, "Server/aarch64/os") + _create_repomd_file(compose_dir, "Server/source/tree") + + ci = _create_composeinfo_with_repos() + result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + ) + + entries = list(iter_all_locations(composeinfo=result["composeinfo"])) + src_entries = [e for e in entries if e.field_name == "source_repository"] + assert len(src_entries) == 2 + for e in src_entries: + assert e.location is not None + assert e.location.checksum is not None + assert e.location.checksum.startswith("sha256:") + + def test_non_repo_variant_paths_have_no_checksum(self, tmp_path): + """Test that os_tree and packages variant paths still get no checksum.""" + compose_dir = tmp_path / "compose" + _create_repomd_file(compose_dir, "Server/x86_64/os") + _create_repomd_file(compose_dir, "Server/aarch64/os") + _create_repomd_file(compose_dir, "Server/source/tree") + + ci = _create_composeinfo_with_repos() + result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + ) + + entries = list(iter_all_locations(composeinfo=result["composeinfo"])) + non_repo = [e for e in entries if e.field_name in ("os_tree", "packages")] + assert len(non_repo) > 0 + for e in non_repo: + assert e.location is not None + # Non-repo variant paths should have no computed checksum + assert e.location.checksum is None + assert e.location.size is None + + def test_missing_repomd_warns(self, tmp_path): + """Test that missing repomd.xml emits a warning.""" + compose_dir = tmp_path / "compose" + compose_dir.mkdir() + # Don't create any repomd.xml files + + ci = _create_composeinfo_with_repos() + with pytest.warns(UserWarning, match="file not found"): + upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + ) + + def test_missing_repomd_strict_raises(self, tmp_path): + """Test that missing repomd.xml with strict_checksums raises.""" + compose_dir = tmp_path / "compose" + compose_dir.mkdir() + + ci = _create_composeinfo_with_repos() + with pytest.raises(FileNotFoundError, match="repomd.xml"): + upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + strict_checksums=True, + ) + + def test_missing_repomd_no_checksum_in_location(self, tmp_path): + """Test that missing repomd.xml results in no checksum on Location.""" + compose_dir = tmp_path / "compose" + compose_dir.mkdir() + + ci = _create_composeinfo_with_repos() + with pytest.warns(UserWarning): + result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + ) + + entries = list(iter_all_locations(composeinfo=result["composeinfo"])) + repo_entries = [e for e in entries if e.field_name == "repository"] + for e in repo_entries: + assert e.location is not None + # Location is created but without checksum/size + assert e.location.checksum is None + assert e.location.size is None + + def test_shared_repo_path_both_arches_get_checksum(self, tmp_path): + """Test that shared source_repository path gives both arches a checksum.""" + compose_dir = tmp_path / "compose" + _create_repomd_file(compose_dir, "Server/x86_64/os") + _create_repomd_file(compose_dir, "Server/aarch64/os") + _create_repomd_file(compose_dir, "Server/source/tree") + + ci = _create_composeinfo_with_repos() + result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + ) + + entries = list(iter_all_locations(composeinfo=result["composeinfo"])) + src_entries = [e for e in entries if e.field_name == "source_repository"] + # Both arches point to the same path and both get checksums + assert len(src_entries) == 2 + checksums = {e.location.checksum for e in src_entries} + assert len(checksums) == 1 # same file -> same checksum + assert all(c.startswith("sha256:") for c in checksums) + + def test_progress_callback_includes_repo_paths(self, tmp_path): + """Test that progress_callback fires for repo variant paths too.""" + compose_dir = tmp_path / "compose" + _create_repomd_file(compose_dir, "Server/x86_64/os") + _create_repomd_file(compose_dir, "Server/aarch64/os") + _create_repomd_file(compose_dir, "Server/source/tree") + + ci = _create_composeinfo_with_repos() + calls = [] + + def on_progress(processed, total, path, checksum): + calls.append((processed, total, path, checksum)) + + upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + progress_callback=on_progress, + ) + + # Total entries: 2 os_tree + 2 packages + 2 repository + 2 source_repository = 8 + assert len(calls) == 8 + # Find repo entries in the callbacks + repo_calls = [c for c in calls if c[3] is not None] + # 2 repository + 2 source_repository = 4 with checksums + assert len(repo_calls) == 4 + for _, _, path, checksum in repo_calls: + assert checksum.startswith("sha256:") + + @pytest.mark.filterwarnings("ignore::UserWarning") + def test_parallel_checksums_with_repos(self, tmp_path): + """Test that parallel checksum computation works for repo paths.""" + compose_dir = tmp_path / "compose" + _create_repomd_file(compose_dir, "Server/x86_64/os") + _create_repomd_file(compose_dir, "Server/aarch64/os") + _create_repomd_file(compose_dir, "Server/source/tree") + + ci = _create_composeinfo_with_repos() + + # Sequential + seq_result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + parallel_checksums=1, + ) + + # Parallel + par_result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + parallel_checksums=4, + ) + + seq_entries = list(iter_all_locations(composeinfo=seq_result["composeinfo"])) + par_entries = list(iter_all_locations(composeinfo=par_result["composeinfo"])) + + for se, pe in zip(seq_entries, par_entries): + assert se.location.checksum == pe.location.checksum + assert se.location.size == pe.location.size + + def test_without_compute_checksums_repos_have_no_checksum(self): + """Test that without compute_checksums, repo paths have no checksum.""" + ci = _create_composeinfo_with_repos() + result = upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + ) + + entries = list(iter_all_locations(composeinfo=result["composeinfo"])) + repo_entries = [e for e in entries if e.field_name == "repository"] + for e in repo_entries: + assert e.location is not None + assert e.location.checksum is None + assert e.location.size is None + + def test_originals_not_modified(self, tmp_path): + """Test that original composeinfo is not modified by checksum computation.""" + compose_dir = tmp_path / "compose" + _create_repomd_file(compose_dir, "Server/x86_64/os") + _create_repomd_file(compose_dir, "Server/aarch64/os") + _create_repomd_file(compose_dir, "Server/source/tree") + + ci = _create_composeinfo_with_repos() + original_entries = list(iter_all_locations(composeinfo=ci)) + + upgrade_to_v2( + composeinfo=ci, + base_url="https://cdn.example.com/", + compute_checksums=True, + compose_path=str(compose_dir), + ) + + # Original should still have no locations + for e in original_entries: + assert e.location is None