Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion productmd/cli/verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def run(args: object) -> None:
# Each batch computes checksums in parallel, then displays results
# before the next batch starts.
use_parallel = parallel_checksums > 1
batch_size = parallel_checksums if use_parallel else total
batch_size = parallel_checksums if use_parallel else max(total, 1)

if use_parallel and show_bar:
sys.stdout.write(f"Verifying with {parallel_checksums} threads...\n")
Expand Down
40 changes: 34 additions & 6 deletions productmd/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,36 @@
# Public names exported by ``from productmd.convert import *``.
__all__ = (
"LocationEntry",
"MetadataType",
"REPO_FIELDS",
"iter_all_locations",
"upgrade_to_v2",
"downgrade_to_v1",
)

#: Variant path fields that are YUM repository roots containing repodata/.
#: For these fields the ``repomd.xml`` file can be used as the checksum
#: target during upgrade, since it in turn references all other repodata
#: files by checksum. Shared with :mod:`productmd.localize`, which imports
#: it to decide which variant paths need their repodata downloaded.
REPO_FIELDS = frozenset({"repository", "debug_repository", "source_repository"})

#: Relative path from a repository root to the repomd.xml index file.
#: Joined onto the repository root by ``_checksum_target``.
_REPOMD_RELATIVE = os.path.join("repodata", "repomd.xml")


def _checksum_target(entry, compose_path):
    """Resolve which file on disk should be checksummed for *entry*.

    :param entry: Location entry exposing ``metadata_type``, ``field_name``
        and ``path`` attributes
    :param compose_path: Path to the local compose root that ``entry.path``
        is joined onto
    :return: Path of the file to checksum, or ``None`` when the entry has
        no checksum target

    Three outcomes are possible:

    * Entries that are not variant paths map to ``entry.path`` under
      *compose_path*.
    * Variant paths whose field is listed in ``REPO_FIELDS`` map to
      ``repodata/repomd.xml`` underneath the repository root.
    * Every other variant path is a directory reference, so ``None`` is
      returned.
    """
    is_variant_path = entry.metadata_type == MetadataType.VARIANT_PATH
    if not is_variant_path:
        return os.path.join(compose_path, entry.path)

    if entry.field_name in REPO_FIELDS:
        # The repository root itself is a directory; checksum its index file.
        return os.path.join(compose_path, entry.path, _REPOMD_RELATIVE)

    return None


class MetadataType(str, Enum):
"""Type of metadata entry in a compose.
Expand Down Expand Up @@ -330,7 +355,10 @@ def upgrade_to_v2(
:param modules: :class:`~productmd.modules.Modules` instance to upgrade
:param composeinfo: :class:`~productmd.composeinfo.ComposeInfo` instance to upgrade
:param base_url: Base URL prefix for constructing remote URLs
:param compute_checksums: Compute SHA-256 checksums and sizes from local files
:param compute_checksums: Compute SHA-256 checksums and sizes from local files.
For repository variant paths (``repository``, ``debug_repository``,
``source_repository``), the checksum is computed on the
``repodata/repomd.xml`` file under the repository root.
:param compose_path: Path to local compose root (required when *compute_checksums* is True)
:param strict_checksums: Raise :class:`FileNotFoundError` instead of warning
when a file cannot be found for checksum computation
Expand Down Expand Up @@ -386,15 +414,15 @@ def upgrade_to_v2(
# When checksums are not requested or parallel_checksums <= 1,
# batch_size is set to total (but never less than 1) so everything
# runs in one pass.
use_parallel = compute_checksums and compose_path is not None and parallel_checksums > 1
batch_size = parallel_checksums if use_parallel else total
batch_size = parallel_checksums if use_parallel else max(total, 1)

# Validate missing files upfront before starting any threads.
# This ensures strict_checksums errors are raised early.
if compute_checksums and compose_path is not None:
for entry in entries:
if entry.metadata_type == MetadataType.VARIANT_PATH:
file_path = _checksum_target(entry, compose_path)
if file_path is None:
continue
file_path = os.path.join(compose_path, entry.path)
if not os.path.isfile(file_path):
if strict_checksums:
raise FileNotFoundError(f"Cannot compute checksum: file not found: {file_path}")
Expand All @@ -415,9 +443,9 @@ def upgrade_to_v2(
tasks_in_group = []
for offset, entry in enumerate(group):
idx = group_start + offset
if entry.metadata_type == MetadataType.VARIANT_PATH:
file_path = _checksum_target(entry, compose_path)
if file_path is None:
continue
file_path = os.path.join(compose_path, entry.path)
if os.path.isfile(file_path):
tasks_in_group.append((idx, file_path))

Expand Down
7 changes: 2 additions & 5 deletions productmd/localize.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
import defusedxml.ElementTree as ET

from productmd.common import _get_default_headers
from productmd.convert import downgrade_to_v1, iter_all_locations
from productmd.convert import REPO_FIELDS, downgrade_to_v1, iter_all_locations


__all__ = (
Expand Down Expand Up @@ -225,9 +225,6 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
#: Default chunk size for streaming downloads (8 KB)
_CHUNK_SIZE = 8192

#: Variant path fields that are YUM repository roots containing repodata/
_REPO_FIELDS = frozenset({"repository", "debug_repository", "source_repository"})

#: XML namespace used in repomd.xml
_REPOMD_NS = "http://linux.duke.edu/metadata/repo"

Expand Down Expand Up @@ -618,7 +615,7 @@ def _collect_download_tasks(
# Variant paths: repository fields need repodata downloading,
# all other fields are directory references (not downloadable).
if entry.metadata_type == "variant_path":
if entry.field_name in _REPO_FIELDS:
if entry.field_name in REPO_FIELDS:
repo_entries.append((entry.location.url, entry.location.local_path, entry.location))
continue

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<repomd xmlns="http://linux.duke.edu/metadata/repo">
<revision>1738627200</revision>
<data type="primary">
<checksum type="sha256">d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592</checksum>
<location href="repodata/d7a8fbb-primary.xml.gz"/>
<size>0</size>
</data>
</repomd>
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<repomd xmlns="http://linux.duke.edu/metadata/repo">
<revision>1738627200</revision>
<data type="primary">
<checksum type="sha256">e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855</checksum>
<location href="repodata/e3b0c44-primary.xml.gz"/>
<size>0</size>
</data>
</repomd>
42 changes: 42 additions & 0 deletions tests/integration/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,11 @@ def _download_metadata(dest_dir):
"Server/x86_64/iso/boot.iso",
"Server/x86_64/os/GPL",
"Server/x86_64/os/Packages/b/bash-5.2.26-3.fc41.x86_64.rpm",
"Server/x86_64/os/repodata/repomd.xml",
"Server/aarch64/iso/boot.iso",
"Server/aarch64/os/GPL",
"Server/aarch64/os/Packages/b/bash-5.2.26-3.fc41.aarch64.rpm",
"Server/aarch64/os/repodata/repomd.xml",
]


Expand Down Expand Up @@ -340,6 +342,46 @@ def test_upgrade_single_file_discovers_compose_root(self):
assert img["location"]["checksum"] is not None
assert img["location"]["checksum"].startswith("sha256:")

def test_upgrade_computes_repomd_checksums(self):
    """Test that upgrade with --compute-checksums populates checksums for repo variant paths.

    Runs the ``upgrade`` CLI command against the compose prepared by
    ``_download_compose`` and inspects the generated ``composeinfo.json``:
    every ``repository`` path of the ``Server`` variant must carry a
    SHA-256 checksum and a size, while ``os_tree`` paths must carry no
    checksum.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        compose_dir = os.path.join(tmp_dir, "compose")
        _download_compose(compose_dir)

        output_dir = os.path.join(tmp_dir, "v2-repomd")
        result = _run_productmd(
            "upgrade",
            "--output",
            output_dir,
            "--base-url",
            f"{HTTP_BASE_URL}/",
            "--compute-checksums",
            compose_dir,
        )

        assert result.returncode == 0, f"stdout: {result.stdout}\nstderr: {result.stderr}"

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(os.path.join(output_dir, "composeinfo.json"), encoding="utf-8") as f:
            data = json.load(f)

        # Repository variant paths should have checksums computed
        # from their repodata/repomd.xml files.
        variant = data["payload"]["variants"]["Server"]
        repo_paths = variant["paths"]["repository"]
        # Guard against a vacuous pass: with no arches the loop below
        # would assert nothing at all.
        assert repo_paths, "Server variant should have at least one repository path"
        for arch, loc in repo_paths.items():
            assert isinstance(loc, dict), "v2.0 repo path should be a Location dict"
            assert loc.get("checksum") is not None, f"repository[{arch}] should have a checksum from repomd.xml"
            assert loc["checksum"].startswith("sha256:"), f"repository[{arch}] checksum should be SHA-256"
            assert loc.get("size") is not None, f"repository[{arch}] should have a size from repomd.xml"

        # Non-repo variant paths are directory references and should NOT
        # have checksums; os_tree is checked as the representative field.
        os_tree_paths = variant["paths"]["os_tree"]
        assert os_tree_paths, "Server variant should have at least one os_tree path"
        for arch, loc in os_tree_paths.items():
            assert isinstance(loc, dict), "v2.0 os_tree path should be a Location dict"
            assert loc.get("checksum") is None, f"os_tree[{arch}] should NOT have a checksum"


class TestDowngradeIntegration:
"""Test downgrading v2.0 metadata to v1.2 via the CLI."""
Expand Down
Loading
Loading