From 01e9dd78913f28e8de71a0e35e9fc54d23a783e0 Mon Sep 17 00:00:00 2001 From: Lukasz Juranek Date: Sat, 31 Jan 2026 08:17:15 +0100 Subject: [PATCH 1/2] Add sbom generation tooling (#2232) --- sbom/BUILD.bazel | 29 ++ sbom/SBOM_Implementation_Approach_SCORE.md | 161 +++++++++ sbom/SBOM_Readme.md | 85 +++++ sbom/defs.bzl | 116 ++++++ sbom/extensions.bzl | 316 +++++++++++++++++ sbom/internal/BUILD | 24 ++ sbom/internal/__init__.py | 1 + sbom/internal/aspect.bzl | 91 +++++ sbom/internal/generator/BUILD | 34 ++ sbom/internal/generator/__init__.py | 1 + .../internal/generator/cyclonedx_formatter.py | 251 +++++++++++++ sbom/internal/generator/purl.py | 246 +++++++++++++ sbom/internal/generator/sbom_generator.py | 330 ++++++++++++++++++ sbom/internal/generator/spdx_formatter.py | 180 ++++++++++ sbom/internal/metadata_rule.bzl | 51 +++ sbom/internal/providers.bzl | 29 ++ sbom/internal/rules.bzl | 167 +++++++++ sbom/repos.bzl | 203 +++++++++++ sbom/repository_rules.bzl | 288 +++++++++++++++ sbom/tests/BUILD | 25 ++ sbom/tests/__init__.py | 1 + sbom/tests/compare_sbom_vs_query.sh | 297 ++++++++++++++++ sbom/tests/test_cyclonedx_formatter.py | 142 ++++++++ sbom/tests/test_purl.py | 109 ++++++ sbom/tests/test_spdx_formatter.py | 109 ++++++ 25 files changed, 3286 insertions(+) create mode 100644 sbom/BUILD.bazel create mode 100644 sbom/SBOM_Implementation_Approach_SCORE.md create mode 100644 sbom/SBOM_Readme.md create mode 100644 sbom/defs.bzl create mode 100644 sbom/extensions.bzl create mode 100644 sbom/internal/BUILD create mode 100644 sbom/internal/__init__.py create mode 100644 sbom/internal/aspect.bzl create mode 100644 sbom/internal/generator/BUILD create mode 100644 sbom/internal/generator/__init__.py create mode 100644 sbom/internal/generator/cyclonedx_formatter.py create mode 100644 sbom/internal/generator/purl.py create mode 100644 sbom/internal/generator/sbom_generator.py create mode 100644 sbom/internal/generator/spdx_formatter.py create mode 100644 
sbom/internal/metadata_rule.bzl create mode 100644 sbom/internal/providers.bzl create mode 100644 sbom/internal/rules.bzl create mode 100644 sbom/repos.bzl create mode 100644 sbom/repository_rules.bzl create mode 100644 sbom/tests/BUILD create mode 100644 sbom/tests/__init__.py create mode 100755 sbom/tests/compare_sbom_vs_query.sh create mode 100644 sbom/tests/test_cyclonedx_formatter.py create mode 100644 sbom/tests/test_purl.py create mode 100644 sbom/tests/test_spdx_formatter.py diff --git a/sbom/BUILD.bazel b/sbom/BUILD.bazel new file mode 100644 index 0000000..790e22d --- /dev/null +++ b/sbom/BUILD.bazel @@ -0,0 +1,29 @@ +# SBOM Generation Package +# +# This package provides Bazel-native SBOM (Software Bill of Materials) generation +# using module extensions and aspects. +# +# Public API: +# - load("@score_tooling//sbom:defs.bzl", "sbom") +# - use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") + +load("@rules_python//python:defs.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "defs.bzl", + "extensions.bzl", + "repos.bzl", + "repository_rules.bzl", +]) + +# Filegroup for all SBOM-related bzl files +filegroup( + name = "bzl_files", + srcs = [ + "defs.bzl", + "extensions.bzl", + "//sbom/internal:bzl_files", + ], +) diff --git a/sbom/SBOM_Implementation_Approach_SCORE.md b/sbom/SBOM_Implementation_Approach_SCORE.md new file mode 100644 index 0000000..5f5648b --- /dev/null +++ b/sbom/SBOM_Implementation_Approach_SCORE.md @@ -0,0 +1,161 @@ +# Detailed SBOM Implementation Approach for Eclipse SCORE + +## Executive Summary + +This proposal addresses the existing backlog items ([#2144](https://github.com/eclipse-score/score/issues/2144), [#2232](https://github.com/eclipse-score/score/issues/2232), [#2060](https://github.com/eclipse-score/score/issues/2060), [#2103](https://github.com/eclipse-score/score/issues/2103)) and provides a comprehensive implementation roadmap for SBOM generation in Eclipse 
SCORE. + +--- + +## High-Level Architecture + + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ SCORE SBOM ARCHITECTURE │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Rust │ │ C++ │ │ Bazel │ │ +│ │ Cargo.toml │ │ http_archive│ │ MODULE.bazel│ │ +│ │ (metadata) │ │ git_override│ │ (bazel_dep) │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ SBOM GENERATOR MODULE │ │ +│ │ ┌──────────────────────┐ ┌──────────────────────┐ │ │ +│ │ │ Bazel Aspect │ │ Metadata Extension │ │ │ +│ │ │ (dep graph traversal│ │ (license/supplier │ │ │ +│ │ │ via sbom_aspect) │ │ from MODULE.bazel)│ │ │ +│ │ └──────────────────────┘ └──────────────────────┘ │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────┴─────────────────┐ │ +│ ▼ ▼ │ +│ ┌─────────────┐ ┌─────────────┐ │ +│ │ SPDX 2.3 │ │ CycloneDX │ │ +│ │ .spdx.json │ │ 1.6 .json │ │ +│ └─────────────┘ └─────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### 2.2 Integration with Existing SCORE Tooling + +Dash is a **license compliance checker** only (no SBOM output, no VEX). +SBOM generation is a new, separate module that complements Dash. 
+ +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ eclipse-score/tooling │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ EXISTING NEW (IMPLEMENTED) │ +│ ──────── ────────────────── │ +│ ├── dash/ ├── sbom/ │ +│ │ └── dash_license_checker │ ├── defs.bzl │ +│ │ (license compliance) │ │ └── sbom() macro │ +│ ├── cr_checker/ │ ├── extensions.bzl │ +│ │ └── copyright_checker │ │ └── sbom.license() │ +│ │ (header validation) │ ├── internal/ │ +│ │ │ │ ├── aspect.bzl (dep traversal) │ +│ │ │ │ ├── rules.bzl (build rule) │ +│ │ │ │ └── generator/ │ +│ │ │ │ ├── sbom_generator.py │ +│ │ │ │ ├── spdx_formatter.py │ +│ │ │ │ ├── cyclonedx_formatter.py │ +│ │ │ │ └── purl.py │ +│ │ │ └── tests/ │ +│ │ │ │ +│ COMPLEMENTARY WORKFLOW │ │ +│ ────────────────────── │ │ +│ Dash: checks if dependency │ │ +│ licenses are allowed by policy │ │ +│ SBOM: generates .spdx.json / │ │ +│ .cdx.json listing all deps │ │ +│ with name, version, license, │ │ +│ supplier, PURL │ │ +│ │ │ +│ VALIDATION (external, optional) │ │ +│ ──────────────────────────── │ │ +│ pip install spdx-tools │ │ +│ pyspdxtools -i out.spdx.json │ │ +│ Or: https://tools.spdx.org │ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## 3. 
SBOM Generation Chain + +When `bazel build //:my_sbom` is invoked, the following chain executes: + +``` + ┌──────────────────────────────────────────────────────────────────────┐ + │ PHASE 1: Loading (MODULE.bazel) │ + │ │ + │ sbom_metadata module extension iterates ALL modules in workspace: │ + │ - Collects sbom.license() tags (name, license, supplier, version) │ + │ - Collects sbom.license(type="cargo") tags (Rust crates) │ + │ - Writes metadata.json to @sbom_metadata repository │ + └──────────────────────────┬───────────────────────────────────────────┘ + │ + ▼ + ┌──────────────────────────────────────────────────────────────────────┐ + │ PHASE 2: Analysis (aspect.bzl) │ + │ │ + │ sbom_aspect is attached to `targets` attr of sbom_rule. │ + │ For each target in targets = ["//src:app"]: │ + │ - Traverses deps, srcs, proc_macro_deps, hdrs, etc. │ + │ - Recursively collects SbomDepsInfo from all transitive deps │ + │ - Builds depsets of: │ + │ * external_repos (e.g. "score_kyron", "crates__tokio-1.10") │ + │ * transitive_deps (all labels in the dep graph) │ + └──────────────────────────┬───────────────────────────────────────────┘ + │ + ▼ + ┌──────────────────────────────────────────────────────────────────────┐ + │ PHASE 3: Execution (rules.bzl → sbom_generator.py) │ + │ │ + │ _sbom_impl combines aspect output + extension metadata: │ + │ 1. Reads external_repos and transitive_deps from SbomDepsInfo │ + │ 2. Reads metadata.json from @sbom_metadata extension │ + │ 3. Writes _deps.json with all data + config │ + │ 4. Runs sbom_generator.py which: │ + │ a. Filters repos by exclude_patterns (removes build tools) │ + │ b. Resolves each repo to a component (name, version, PURL) │ + │ c. Merges extension metadata (license, supplier, version) │ + │ d. Calls spdx_formatter.py → {name}.spdx.json │ + │ e. 
Calls cyclonedx_formatter.py → {name}.cdx.json │ + └──────────────────────────────────────────────────────────────────────┘ +``` + +### Key files in the chain + +| File | Phase | Role | +|------|-------|------| +| `extensions.bzl` | Loading | Collects `sbom.license()` from all modules (all dep types) | +| `internal/aspect.bzl` | Analysis | Traverses target dep graph, returns `SbomDepsInfo` | +| `internal/providers.bzl` | Analysis | Defines `SbomDepsInfo` provider (external_repos, transitive_deps) | +| `internal/rules.bzl` | Execution | Joins aspect + extension data, invokes Python generator | +| `internal/generator/sbom_generator.py` | Execution | Resolves repos to components, calls formatters | +| `internal/generator/spdx_formatter.py` | Execution | Produces SPDX 2.3 JSON | +| `internal/generator/cyclonedx_formatter.py` | Execution | Produces CycloneDX 1.6 JSON | +| `internal/generator/purl.py` | Execution | Generates Package URLs for components | +| `defs.bzl` | Public API | `sbom()` macro | + +## 4. 
Tool Selection + +### 4.1 Implemented Tool Stack + +| Component | Tool Used | Status | Rationale | +|-----------|-----------|--------|-----------| +| SBOM Framework | Custom Bazel rules (aspects + module extension) | Implemented | Native Bazel integration, hermetic builds | +| Dependency Discovery | Bazel aspect (sbom_aspect) | Implemented | Traverses transitive deps of any target | +| Rust Crate Metadata | `sbom.license(type = "cargo")` in MODULE.bazel | Implemented | Manual license/supplier, auto PURL | +| SPDX Generation | Custom Python formatter (spdx_formatter.py) | Implemented | SPDX 2.3 JSON, validated at tools.spdx.org | +| CycloneDX Generation | Custom Python formatter (cyclonedx_formatter.py) | Implemented | CycloneDX 1.6 JSON | +| License Data | `sbom.license()` in MODULE.bazel | Implemented | Manual declaration per dependency | +| SPDX Validation | [spdx-tools](https://github.com/spdx/tools-python) (external) | Available | For offline validation | +| License Compliance | Existing Dash (separate tool) | Existing | Complements SBOM, not integrated | + +--- diff --git a/sbom/SBOM_Readme.md b/sbom/SBOM_Readme.md new file mode 100644 index 0000000..6bf8e4a --- /dev/null +++ b/sbom/SBOM_Readme.md @@ -0,0 +1,85 @@ +# SBOM Setup Guide + +## 1. 
Configure MODULE.bazel + +Add the following at the end of your `MODULE.bazel`: + +```starlark +# Load the SBOM extension and make the generated metadata repo available +sbom_ext = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") +use_repo(sbom_ext, "sbom_metadata") + +# Declare license/supplier for each dependency: + +# For bazel_dep() modules — version is read from the module graph, no need to specify it: +sbom_ext.license(name = "googletest", license = "BSD-3-Clause", supplier = "Google LLC") + +# For http_archive deps — version is NOT in the module graph, must be specified: +sbom_ext.license(name = "boost", license = "BSL-1.0", version = "1.87.0", supplier = "Boost.org") + +# For git_override deps — specify version (commit) + remote so a PURL can be generated: +sbom_ext.license(name = "iceoryx2", license = "Apache-2.0", supplier = "Eclipse Foundation", + version = "d3d1c9a", remote = "https://github.com/eclipse-iceoryx/iceoryx2.git") + +# For Rust crates (type = "cargo" generates pkg:cargo/ PURL): +sbom_ext.license(name = "tokio", license = "MIT", version = "1.10", type = "cargo", + supplier = "Tokio Contributors") +``` + +## 2. Add SBOM target in BUILD + +```starlark +load("@score_tooling//sbom:defs.bzl", "sbom") + +sbom( + name = "my_sbom", + targets = ["//my/app:binary"], + component_name = "my_application", + component_version = "1.0.0", +) +``` + +## 3. Build + +```bash +bazel build //:my_sbom +``` + +## 4. 
Output + +Two files in `bazel-bin/`: + +- `my_sbom.spdx.json` -- SPDX 2.3 +- `my_sbom.cdx.json` -- CycloneDX 1.6 + +--- + +## Auto-extracted vs manual fields + +**Always auto-extracted:** + +| Field | Source | +|-------|--------| +| Dependency list | Aspect traverses transitive deps of your targets | +| Version (bazel_dep) | From module graph | +| Version (crates) | From crate repo name | +| PURL | Generated from URLs/remotes | + +**What is excluded from the SBOM:** + +- Dependencies not in the transitive dep graph of your `targets` (e.g. `dev_dependency = True` lint/formatting tools that your binary never links against) +- Build toolchain repos matching `exclude_patterns` (e.g. `rules_rust`, `rules_cc`, `bazel_tools`, `platforms`) + +**What you must provide manually:** + +| Field | Where | When | +|-------|-------|------| +| license | `sbom_ext.license()` | All dependencies | +| supplier | `sbom_ext.license()` | Recommended for NTIA compliance | +| version | `sbom_ext.license()` | For http_archive/git/crate deps (auto-extracted for bazel_dep) | + +--- + +## Example + +See `reference_integration/BUILD:39-66` for working SBOM targets and `reference_integration/MODULE.bazel:69-77` for the metadata extension setup. diff --git a/sbom/defs.bzl b/sbom/defs.bzl new file mode 100644 index 0000000..bbabe7d --- /dev/null +++ b/sbom/defs.bzl @@ -0,0 +1,116 @@ +"""Public API for SBOM generation. + +This module provides the sbom() macro, which is the main entry point for +generating Software Bill of Materials for Bazel targets. 
def sbom(
        name,
        targets,
        metadata_json = "@sbom_metadata//:metadata.json",
        repo_metadata = None,
        output_formats = ["spdx", "cyclonedx"],
        producer_name = "Eclipse Foundation",
        producer_url = "https://projects.eclipse.org/projects/automotive.score",
        component_name = None,
        component_version = None,
        namespace = None,
        exclude_patterns = None,
        **kwargs):
    """Generates SBOM for specified targets.

    This macro creates an SBOM (Software Bill of Materials) for the specified
    targets, traversing their transitive dependencies and generating output
    in SPDX 2.3 and/or CycloneDX 1.6 format.

    Prerequisites:
        In your MODULE.bazel, you must enable the sbom_metadata extension:
        ```
        sbom_ext = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata")
        use_repo(sbom_ext, "sbom_metadata")
        ```

    Args:
        name: Rule name, also used as output filename prefix
        targets: List of targets to include in SBOM
        metadata_json: Label to the metadata.json file from sbom_metadata extension
        repo_metadata: Optional list forwarded to the rule's `repo_metadata`
            attribute; None is forwarded as an empty list.
            NOTE(review): presumably labels of additional metadata files;
            confirm against sbom_rule.
        output_formats: List of formats to generate ("spdx", "cyclonedx")
        producer_name: SBOM producer organization name
        producer_url: SBOM producer URL
        component_name: Main component name (defaults to rule name)
        component_version: Component version string (defaults to "")
        namespace: SBOM namespace URI (defaults to https://eclipse.dev/score)
        exclude_patterns: Repo patterns to exclude (e.g., build tools).
            None selects the built-in list of build-tool repositories; an
            explicit empty list disables exclusion entirely.
        **kwargs: Additional arguments passed to the underlying rule

    Outputs:
        {name}.spdx.json - SPDX 2.3 format (if "spdx" in output_formats)
        {name}.cdx.json - CycloneDX 1.6 format (if "cyclonedx" in output_formats)

    Example:
        # Single target SBOM
        sbom(
            name = "my_app_sbom",
            targets = ["//src:my_app"],
            component_version = "1.0.0",
        )

        # Multi-target SBOM
        sbom(
            name = "product_sbom",
            targets = [
                "//feature_showcase/rust:orch_per_example",
                "//feature_showcase/rust:kyron_example",
            ],
            component_name = "score_reference_integration",
            component_version = "0.5.0-beta",
        )

        # SPDX only
        sbom(
            name = "my_spdx_sbom",
            targets = ["//src:my_app"],
            output_formats = ["spdx"],
        )
    """
    default_exclude_patterns = [
        "rules_rust",
        "rules_cc",
        "bazel_tools",
        "platforms",
        "bazel_skylib",
        "rules_python",
        "rules_proto",
        "protobuf",
        "local_config_",
        "remote_",
    ]

    # Compare against None rather than truthiness: an explicit [] is a valid
    # request to exclude nothing and must not silently re-enable the defaults.
    if exclude_patterns == None:
        exclude_patterns = default_exclude_patterns

    sbom_rule(
        name = name,
        targets = targets,
        metadata_json = metadata_json,
        repo_metadata = repo_metadata if repo_metadata else [],
        output_formats = output_formats,
        producer_name = producer_name,
        producer_url = producer_url,
        component_name = component_name if component_name else name,
        component_version = component_version if component_version else "",
        namespace = namespace if namespace else "https://eclipse.dev/score",
        exclude_patterns = exclude_patterns,
        **kwargs
    )
def _owner_and_repo(path):
    """Splits an "owner/repo[/...]" path into its first two segments.

    Only a trailing ".git" is removed from the repo segment, so dotted
    repository names such as "socket.io" or "foo.github.io" stay intact.

    Args:
        path: Text following the host separator, e.g. "owner/repo.git".

    Returns:
        [owner, repo], or None when either segment is missing or empty.
    """
    segments = path.split("/")
    if len(segments) < 2:
        return None
    owner = segments[0]
    repo = segments[1]
    if repo.endswith(".git"):
        repo = repo[:-4]
    if not owner or not repo:
        return None
    return [owner, repo]

def _strip_archive_suffix(text):
    """Cuts text at the first ".tar", ".tgz" or ".zip" marker."""
    for suffix in [".tar", ".tgz", ".zip"]:
        text = text.split(suffix)[0]
    return text

def _generate_purl_from_url(url, name, version):
    """Generate Package URL from download URL.

    Args:
        url: Download URL; may be empty or None.
        name: Dependency name, used for the generic fallback PURL.
        version: Version string; a falsy value is rendered as "unknown".

    Returns:
        "pkg:github/<owner>/<repo>@<version>" or
        "pkg:gitlab/<owner>/<repo>@<version>" when the URL contains
        "github.com/" or "gitlab.com/" followed by a non-empty owner and
        repo; otherwise "pkg:generic/<name>@<version>".
    """
    version_str = version or "unknown"
    if url:
        for host, purl_type in [("github.com/", "github"), ("gitlab.com/", "gitlab")]:
            if host not in url:
                continue
            owner_repo = _owner_and_repo(url.split(host)[1])
            if owner_repo:
                return "pkg:{}/{}/{}@{}".format(purl_type, owner_repo[0], owner_repo[1], version_str)

    return "pkg:generic/{}@{}".format(name, version_str)

def _generate_purl_from_git(remote, name, version):
    """Generate Package URL from git remote.

    Both scp-style ("git@github.com:owner/repo.git") and URL-style
    ("https://github.com/owner/repo.git") remotes are recognised.

    Args:
        remote: Git remote; may be empty or None.
        name: Dependency name, used for the generic fallback PURL.
        version: Version string; a falsy value is rendered as "unknown".

    Returns:
        A pkg:github/ or pkg:gitlab/ PURL when the remote contains
        "github.com" / "gitlab.com" directly followed by ":" or "/" and a
        non-empty owner and repo; otherwise "pkg:generic/<name>@<version>".
    """
    version_str = version or "unknown"
    if remote:
        for host, purl_type in [("github.com", "github"), ("gitlab.com", "gitlab")]:
            # The host must be directly followed by ":" or "/" so that look-alike
            # hosts (e.g. "github.company.example") are not treated as matches.
            for sep in [host + ":", host + "/"]:
                if sep not in remote:
                    continue
                owner_repo = _owner_and_repo(remote.split(sep)[-1])
                if owner_repo:
                    return "pkg:{}/{}/{}@{}".format(purl_type, owner_repo[0], owner_repo[1], version_str)

                # Only the first matching separator is considered per host.
                break

    return "pkg:generic/{}@{}".format(name, version_str)

def _extract_version_from_url(url):
    """Extract version from URL patterns.

    Args:
        url: Download URL; may be empty or None.

    Returns:
        The version without any leading "v", or None when no candidate that
        starts with a digit is found.
    """
    if not url:
        return None

    # Try common patterns
    for sep in ["/v", "/archive/v", "/archive/", "/releases/download/v", "/releases/download/"]:
        if sep not in url:
            continue
        rest = url.split(sep)[-1]
        candidate = _strip_archive_suffix(rest.split("/")[0]).lstrip("v")

        # Require a digit after the optional "v" prefix; otherwise ordinary
        # words such as "vendored-deps" would be reported as versions.
        if candidate and candidate[0].isdigit():
            return candidate

    # Try filename pattern: name-version.tar.gz
    filename = url.split("/")[-1]
    if "-" in filename:
        candidate = _strip_archive_suffix(filename.rsplit("-", 1)[1])
        if candidate and candidate[0].isdigit():
            return candidate

    return None
def _sbom_metadata_impl(module_ctx):
    """Gathers SBOM metadata from the modules visible to this extension.

    For each module in module_ctx.modules this records the module's own
    name/version and folds in its http_archive, git_repository and license
    tags. License tags are routed by what they carry: type "cargo" goes to
    crates; a URL, or a version without a remote, goes to http_archives; a
    remote goes to git_repositories; anything else is kept as a license
    override that is later applied to the recorded modules. The result is
    JSON-encoded and handed to the "sbom_metadata" repository.

    Args:
        module_ctx: Module extension context supplied by Bazel.
    """
    modules = {}
    http_archives = {}
    git_repos = {}
    crates = {}
    license_overrides = {}

    for mod in module_ctx.modules:
        declared_by = mod.name

        # A module that has both a name and a version is a component itself.
        if mod.name and mod.version:
            modules[mod.name] = {
                "version": mod.version,
                "purl": "pkg:bazel/{}@{}".format(mod.name, mod.version),
            }

        # http_archive tags: version and PURL are derived from the URL when absent.
        for archive in mod.tags.http_archive:
            if archive.urls:
                url = archive.urls[0]
            elif hasattr(archive, "url") and archive.url:
                url = archive.url
            else:
                url = ""
            version = archive.version or _extract_version_from_url(url)
            purl = archive.purl or _generate_purl_from_url(url, archive.name, version)
            http_archives[archive.name] = {
                "version": version or "unknown",
                "url": url,
                "purl": purl,
                "license": archive.license or "",
                "supplier": archive.supplier or "",
                "sha256": archive.sha256 or "",
                "declared_by": declared_by,
            }

        # git_repository tags: the tag wins over a 12-character commit prefix.
        for repo in mod.tags.git_repository:
            if repo.tag:
                version = repo.tag
            elif repo.commit:
                version = repo.commit[:12]
            else:
                version = "unknown"
            purl = repo.purl or _generate_purl_from_git(repo.remote, repo.name, version)
            git_repos[repo.name] = {
                "version": version,
                "remote": repo.remote,
                "commit": repo.commit or "",
                "tag": repo.tag or "",
                "purl": purl,
                "license": repo.license or "",
                "supplier": repo.supplier or "",
                "declared_by": declared_by,
            }

        # license tags: classify by the optional fields that are present.
        for entry in mod.tags.license:
            dep_type = getattr(entry, "type", "") or ""
            url_list = getattr(entry, "urls", None)
            if url_list:
                url = url_list[0]
            else:
                url = getattr(entry, "url", "") or ""
            remote = getattr(entry, "remote", "") or ""
            explicit_version = getattr(entry, "version", "") or ""
            supplier = getattr(entry, "supplier", "") or ""

            if dep_type == "cargo":
                # Rust crate
                version = explicit_version or "unknown"
                crates[entry.name] = {
                    "version": version,
                    "purl": entry.purl or "pkg:cargo/{}@{}".format(entry.name, version),
                    "license": entry.license,
                    "supplier": supplier,
                }
            elif url or (explicit_version and not remote):
                # http_archive dependency
                version = explicit_version or _extract_version_from_url(url)
                purl = entry.purl or _generate_purl_from_url(url, entry.name, version)
                http_archives[entry.name] = {
                    "version": version or "unknown",
                    "url": url,
                    "purl": purl,
                    "license": entry.license,
                    "supplier": supplier,
                    "declared_by": declared_by,
                }
            elif remote:
                # git_repository dependency
                version = explicit_version or "unknown"
                purl = entry.purl or _generate_purl_from_git(remote, entry.name, version)
                git_repos[entry.name] = {
                    "version": version,
                    "remote": remote,
                    "commit": "",
                    "tag": "",
                    "purl": purl,
                    "license": entry.license,
                    "supplier": supplier,
                    "declared_by": declared_by,
                }
            else:
                # bazel_dep module license override
                license_overrides[entry.name] = {
                    "license": entry.license,
                    "supplier": supplier,
                    "purl": entry.purl or "",
                }

    # Fold the overrides into the module records collected above.
    for dep_name, override in license_overrides.items():
        if dep_name not in modules:
            continue
        record = modules[dep_name]
        record["license"] = override["license"]
        if override.get("supplier"):
            record["supplier"] = override["supplier"]
        if override["purl"]:
            record["purl"] = override["purl"]

    _sbom_metadata_repo(
        name = "sbom_metadata",
        metadata_content = json.encode({
            "modules": modules,
            "http_archives": http_archives,
            "git_repositories": git_repos,
            "crates": crates,
            "licenses": license_overrides,
        }),
    )
attr.string(mandatory = True, doc = "SPDX license identifier"), + "supplier": attr.string(doc = "Supplier/organization name (e.g., 'Boost.org', 'Google LLC')"), + "version": attr.string(doc = "Version string (for http_archive/git_repository/crate; auto-extracted for bazel_dep)"), + "type": attr.string(doc = "Dependency type: 'cargo' for Rust crates (affects PURL generation). Leave empty for auto-detection."), + "purl": attr.string(doc = "Override Package URL"), + "url": attr.string(doc = "Download URL for http_archive (for PURL generation)"), + "urls": attr.string_list(doc = "Download URLs for http_archive (for PURL generation)"), + "remote": attr.string(doc = "Git remote URL for git_repository (for PURL generation)"), + }, +) + +sbom_metadata = module_extension( + implementation = _sbom_metadata_impl, + tag_classes = { + "http_archive": _http_archive_tag, + "git_repository": _git_repository_tag, + "license": _license_tag, + }, + doc = "Collects SBOM metadata from dependency declarations", +) diff --git a/sbom/internal/BUILD b/sbom/internal/BUILD new file mode 100644 index 0000000..6237649 --- /dev/null +++ b/sbom/internal/BUILD @@ -0,0 +1,24 @@ +# Internal SBOM implementation package +# +# This package contains internal implementation details for SBOM generation. 
# Attributes that may hold a list of targets or a single target.
_SBOM_GENERIC_DEP_ATTRS = [
    "deps",
    "srcs",
    "data",
    "proc_macro_deps",
    "crate_root",
    "compile_data",
]

# cc_library-style attributes; only list values are inspected for these.
_SBOM_CC_DEP_ATTRS = [
    "hdrs",
    "textual_hdrs",
    "implementation_deps",
]

def _sbom_aspect_impl(target, ctx):
    """Collects transitive dependency information for SBOM generation.

    Args:
        target: The target being analyzed
        ctx: The aspect context

    Returns:
        A one-element list holding an SbomDepsInfo whose transitive_deps
        contains this target's label plus everything propagated by its
        dependencies, and whose external_repos contains this target's
        workspace name (when non-empty) plus the propagated repos.
    """
    own_label = target.label

    # A non-empty workspace name marks the target as an external dependency.
    own_repos = [own_label.workspace_name] if own_label.workspace_name else []

    direct_labels = []
    propagated_deps = []
    propagated_repos = []

    for attr_name in _SBOM_GENERIC_DEP_ATTRS + _SBOM_CC_DEP_ATTRS:
        if not hasattr(ctx.rule.attr, attr_name):
            continue
        value = getattr(ctx.rule.attr, attr_name)

        if type(value) == "list":
            candidates = value
        elif attr_name in _SBOM_GENERIC_DEP_ATTRS and value != None and hasattr(value, "label"):
            # Single target attribute (e.g., crate_root)
            candidates = [value]
        else:
            candidates = []

        for dep in candidates:
            if not hasattr(dep, "label"):
                continue
            direct_labels.append(dep.label)
            if SbomDepsInfo in dep:
                # Carry forward what the dependency already collected.
                propagated_deps.append(dep[SbomDepsInfo].transitive_deps)
                propagated_repos.append(dep[SbomDepsInfo].external_repos)

    return [SbomDepsInfo(
        direct_deps = depset(direct_labels),
        transitive_deps = depset(
            direct = [own_label],
            transitive = propagated_deps,
        ),
        external_repos = depset(
            direct = own_repos,
            transitive = propagated_repos,
        ),
    )]

sbom_aspect = aspect(
    implementation = _sbom_aspect_impl,
    attr_aspects = _SBOM_GENERIC_DEP_ATTRS + _SBOM_CC_DEP_ATTRS,
    provides = [SbomDepsInfo],
    doc = "Traverses target dependencies and collects SBOM-relevant information",
)
+ +CycloneDX 1.6 Specification: https://cyclonedx.org/docs/1.6/json/ +""" + +import uuid +from typing import Any + + +def generate_cyclonedx( + components: list[dict[str, Any]], + config: dict[str, Any], + timestamp: str, +) -> dict[str, Any]: + """Generate CycloneDX 1.6 JSON document. + + Args: + components: List of component dictionaries + config: Configuration dictionary with producer info + timestamp: ISO 8601 timestamp + + Returns: + CycloneDX 1.6 compliant dictionary + """ + component_name = config.get("component_name", "unknown") + component_version = config.get("component_version", "") + producer_name = config.get("producer_name", "Eclipse Foundation") + producer_url = config.get("producer_url", "") + + # Generate serial number (URN UUID) + serial_number = f"urn:uuid:{uuid.uuid4()}" + + cdx_doc: dict[str, Any] = { + "$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": serial_number, + "version": 1, + "metadata": { + "timestamp": timestamp, + "tools": { + "components": [ + { + "type": "application", + "name": "score-sbom-generator", + "description": "Eclipse SCORE SBOM Generator (Bazel-native)", + "publisher": producer_name, + } + ] + }, + "component": { + "type": "application", + "name": component_name, + "version": component_version if component_version else "unversioned", + "bom-ref": _generate_bom_ref(component_name, component_version), + "purl": f"pkg:github/eclipse-score/{component_name}@{component_version}" + if component_version + else None, + "supplier": { + "name": producer_name, + "url": [producer_url] if producer_url else [], + }, + }, + "supplier": { + "name": producer_name, + "url": [producer_url] if producer_url else [], + }, + }, + "components": [], + "dependencies": [], + } + + # Clean up None values from metadata.component + if cdx_doc["metadata"]["component"].get("purl") is None: + del cdx_doc["metadata"]["component"]["purl"] + + # Root component bom-ref for 
dependencies + root_bom_ref = _generate_bom_ref(component_name, component_version) + + # Add components + dependency_refs = [] + for comp in components: + cdx_component = _create_cdx_component(comp) + cdx_doc["components"].append(cdx_component) + dependency_refs.append(cdx_component["bom-ref"]) + + # Add root dependency (main component depends on all components) + direct_deps = dependency_refs + cdx_doc["dependencies"].append( + { + "ref": root_bom_ref, + "dependsOn": direct_deps, + } + ) + + # Add each component's dependency entry + for comp in components: + name = comp.get("name", "") + version = comp.get("version", "") + bom_ref = _generate_bom_ref(name, version) + + cdx_doc["dependencies"].append( + { + "ref": bom_ref, + "dependsOn": [], + } + ) + + return cdx_doc + + +def _create_cdx_component(component: dict[str, Any]) -> dict[str, Any]: + """Create a CycloneDX component from component data. + + Args: + component: Component dictionary + + Returns: + CycloneDX component dictionary + """ + name = component.get("name", "unknown") + version = component.get("version", "unknown") + purl = component.get("purl", "") + license_id = component.get("license", "") + supplier = component.get("supplier", "") + comp_type = component.get("type", "library") + source = component.get("source", "") + url = component.get("url", "") + + cdx_comp: dict[str, Any] = { + "type": _map_type_to_cdx_type(comp_type), + "name": name, + "version": version, + "bom-ref": _generate_bom_ref(name, version), + } + + # Add PURL + if purl: + cdx_comp["purl"] = purl + + # Add license + if license_id: + cdx_comp["licenses"] = [ + { + "license": { + "id": license_id, + } + } + ] + + # Add supplier + if supplier: + cdx_comp["supplier"] = { + "name": supplier, + } + + # Add external references + external_refs = [] + + # Add download/source URL + if url: + external_refs.append( + { + "type": "distribution", + "url": url, + } + ) + elif source == "crates.io": + external_refs.append( + { + "type": 
"distribution", + "url": f"https://crates.io/crates/{name}/{version}", + } + ) + + # Add VCS URL for git sources + if source == "git" and url: + external_refs.append( + { + "type": "vcs", + "url": url, + } + ) + + if external_refs: + cdx_comp["externalReferences"] = external_refs + + return cdx_comp + + +def _map_type_to_cdx_type(comp_type: str) -> str: + """Map component type to CycloneDX component type. + + Args: + comp_type: Component type string + + Returns: + CycloneDX component type string + """ + type_mapping = { + "application": "application", + "library": "library", + "framework": "framework", + "file": "file", + "container": "container", + "firmware": "firmware", + "device": "device", + "data": "data", + "operating-system": "operating-system", + "device-driver": "device-driver", + "machine-learning-model": "machine-learning-model", + "platform": "platform", + } + return type_mapping.get(comp_type, "library") + + +def _generate_bom_ref(name: str, version: str) -> str: + """Generate a unique bom-ref for a component. + + Args: + name: Component name + version: Component version + + Returns: + Unique bom-ref string + """ + # Create a deterministic but unique reference + sanitized_name = _sanitize_name(name) + sanitized_version = _sanitize_name(version) if version else "unknown" + return f"{sanitized_name}@{sanitized_version}" + + +def _sanitize_name(value: str) -> str: + """Sanitize a string for use in bom-ref. + + Args: + value: String to sanitize + + Returns: + Sanitized string + """ + result = [] + for char in value: + if char.isalnum() or char in (".", "-", "_"): + result.append(char) + elif char in (" ", "/"): + result.append("-") + return "".join(result) or "unknown" diff --git a/sbom/internal/generator/purl.py b/sbom/internal/generator/purl.py new file mode 100644 index 0000000..f2b4b48 --- /dev/null +++ b/sbom/internal/generator/purl.py @@ -0,0 +1,246 @@ +"""PURL (Package URL) generation utilities. 
+ +This module provides utilities for generating Package URLs (PURLs) for +various types of dependencies in the SBOM. + +PURL Specification: https://github.com/package-url/purl-spec +""" + +import urllib.parse +from typing import Any + + +def generate_purl_for_label(label: str, metadata: dict[str, Any] | None = None) -> str: + """Generate a PURL for a Bazel label. + + Args: + label: Bazel label string (e.g., @score_kyron//src:lib) + metadata: Optional metadata dictionary for version info + + Returns: + PURL string + """ + # Parse the label + if label.startswith("@"): + # External repository + parts = label[1:].split("//", 1) + repo_name = parts[0] + target_path = parts[1] if len(parts) > 1 else "" + + # Check if we have metadata for this repo + if metadata: + return _generate_purl_from_metadata(repo_name, metadata) + + # Generate a generic PURL + return generate_generic_purl(repo_name, "unknown") + + # Local label + return f"pkg:bazel/{urllib.parse.quote(label)}@local" + + +def generate_cargo_purl(crate_name: str, version: str) -> str: + """Generate a PURL for a Cargo crate. + + Args: + crate_name: Name of the crate + version: Version string + + Returns: + PURL string + """ + # Normalize crate name (Cargo uses hyphens, not underscores) + normalized_name = crate_name.replace("_", "-") + return ( + f"pkg:cargo/{urllib.parse.quote(normalized_name)}@{urllib.parse.quote(version)}" + ) + + +def generate_github_purl( + owner: str, + repo: str, + version: str, + commit: str | None = None, +) -> str: + """Generate a PURL for a GitHub repository. 
+ + Args: + owner: GitHub owner/organization + repo: Repository name + version: Version string or tag + commit: Optional commit SHA + + Returns: + PURL string + """ + purl = f"pkg:github/{urllib.parse.quote(owner)}/{urllib.parse.quote(repo)}@{urllib.parse.quote(version)}" + if commit: + purl += f"?vcs_commit={urllib.parse.quote(commit)}" + return purl + + +def generate_bazel_module_purl(module_name: str, version: str) -> str: + """Generate a PURL for a Bazel module. + + Args: + module_name: Name of the Bazel module + version: Version string + + Returns: + PURL string + """ + return f"pkg:bazel/{urllib.parse.quote(module_name)}@{urllib.parse.quote(version)}" + + +def generate_generic_purl( + name: str, version: str, qualifiers: dict[str, str] | None = None +) -> str: + """Generate a generic PURL. + + Args: + name: Package name + version: Version string + qualifiers: Optional qualifiers dictionary + + Returns: + PURL string + """ + purl = f"pkg:generic/{urllib.parse.quote(name)}@{urllib.parse.quote(version)}" + if qualifiers: + qualifier_str = "&".join( + f"{urllib.parse.quote(k)}={urllib.parse.quote(v)}" + for k, v in sorted(qualifiers.items()) + ) + purl += f"?{qualifier_str}" + return purl + + +def parse_purl(purl: str) -> dict[str, Any]: + """Parse a PURL into its components. + + Args: + purl: PURL string + + Returns: + Dictionary with type, namespace, name, version, qualifiers, subpath + """ + if not purl.startswith("pkg:"): + raise ValueError(f"Invalid PURL: must start with 'pkg:': {purl}") + + # Remove pkg: prefix + remainder = purl[4:] + + # Split off subpath + subpath = None + if "#" in remainder: + remainder, subpath = remainder.rsplit("#", 1) + subpath = urllib.parse.unquote(subpath) + + # Split off qualifiers + qualifiers: dict[str, str] = {} + if "?" 
in remainder: + remainder, qualifier_str = remainder.rsplit("?", 1) + for pair in qualifier_str.split("&"): + if "=" in pair: + key, value = pair.split("=", 1) + qualifiers[urllib.parse.unquote(key)] = urllib.parse.unquote(value) + + # Split type and rest + if "/" not in remainder: + raise ValueError(f"Invalid PURL: no type separator: {purl}") + + type_part, remainder = remainder.split("/", 1) + + # Split version + version = None + if "@" in remainder: + remainder, version = remainder.rsplit("@", 1) + version = urllib.parse.unquote(version) + + # Split namespace and name + parts = remainder.rsplit("/", 1) + if len(parts) == 2: + namespace = urllib.parse.unquote(parts[0]) + name = urllib.parse.unquote(parts[1]) + else: + namespace = None + name = urllib.parse.unquote(parts[0]) + + return { + "type": type_part, + "namespace": namespace, + "name": name, + "version": version, + "qualifiers": qualifiers, + "subpath": subpath, + } + + +def _generate_purl_from_metadata(repo_name: str, metadata: dict[str, Any]) -> str: + """Generate a PURL using metadata. 
+ + Args: + repo_name: Repository name + metadata: Metadata dictionary + + Returns: + PURL string + """ + # Check modules + modules = metadata.get("modules", {}) + if repo_name in modules: + mod = modules[repo_name] + if "purl" in mod: + return mod["purl"] + version = mod.get("version", "unknown") + return generate_bazel_module_purl(repo_name, version) + + # Check rust_deps + rust_deps = metadata.get("rust_deps", {}) + if repo_name in rust_deps: + dep = rust_deps[repo_name] + if "purl" in dep: + return dep["purl"] + version = dep.get("version", "unknown") + return generate_cargo_purl(repo_name, version) + + # Check cpp_deps + cpp_deps = metadata.get("cpp_deps", {}) + if repo_name in cpp_deps: + dep = cpp_deps[repo_name] + if "purl" in dep: + return dep["purl"] + version = dep.get("version", "unknown") + return generate_generic_purl(repo_name, version) + + # Check crates + crates = metadata.get("crates", {}) + if repo_name in crates: + crate = crates[repo_name] + if "purl" in crate: + return crate["purl"] + version = crate.get("version", "unknown") + return generate_cargo_purl(repo_name, version) + + # Fallback + return generate_generic_purl(repo_name, "unknown") + + +def validate_purl(purl: str) -> bool: + """Validate a PURL string. + + Args: + purl: PURL string to validate + + Returns: + True if valid, False otherwise + """ + try: + parsed = parse_purl(purl) + # Basic validation + if not parsed["type"]: + return False + if not parsed["name"]: + return False + return True + except (ValueError, KeyError): + return False diff --git a/sbom/internal/generator/sbom_generator.py b/sbom/internal/generator/sbom_generator.py new file mode 100644 index 0000000..4c79960 --- /dev/null +++ b/sbom/internal/generator/sbom_generator.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 +"""SBOM generator - creates SPDX and CycloneDX output from Bazel aspect data. + +This is the main entry point for SBOM generation. 
It reads dependency +information collected by the Bazel aspect and metadata from the module +extension, then generates SBOM files in SPDX 2.3 and CycloneDX 1.6 formats. +""" + +import argparse +import json +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from sbom.internal.generator.spdx_formatter import generate_spdx +from sbom.internal.generator.cyclonedx_formatter import generate_cyclonedx + + +def main() -> int: + """Main entry point for SBOM generation.""" + parser = argparse.ArgumentParser(description="Generate SBOM from Bazel deps") + parser.add_argument("--input", required=True, help="Input JSON from Bazel rule") + parser.add_argument( + "--metadata", required=True, help="Metadata JSON from module extension" + ) + parser.add_argument("--spdx-output", help="SPDX 2.3 JSON output file") + parser.add_argument("--cyclonedx-output", help="CycloneDX 1.6 output file") + args = parser.parse_args() + + # Load dependency data from Bazel + with open(args.input, encoding="utf-8") as f: + data = json.load(f) + + # Load metadata from module extension + with open(args.metadata, encoding="utf-8") as f: + metadata = json.load(f) + + # Load and merge repo metadata files (from sbom_http_archive) + repo_metadata_files = data.get("repo_metadata_files", []) + for repo_meta_path in repo_metadata_files: + try: + with open(repo_meta_path, encoding="utf-8") as f: + repo_meta = json.load(f) + # Add to http_archives metadata + name = repo_meta.get("name", "").split("+")[-1] # Handle bzlmod names + if name: + if "http_archives" not in metadata: + metadata["http_archives"] = {} + metadata["http_archives"][name] = repo_meta + except (OSError, json.JSONDecodeError): + pass # Skip files that can't be read + + # Filter external repos (exclude build tools) + external_repos = data.get("external_repos", []) + exclude_patterns = data.get("exclude_patterns", []) + filtered_repos = filter_repos(external_repos, exclude_patterns) + + # Build 
component list with metadata + components = [] + + for repo in filtered_repos: + component = resolve_component(repo, metadata) + if component: + components.append(component) + + # Deduplicate components by name + components = deduplicate_components(components) + + # Generate timestamp in SPDX-compliant format (YYYY-MM-DDTHH:MM:SSZ) + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Get configuration + config = data.get("config", {}) + + # Generate outputs + if args.spdx_output: + spdx = generate_spdx(components, config, timestamp) + Path(args.spdx_output).write_text(json.dumps(spdx, indent=2), encoding="utf-8") + + if args.cyclonedx_output: + cdx = generate_cyclonedx(components, config, timestamp) + Path(args.cyclonedx_output).write_text( + json.dumps(cdx, indent=2), encoding="utf-8" + ) + + return 0 + + +def filter_repos(repos: list[str], exclude_patterns: list[str]) -> list[str]: + """Filter out build tool repositories based on exclude patterns. + + Crates from crate_universe are always kept even if they match exclude patterns, + since they are legitimate dependencies, not build tools. + + Args: + repos: List of repository names + exclude_patterns: Patterns to exclude + + Returns: + Filtered list of repository names + """ + filtered = [] + for repo in repos: + # Always keep crates from crate_universe - these are real dependencies + if "crate_index__" in repo or "crates_io__" in repo or "_crates__" in repo: + filtered.append(repo) + continue + + should_exclude = False + for pattern in exclude_patterns: + if pattern in repo: + should_exclude = True + break + if not should_exclude: + filtered.append(repo) + return filtered + + +def resolve_component( + repo_name: str, metadata: dict[str, Any] +) -> dict[str, Any] | None: + """Resolve repository to component with version and PURL. 
+ + Args: + repo_name: Name of the repository + metadata: Metadata dictionary from module extension + + Returns: + Component dictionary or None if not resolved + """ + # Normalize repo name - bzlmod adds "+" suffix to module repos + normalized_name = repo_name.rstrip("+") + + # Get license overrides + licenses = metadata.get("licenses", {}) + + # Check if it's a bazel_dep module + modules = metadata.get("modules", {}) + if normalized_name in modules: + mod = modules[normalized_name] + license_info = licenses.get(normalized_name, {}) + return { + "name": normalized_name, + "version": mod.get("version", "unknown"), + "purl": mod.get("purl", f"pkg:bazel/{normalized_name}@unknown"), + "type": "library", + "supplier": mod.get("supplier", "") or license_info.get("supplier", ""), + "license": mod.get("license", "") or license_info.get("license", ""), + } + + # Check if it's an http_archive dependency (declared via sbom.http_archive or sbom.license) + http_archives = metadata.get("http_archives", {}) + if normalized_name in http_archives: + archive = http_archives[normalized_name] + return { + "name": normalized_name, + "version": archive.get("version", "unknown"), + "purl": archive.get("purl", f"pkg:generic/{normalized_name}@unknown"), + "type": "library", + "url": archive.get("url", ""), + "license": archive.get("license", ""), + "supplier": archive.get("supplier", ""), + } + + # Check if it's a git_repository dependency (declared via sbom.git_repository or sbom.license) + git_repos = metadata.get("git_repositories", {}) + if normalized_name in git_repos: + repo = git_repos[normalized_name] + return { + "name": normalized_name, + "version": repo.get("version", "unknown"), + "purl": repo.get("purl", f"pkg:generic/{normalized_name}@unknown"), + "type": "library", + "url": repo.get("remote", ""), + "license": repo.get("license", ""), + "supplier": repo.get("supplier", ""), + } + + # Check if it's a crate declared via sbom.license(type="cargo") + crates = 
metadata.get("crates", {}) + if normalized_name in crates: + crate = crates[normalized_name] + return { + "name": normalized_name, + "version": crate.get("version", "unknown"), + "purl": crate.get("purl", f"pkg:cargo/{normalized_name}@unknown"), + "type": "library", + "license": crate.get("license", ""), + "supplier": crate.get("supplier", ""), + } + + # Check if it's a bazel_dep module with license info (third-party modules + # that don't use the sbom extension won't appear in 'modules', but their + # license/supplier info is stored in 'licenses' via sbom.license() tags) + if normalized_name in licenses: + license_info = licenses[normalized_name] + return { + "name": normalized_name, + "version": "unknown", + "purl": license_info.get("purl") or f"pkg:bazel/{normalized_name}@unknown", + "type": "library", + "license": license_info.get("license", ""), + "supplier": license_info.get("supplier", ""), + } + + # Handle score_ prefixed repos that might be modules + if normalized_name.startswith("score_"): + license_info = licenses.get(normalized_name, {}) + return { + "name": normalized_name, + "version": "unknown", + "purl": f"pkg:github/eclipse-score/{normalized_name}@unknown", + "type": "library", + "supplier": license_info.get("supplier", "") or "Eclipse Foundation", + "license": license_info.get("license", ""), + } + + # Handle crate universe repos - bzlmod format + # e.g., rules_rust++crate+crate_index__serde-1.0.228 + # e.g., rules_rust++crate+crate_index__iceoryx2-qnx8-0.7.0 + manual_crates = metadata.get("crates", {}) + + if "crate_index__" in repo_name or "crate+" in repo_name: + # Extract the crate info part after crate_index__ + if "crate_index__" in repo_name: + crate_part = repo_name.split("crate_index__")[-1] + else: + crate_part = repo_name.split("+")[-1] + + # Parse name-version format (e.g., "serde-1.0.228") + # Handle complex names like "iceoryx2-qnx8-0.7.0" where last part is version + parts = crate_part.split("-") + if len(parts) >= 2: + # Find the 
version part (starts with a digit) + version_idx = -1 + for i, part in enumerate(parts): + if part and part[0].isdigit(): + version_idx = i + break + + if version_idx > 0: + crate_name = "-".join(parts[:version_idx]).replace("_", "-") + version = "-".join(parts[version_idx:]) + + # Look up manual sbom.license(type="cargo") declarations + crate_meta = manual_crates.get(crate_name, {}) + + result = { + "name": crate_name, + "version": version, + "purl": f"pkg:cargo/{crate_name}@{version}", + "type": "library", + } + if crate_meta.get("license"): + result["license"] = crate_meta["license"] + if crate_meta.get("supplier"): + result["supplier"] = crate_meta["supplier"] + if crate_meta.get("repository"): + result["url"] = crate_meta["repository"] + return result + + # Handle legacy crate universe format (e.g., crates_io__tokio-1.10.0) + if repo_name.startswith("crates_io__") or "_crates__" in repo_name: + parts = repo_name.split("__") + if len(parts) >= 2: + crate_info = parts[-1] + # Try to split by last hyphen to get name-version + last_hyphen = crate_info.rfind("-") + if last_hyphen > 0: + crate_name = crate_info[:last_hyphen].replace("_", "-") + version = crate_info[last_hyphen + 1 :] + + # Look up manual sbom.license(type="cargo") declarations + crate_meta = manual_crates.get(crate_name, {}) + + result = { + "name": crate_name, + "version": version, + "purl": f"pkg:cargo/{crate_name}@{version}", + "type": "library", + } + if crate_meta.get("license"): + result["license"] = crate_meta["license"] + if crate_meta.get("supplier"): + result["supplier"] = crate_meta["supplier"] + if crate_meta.get("repository"): + result["url"] = crate_meta["repository"] + return result + + # Unknown repository - return with unknown version + return { + "name": repo_name, + "version": "unknown", + "purl": f"pkg:generic/{repo_name}@unknown", + "type": "library", + } + + +def deduplicate_components(components: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Remove duplicate components, 
keeping the one with most metadata. + + Args: + components: List of component dictionaries + + Returns: + Deduplicated list of components + """ + seen: dict[str, dict[str, Any]] = {} + for comp in components: + name = comp.get("name", "") + if name not in seen: + seen[name] = comp + else: + # Keep the one with more information (non-unknown version preferred) + existing = seen[name] + if ( + existing.get("version") == "unknown" + and comp.get("version") != "unknown" + ): + seen[name] = comp + elif comp.get("license") and not existing.get("license"): + # Prefer component with license info + seen[name] = comp + + return list(seen.values()) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sbom/internal/generator/spdx_formatter.py b/sbom/internal/generator/spdx_formatter.py new file mode 100644 index 0000000..c36ff7a --- /dev/null +++ b/sbom/internal/generator/spdx_formatter.py @@ -0,0 +1,180 @@ +"""SPDX 2.3 JSON formatter for SBOM generation. + +This module generates SPDX 2.3 compliant JSON output from the component +information collected by the Bazel aspect and module extension. + +SPDX 2.3 Specification: https://spdx.github.io/spdx-spec/v2.3/ +""" + +import uuid +from typing import Any + + +def generate_spdx( + components: list[dict[str, Any]], + config: dict[str, Any], + timestamp: str, +) -> dict[str, Any]: + """Generate SPDX 2.3 JSON document. 
+ + Args: + components: List of component dictionaries + config: Configuration dictionary with producer info + timestamp: ISO 8601 timestamp + + Returns: + SPDX 2.3 compliant dictionary + """ + + namespace = config.get("namespace", "https://eclipse.dev/score") + component_name = config.get("component_name", "unknown") + component_version = config.get("component_version", "") + producer_name = config.get("producer_name", "Eclipse Foundation") + + doc_uuid = uuid.uuid4() + + packages: list[dict[str, Any]] = [] + relationships: list[dict[str, Any]] = [] + + # Root package + root_spdx_id = "SPDXRef-RootPackage" + root_package: dict[str, Any] = { + "SPDXID": root_spdx_id, + "name": component_name, + "versionInfo": component_version if component_version else "unversioned", + "downloadLocation": "https://github.com/eclipse-score", + "supplier": f"Organization: {producer_name}", + "primaryPackagePurpose": "APPLICATION", + "filesAnalyzed": False, + "licenseConcluded": "NOASSERTION", + "licenseDeclared": "NOASSERTION", + "copyrightText": "NOASSERTION", + } + packages.append(root_package) + + # DESCRIBES relationship + relationships.append( + { + "spdxElementId": "SPDXRef-DOCUMENT", + "relationshipType": "DESCRIBES", + "relatedSpdxElement": root_spdx_id, + } + ) + + # Add dependency packages + for comp in components: + pkg, spdx_id = _create_spdx_package(comp) + packages.append(pkg) + + # Root depends on each component + relationships.append( + { + "spdxElementId": root_spdx_id, + "relationshipType": "DEPENDS_ON", + "relatedSpdxElement": spdx_id, + } + ) + + return { + "spdxVersion": "SPDX-2.3", + "dataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": f"SBOM for {component_name}", + "documentNamespace": f"{namespace}/spdx/{_sanitize_id(component_name)}-{doc_uuid}", + "creationInfo": { + "created": timestamp, + "creators": [ + f"Organization: {producer_name}", + "Tool: score-sbom-generator", + ], + }, + "packages": packages, + "relationships": relationships, + } + 
+ +def _create_spdx_package( + component: dict[str, Any], +) -> tuple[dict[str, Any], str]: + """Create an SPDX 2.3 Package for a component. + + Args: + component: Component dictionary + + Returns: + Tuple of (SPDX Package dictionary, spdx_id string) + """ + name = component.get("name", "unknown") + version = component.get("version", "unknown") + purl = component.get("purl", "") + license_id = component.get("license", "") + supplier = component.get("supplier", "") + comp_type = component.get("type", "library") + + spdx_id = f"SPDXRef-{_sanitize_id(name)}-{_sanitize_id(version)}" + + # Determine download location + url = component.get("url", "") + source = component.get("source", "") + if url: + download_location = url + elif source == "crates.io": + download_location = f"https://crates.io/crates/{name}/{version}" + else: + download_location = "NOASSERTION" + + package: dict[str, Any] = { + "SPDXID": spdx_id, + "name": name, + "versionInfo": version, + "downloadLocation": download_location, + "primaryPackagePurpose": _map_type_to_purpose(comp_type), + "filesAnalyzed": False, + "licenseConcluded": license_id if license_id else "NOASSERTION", + "licenseDeclared": license_id if license_id else "NOASSERTION", + "copyrightText": "NOASSERTION", + } + + if supplier: + package["supplier"] = f"Organization: {supplier}" + + # Add PURL as external reference + if purl: + package["externalRefs"] = [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": purl, + }, + ] + + return package, spdx_id + + +def _map_type_to_purpose(comp_type: str) -> str: + """Map component type to SPDX 2.3 primary package purpose.""" + type_mapping = { + "application": "APPLICATION", + "library": "LIBRARY", + "framework": "FRAMEWORK", + "file": "FILE", + "container": "CONTAINER", + "firmware": "FIRMWARE", + "device": "DEVICE", + "data": "DATA", + } + return type_mapping.get(comp_type, "LIBRARY") + + +def _sanitize_id(value: str) -> str: + """Sanitize a string 
for use in SPDX IDs. + + SPDX 2.3 IDs must match [a-zA-Z0-9.-]+ + """ + result = [] + for char in value: + if char.isalnum() or char in (".", "-"): + result.append(char) + elif char in ("_", " ", "/", "@"): + result.append("-") + return "".join(result) or "unknown" diff --git a/sbom/internal/metadata_rule.bzl b/sbom/internal/metadata_rule.bzl new file mode 100644 index 0000000..ab31f13 --- /dev/null +++ b/sbom/internal/metadata_rule.bzl @@ -0,0 +1,51 @@ +"""Rule to expose SBOM metadata collected by the module extension. + +This rule wraps the metadata JSON file generated by the module extension +and makes it available for the SBOM generation action. +""" + +load(":providers.bzl", "SbomMetadataInfo") + +def _sbom_metadata_rule_impl(ctx): + """Implementation of sbom_metadata_rule. + + The metadata is passed as a JSON file to the SBOM generator action, + rather than being parsed at analysis time. + + Args: + ctx: The rule context + + Returns: + A list of providers including SbomMetadataInfo with file reference + """ + metadata_file = ctx.file.metadata_json + + # We can't read files at analysis time in Bazel rules, so we pass + # the file reference and let the generator read it at execution time. + # The SbomMetadataInfo provider carries empty dicts here - the actual + # metadata is read by the Python generator from the JSON file. 
+ return [ + DefaultInfo(files = depset([metadata_file])), + SbomMetadataInfo( + modules = {}, + crates = {}, + rust_deps = {}, + cpp_deps = {}, + http_archives = {}, + ), + # Also provide the file itself for the rule to use + OutputGroupInfo(metadata_file = depset([metadata_file])), + ] + +sbom_metadata_rule = rule( + implementation = _sbom_metadata_rule_impl, + attrs = { + "metadata_json": attr.label( + mandatory = True, + allow_single_file = [".json"], + doc = "JSON file containing SBOM metadata", + ), + }, + provides = [SbomMetadataInfo], + doc = "Exposes SBOM metadata collected by the module extension", +) diff --git a/sbom/internal/providers.bzl b/sbom/internal/providers.bzl new file mode 100644 index 0000000..756ab67 --- /dev/null +++ b/sbom/internal/providers.bzl @@ -0,0 +1,29 @@ +"""Providers for SBOM data propagation. + +This module defines the providers used to pass SBOM-related information +between different phases of the build: +- SbomDepsInfo: Collected by aspect - deps of a specific target +- SbomMetadataInfo: Collected by extension - metadata for all modules +""" + +# Collected by aspect - deps of a specific target +SbomDepsInfo = provider( + doc = "Transitive dependency information for SBOM generation", + fields = { + "direct_deps": "depset of direct dependency labels", + "transitive_deps": "depset of all transitive dependency labels", + "external_repos": "depset of external repository names used", + }, +) + +# Collected by extension - metadata for all modules +SbomMetadataInfo = provider( + doc = "Metadata about all available modules/crates", + fields = { + "modules": "dict of module_name -> {version, commit, registry, purl}", + "crates": "dict of crate_name -> {version, checksum, purl}", + "rust_deps": "dict of rust_dep_name -> {version, source, url, purl, license}", + "cpp_deps": "dict of cpp_dep_name -> {version, url, purl, sha256, license}", + "http_archives": "dict of repo_name -> {url, version, sha256, purl}", + }, +) diff --git 
a/sbom/internal/rules.bzl b/sbom/internal/rules.bzl new file mode 100644 index 0000000..bd13564 --- /dev/null +++ b/sbom/internal/rules.bzl @@ -0,0 +1,167 @@ +"""SBOM generation rule implementation. + +This module contains the main _sbom_impl rule that combines data from +the aspect (target dependencies) with metadata from the module extension +to generate SPDX and CycloneDX format SBOMs. +""" + +load(":aspect.bzl", "sbom_aspect") +load(":providers.bzl", "SbomDepsInfo") + +def _sbom_impl(ctx): + """Generates SBOM by combining aspect data with extension metadata. + + Args: + ctx: The rule context + + Returns: + DefaultInfo with generated SBOM files + """ + + # Collect all external repos used by targets + all_external_repos = depset(transitive = [ + target[SbomDepsInfo].external_repos + for target in ctx.attr.targets + ]) + + # Collect all transitive deps + all_transitive_deps = depset(transitive = [ + target[SbomDepsInfo].transitive_deps + for target in ctx.attr.targets + ]) + + # Get the metadata JSON file from the extension + metadata_file = ctx.file.metadata_json + + # Collect SBOM_METADATA.json files from additional repos (sbom_http_archive) + repo_metadata_files = [] + for f in ctx.files.repo_metadata: + repo_metadata_files.append(f) + + # Create input file with dependency info for Python generator + deps_json = ctx.actions.declare_file(ctx.attr.name + "_deps.json") + + # Build target labels list + target_labels = [str(t.label) for t in ctx.attr.targets] + + # Build exclude patterns list + exclude_patterns = ctx.attr.exclude_patterns + + # Include paths to repo metadata files + repo_metadata_paths = [f.path for f in repo_metadata_files] + + deps_data = { + "external_repos": all_external_repos.to_list(), + "transitive_deps": [str(d) for d in all_transitive_deps.to_list()], + "target_labels": target_labels, + "exclude_patterns": exclude_patterns, + "repo_metadata_files": repo_metadata_paths, + "config": { + "producer_name": ctx.attr.producer_name, + 
"producer_url": ctx.attr.producer_url, + "component_name": ctx.attr.component_name if ctx.attr.component_name else ctx.attr.name, + "component_version": ctx.attr.component_version, + "namespace": ctx.attr.namespace, + }, + } + + ctx.actions.write( + output = deps_json, + content = json.encode(deps_data), + ) + + # Declare outputs + outputs = [] + args = ctx.actions.args() + args.add("--input", deps_json) + args.add("--metadata", metadata_file) + + if "spdx" in ctx.attr.output_formats: + spdx_out = ctx.actions.declare_file(ctx.attr.name + ".spdx.json") + outputs.append(spdx_out) + args.add("--spdx-output", spdx_out) + + if "cyclonedx" in ctx.attr.output_formats: + cdx_out = ctx.actions.declare_file(ctx.attr.name + ".cdx.json") + outputs.append(cdx_out) + args.add("--cyclonedx-output", cdx_out) + + # Build inputs list + generator_inputs = [deps_json, metadata_file] + repo_metadata_files + + # Run Python generator + ctx.actions.run( + inputs = generator_inputs, + outputs = outputs, + executable = ctx.executable._generator, + arguments = [args], + mnemonic = "SbomGenerate", + progress_message = "Generating SBOM for %s" % ctx.attr.name, + ) + + return [DefaultInfo(files = depset(outputs))] + +sbom_rule = rule( + implementation = _sbom_impl, + attrs = { + "targets": attr.label_list( + mandatory = True, + aspects = [sbom_aspect], + doc = "Targets to generate SBOM for", + ), + "output_formats": attr.string_list( + default = ["spdx", "cyclonedx"], + doc = "Output formats: spdx, cyclonedx", + ), + "producer_name": attr.string( + default = "Eclipse Foundation", + doc = "SBOM producer organization name", + ), + "producer_url": attr.string( + default = "https://projects.eclipse.org/projects/automotive.score", + doc = "SBOM producer URL", + ), + "component_name": attr.string( + doc = "Component name (defaults to rule name)", + ), + "component_version": attr.string( + default = "", + doc = "Component version", + ), + "namespace": attr.string( + default = 
"https://eclipse.dev/score", + doc = "SBOM namespace URI", + ), + "exclude_patterns": attr.string_list( + default = [ + "rules_rust", + "rules_cc", + "bazel_tools", + "platforms", + "bazel_skylib", + "rules_python", + "rules_proto", + "protobuf", + "local_config_", + "remote_", + ], + doc = "External repo patterns to exclude (build tools)", + ), + "metadata_json": attr.label( + mandatory = True, + allow_single_file = [".json"], + doc = "Metadata JSON file from sbom_metadata extension", + ), + "repo_metadata": attr.label_list( + allow_files = [".json"], + default = [], + doc = "SBOM_METADATA.json files from sbom_http_archive repos", + ), + "_generator": attr.label( + default = "//sbom/internal/generator:sbom_generator", + executable = True, + cfg = "exec", + ), + }, + doc = "Generates SBOM for specified targets in SPDX and CycloneDX formats", +) diff --git a/sbom/repos.bzl b/sbom/repos.bzl new file mode 100644 index 0000000..5010ad0 --- /dev/null +++ b/sbom/repos.bzl @@ -0,0 +1,203 @@ +"""Drop-in replacements for standard repository rules with SBOM support. + +These are transparent wrappers - same names, same attributes, just add 'license'. 
+ +Usage in MODULE.bazel: + # Just change one line at the top: + http_archive = use_repo_rule("@score_tooling//sbom:repos.bzl", "http_archive") + git_repository = use_repo_rule("@score_tooling//sbom:repos.bzl", "git_repository") + + # Then use exactly as before, just add license: + http_archive( + name = "boost", + urls = ["https://github.com/boostorg/boost/..."], + strip_prefix = "boost-1.87.0", + sha256 = "...", + license = "BSL-1.0", # ← Only addition + ) +""" + +# Re-export the repo rules for use with use_repo_rule +# These work as drop-in replacements for standard repo rules + +def _extract_version_from_url(url): + """Extract version from URL patterns.""" + if not url: + return None + + # Try common patterns + for sep in ["/v", "/archive/v", "/archive/", "/releases/download/v", "/releases/download/", "/tags/"]: + if sep in url: + rest = url.split(sep)[-1] + version = rest.split("/")[0].split(".tar")[0].split(".zip")[0].split("-")[0] + if version and len(version) > 0 and (version[0].isdigit() or version == "boost"): + # Handle boost-1.87.0 pattern + if "boost-" in url: + parts = url.split("boost-")[-1].split("-")[0].split(".tar")[0] + if parts[0].isdigit(): + return parts + return version.lstrip("v") + + return None + +def _generate_purl(url, name, version): + """Generate Package URL from download URL.""" + if not url: + return "pkg:generic/{}@{}".format(name, version or "unknown") + + v = version or "unknown" + + if "github.com" in url: + parts = url.split("github.com/") + if len(parts) > 1: + path_parts = parts[1].split("/") + if len(path_parts) >= 2: + owner = path_parts[0] + repo = path_parts[1].split(".")[0].split("/")[0] + return "pkg:github/{}/{}@{}".format(owner, repo, v) + + return "pkg:generic/{}@{}".format(name, v) + +def _http_archive_impl(rctx): + """Repository rule implementation for http_archive.""" + + # Download and extract + urls = rctx.attr.urls if rctx.attr.urls else [rctx.attr.url] if rctx.attr.url else [] + + download_kwargs = {} + if 
rctx.attr.sha256: + download_kwargs["sha256"] = rctx.attr.sha256 + if rctx.attr.integrity: + download_kwargs["integrity"] = rctx.attr.integrity + + rctx.download_and_extract( + url = urls, + stripPrefix = rctx.attr.strip_prefix if rctx.attr.strip_prefix else "", + **download_kwargs + ) + + # Handle build file + if rctx.attr.build_file: + rctx.delete("BUILD") + rctx.delete("BUILD.bazel") + rctx.symlink(rctx.attr.build_file, "BUILD.bazel") + elif rctx.attr.build_file_content: + rctx.delete("BUILD") + rctx.delete("BUILD.bazel") + rctx.file("BUILD.bazel", rctx.attr.build_file_content) + + # Apply patches + for patch in rctx.attr.patches: + rctx.patch(patch, strip = rctx.attr.patch_strip) + + # Generate SBOM metadata file + url = urls[0] if urls else "" + version = rctx.attr.version if rctx.attr.version else _extract_version_from_url(url) + purl = rctx.attr.purl if rctx.attr.purl else _generate_purl(url, rctx.name, version) + + sbom_metadata = { + "name": rctx.name, + "version": version or "unknown", + "url": url, + "purl": purl, + "license": rctx.attr.license if rctx.attr.license else "", + "sha256": rctx.attr.sha256 if rctx.attr.sha256 else "", + } + + rctx.file("SBOM_METADATA.json", json.encode(sbom_metadata)) + +http_archive = repository_rule( + implementation = _http_archive_impl, + attrs = { + # Standard http_archive attrs + "urls": attr.string_list(doc = "Download URLs"), + "url": attr.string(doc = "Single download URL"), + "sha256": attr.string(doc = "SHA256 checksum"), + "integrity": attr.string(doc = "Subresource integrity hash"), + "strip_prefix": attr.string(doc = "Directory prefix to strip"), + "build_file": attr.label(doc = "BUILD file to use"), + "build_file_content": attr.string(doc = "Content for BUILD file"), + "patches": attr.label_list(doc = "Patch files to apply"), + "patch_strip": attr.int(default = 0, doc = "Strip prefix for patches"), + # SBOM-specific attrs + "license": attr.string(doc = "SPDX license identifier"), + "version": attr.string(doc = 
"Version (auto-extracted from URL if not provided)"), + "purl": attr.string(doc = "Package URL (auto-generated if not provided)"), + }, + doc = "Drop-in replacement for http_archive with license field for SBOM", +) + +def _git_repository_impl(rctx): + """Clone the remote at the requested tag/branch/commit and write SBOM_METADATA.json.""" + + # Clone the repository. Pass --branch only when a tag or branch is set: + # "git clone --branch HEAD" is rejected by git, which broke commit-only checkouts. + ref = rctx.attr.tag if rctx.attr.tag else rctx.attr.branch + cmds = [["git", "clone", "--depth=1", "--branch", ref, rctx.attr.remote, "."] if ref else ["git", "clone", "--depth=1", rctx.attr.remote, "."]] + # A pinned commit is fetched and checked out on top of the shallow clone. + if rctx.attr.commit: + cmds.append(["git", "fetch", "--depth=1", "origin", rctx.attr.commit]) + cmds.append(["git", "checkout", rctx.attr.commit]) + + # Run each step in order and fail the fetch on the first error instead of + # silently leaving an empty repository behind. + for cmd in cmds: + result = rctx.execute(cmd) + if result.return_code != 0: + fail("git_repository '{}': '{}' failed: {}".format(rctx.name, " ".join(cmd), result.stderr)) + + # Handle build file + if rctx.attr.build_file: + rctx.delete("BUILD") + rctx.delete("BUILD.bazel") + rctx.symlink(rctx.attr.build_file, "BUILD.bazel") + elif rctx.attr.build_file_content: + rctx.delete("BUILD") + rctx.delete("BUILD.bazel") + rctx.file("BUILD.bazel", rctx.attr.build_file_content) + + # Generate version + version = rctx.attr.tag if rctx.attr.tag else (rctx.attr.commit[:12] if rctx.attr.commit else "unknown") + + # Generate PURL + purl = rctx.attr.purl + if not purl: + remote = rctx.attr.remote + if "github.com" in remote: + if "github.com:" in remote: + path = remote.split("github.com:")[-1] + else: + path = remote.split("github.com/")[-1] + parts = path.replace(".git", "").split("/") + if len(parts) >= 2: + purl = "pkg:github/{}/{}@{}".format(parts[0], parts[1], version) + if not purl: + purl = "pkg:generic/{}@{}".format(rctx.name, version) + + sbom_metadata = { + "name": rctx.name, + "version": version, + "remote": rctx.attr.remote, + "commit": rctx.attr.commit if rctx.attr.commit else "", + "tag": rctx.attr.tag if rctx.attr.tag else "", + "purl": purl, + "license": rctx.attr.license if rctx.attr.license else "", + } + + rctx.file("SBOM_METADATA.json", json.encode(sbom_metadata)) + +git_repository = repository_rule( + implementation = _git_repository_impl, + attrs = { + # Standard
git_repository attrs + "remote": attr.string(mandatory = True, doc = "Git remote URL"), + "commit": attr.string(doc = "Git commit hash"), + "tag": attr.string(doc = "Git tag"), + "branch": attr.string(doc = "Git branch"), + "build_file": attr.label(doc = "BUILD file to use"), + "build_file_content": attr.string(doc = "Content for BUILD file"), + # SBOM-specific attrs + "license": attr.string(doc = "SPDX license identifier"), + "purl": attr.string(doc = "Package URL (auto-generated if not provided)"), + }, + doc = "Drop-in replacement for git_repository with license field for SBOM", +) diff --git a/sbom/repository_rules.bzl b/sbom/repository_rules.bzl new file mode 100644 index 0000000..9131837 --- /dev/null +++ b/sbom/repository_rules.bzl @@ -0,0 +1,288 @@ +"""SBOM-aware repository rules. + +These wrapper macros extend standard Bazel repository rules with SBOM metadata fields. +Use these instead of the standard rules to automatically include dependencies in SBOM generation. + +Example usage in MODULE.bazel: + load("@score_tooling//sbom:repository_rules.bzl", "sbom_http_archive", "sbom_git_repository") + + sbom_http_archive( + name = "vsomeip", + urls = ["https://github.com/COVESA/vsomeip/archive/3.6.0.tar.gz"], + strip_prefix = "vsomeip-3.6.0", + # SBOM fields: + sbom_license = "MPL-2.0", + sbom_purl = "pkg:github/COVESA/vsomeip@3.6.0", + ) +""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") + +# Global registry to collect SBOM metadata during module loading; meant to be read by the module extension. +# NOTE(review): Starlark freezes module-level values after load, so mutating this dict from a macro call likely fails - TODO confirm. +_SBOM_REGISTRY = {} + +def _register_sbom_metadata(name, **kwargs): + """Register SBOM metadata for a dependency.""" + _SBOM_REGISTRY[name] = kwargs + +def get_sbom_registry(): + """Get the SBOM metadata registry.""" + return _SBOM_REGISTRY + +def sbom_http_archive( + name, + urls = None, + url = None, + strip_prefix = None, + sha256 = None, + build_file = None, +
build_file_content = None, + patches = None, + patch_args = None, + patch_cmds = None, + # SBOM metadata fields + sbom_license = None, + sbom_purl = None, + sbom_version = None, + sbom_supplier = None, + sbom_description = None, + **kwargs): + """http_archive wrapper with SBOM metadata fields. + + Args: + name: Repository name + urls: List of URLs to download from + url: Single URL (alternative to urls) + strip_prefix: Directory prefix to strip + sha256: Expected SHA256 hash + build_file: BUILD file to use + build_file_content: Content for BUILD file + patches: Patch files to apply + patch_args: Arguments for patch command + patch_cmds: Shell commands to run after patching + + # SBOM fields: + sbom_license: SPDX license identifier (e.g., "MIT", "Apache-2.0", "MPL-2.0") + sbom_purl: Package URL (e.g., "pkg:github/owner/repo@version") + sbom_version: Version string (auto-extracted from URL if not provided) + sbom_supplier: Supplier/vendor name + sbom_description: Description of the component + + **kwargs: Additional arguments passed to http_archive + """ + # Extract version from URL if not provided + version = sbom_version + if not version and urls: + version = _extract_version_from_url(urls[0] if urls else url) + elif not version and url: + version = _extract_version_from_url(url) + + # Generate PURL if not provided + purl = sbom_purl + if not purl: + purl = _generate_purl_from_url(urls[0] if urls else url, name, version) + + # Register SBOM metadata (Starlark has no f-strings, so the fallback PURL uses str.format) + _register_sbom_metadata( + name, + type = "http_archive", + version = version or "unknown", + license = sbom_license or "", + purl = purl or "pkg:generic/{}@{}".format(name, version or "unknown"), + supplier = sbom_supplier or "", + description = sbom_description or "", + url = (urls[0] if urls else url) or "", + ) + + # Call the actual http_archive + http_archive( + name = name, + urls = urls, + url = url, + strip_prefix = strip_prefix, + sha256 = sha256, + build_file = build_file, + build_file_content = build_file_content, +
patches = patches, + patch_args = patch_args, + patch_cmds = patch_cmds, + **kwargs + ) + +def sbom_git_repository( + name, + remote, + commit = None, + tag = None, + branch = None, + build_file = None, + build_file_content = None, + patches = None, + patch_args = None, + patch_cmds = None, + # SBOM metadata fields + sbom_license = None, + sbom_purl = None, + sbom_version = None, + sbom_supplier = None, + sbom_description = None, + **kwargs): + """git_repository wrapper with SBOM metadata fields. + + Args: + name: Repository name + remote: Git remote URL + commit: Git commit hash + tag: Git tag + branch: Git branch + build_file: BUILD file to use + build_file_content: Content for BUILD file + patches: Patch files to apply + patch_args: Arguments for patch command + patch_cmds: Shell commands to run after patching + + # SBOM fields: + sbom_license: SPDX license identifier + sbom_purl: Package URL + sbom_version: Version string (defaults to tag or commit) + sbom_supplier: Supplier/vendor name + sbom_description: Description of the component + + **kwargs: Additional arguments passed to git_repository + """ + # Determine version + version = sbom_version or tag or (commit[:12] if commit else "unknown") + + # Generate PURL if not provided + purl = sbom_purl + if not purl: + purl = _generate_purl_from_git(remote, name, version) + + # Register SBOM metadata (Starlark has no f-strings, so the fallback PURL uses str.format) + _register_sbom_metadata( + name, + type = "git_repository", + version = version, + license = sbom_license or "", + purl = purl or "pkg:generic/{}@{}".format(name, version), + supplier = sbom_supplier or "", + description = sbom_description or "", + url = remote, + commit = commit or "", + tag = tag or "", + ) + + # Call the actual git_repository + git_repository( + name = name, + remote = remote, + commit = commit, + tag = tag, + branch = branch, + build_file = build_file, + build_file_content = build_file_content, + patches = patches, + patch_args = patch_args, + patch_cmds = patch_cmds, + **kwargs + ) + +def
_extract_version_from_url(url): + """Extract version from URL patterns like /v1.2.3/ or -1.2.3.tar.gz""" + if not url: + return None + + # Common patterns + import_patterns = [ + # GitHub release: /v1.2.3/ or /1.2.3/ + "/v", + "/archive/v", + "/archive/", + # Version in filename: name-1.2.3.tar.gz + "-", + ] + + # Try to find version pattern + for sep in ["/v", "/archive/v", "/archive/"]: + if sep in url: + rest = url.split(sep)[-1] + # Extract until next / or .tar or .zip + version = rest.split("/")[0].split(".tar")[0].split(".zip")[0] + if version and version[0].isdigit(): + return version + + # Try filename pattern: name-version.tar.gz + filename = url.split("/")[-1] + if "-" in filename: + parts = filename.rsplit("-", 1) + if len(parts) == 2: + version = parts[1].split(".tar")[0].split(".zip")[0] + if version and version[0].isdigit(): + return version + + return None + +def _generate_purl_from_url(url, name, version): + """Generate Package URL from download URL (None when url is empty).""" + if not url: + return None + + version_str = version or "unknown" + + # GitHub (PURLs are built with str.format because Starlark has no f-strings) + if "github.com" in url: + parts = url.split("github.com/")[-1].split("/") + if len(parts) >= 2: + owner = parts[0] + repo = parts[1].split(".")[0] # Remove .git if present + return "pkg:github/{}/{}@{}".format(owner, repo, version_str) + + # GitLab + if "gitlab.com" in url: + parts = url.split("gitlab.com/")[-1].split("/") + if len(parts) >= 2: + owner = parts[0] + repo = parts[1].split(".")[0] + return "pkg:gitlab/{}/{}@{}".format(owner, repo, version_str) + + # Generic + return "pkg:generic/{}@{}".format(name, version_str) + +def _generate_purl_from_git(remote, name, version): + """Generate Package URL from git remote.""" + if not remote: + return None + + version_str = version or "unknown" + + # GitHub + if "github.com" in remote: + # Handle both https and ssh URLs + if "github.com:" in remote: + path = remote.split("github.com:")[-1] + else: + path = remote.split("github.com/")[-1] + parts = path.replace(".git", "").split("/") + if len(parts) >= 2:
+ owner = parts[0] + repo = parts[1] + return "pkg:github/{}/{}@{}".format(owner, repo, version_str) + + # GitLab (str.format is used throughout because Starlark has no f-strings) + if "gitlab" in remote: + if "gitlab.com:" in remote: + path = remote.split("gitlab.com:")[-1] + elif "gitlab.com/" in remote: + path = remote.split("gitlab.com/")[-1] + else: + path = remote.split("/")[-2:] + path = "/".join(path) + parts = path.replace(".git", "").split("/") + if len(parts) >= 2: + owner = parts[0] + repo = parts[1] + return "pkg:gitlab/{}/{}@{}".format(owner, repo, version_str) + + return "pkg:generic/{}@{}".format(name, version_str) diff --git a/sbom/tests/BUILD b/sbom/tests/BUILD new file mode 100644 index 0000000..f00ecdb --- /dev/null +++ b/sbom/tests/BUILD @@ -0,0 +1,25 @@ +# SBOM Tests Package +# +# This package contains tests for the SBOM generation system. + +load("@rules_python//python:defs.bzl", "py_test") + +package(default_visibility = ["//visibility:private"]) + +py_test( + name = "test_purl", + srcs = ["test_purl.py"], + deps = ["//sbom/internal/generator:purl"], +) + +py_test( + name = "test_spdx_formatter", + srcs = ["test_spdx_formatter.py"], + deps = ["//sbom/internal/generator:spdx_formatter"], +) + +py_test( + name = "test_cyclonedx_formatter", + srcs = ["test_cyclonedx_formatter.py"], + deps = ["//sbom/internal/generator:cyclonedx_formatter"], +) diff --git a/sbom/tests/__init__.py b/sbom/tests/__init__.py new file mode 100644 index 0000000..b82b623 --- /dev/null +++ b/sbom/tests/__init__.py @@ -0,0 +1 @@ +"""SBOM tests package.""" diff --git a/sbom/tests/compare_sbom_vs_query.sh b/sbom/tests/compare_sbom_vs_query.sh new file mode 100755 index 0000000..d218d5d --- /dev/null +++ b/sbom/tests/compare_sbom_vs_query.sh @@ -0,0 +1,297 @@ +#!/usr/bin/env bash +# SBOM vs Bazel Query comparison test. +# +# Compares the external dependencies discovered by 'bazel query' against the +# components listed in a generated SBOM (.spdx.json), to validate that the +# SBOM aspect is capturing the same dependency set as Bazel's own query engine.
+# +# Usage (run from your workspace root, e.g. reference_integration/): +# +# bash ../tooling/sbom/tests/compare_sbom_vs_query.sh \ +# //:sbom_kyron \ +# //feature_showcase/rust:kyron_example +# +# # Multiple build targets: +# bash ../tooling/sbom/tests/compare_sbom_vs_query.sh \ +# //:sbom_all \ +# //feature_showcase/rust:orch_per_example \ +# //feature_showcase/rust:kyron_example + +set -euo pipefail + +# --------------------------------------------------------------------------- +# Args: one SBOM target followed by at least one build target +# --------------------------------------------------------------------------- +if [ $# -lt 2 ]; then + echo "Usage: $0 sbom_target build_target [build_target ...]" + echo "" + echo " sbom_target - The SBOM Bazel target (e.g. //:sbom_kyron)" + echo " build_target - One or more build targets the SBOM covers" + exit 1 +fi + +SBOM_TARGET="$1" +shift +BUILD_TARGETS=("$@") + +# Derive the SBOM target name for locating the .spdx.json output. +# //:sbom_kyron -> sbom_kyron +SBOM_NAME="${SBOM_TARGET##*:}" + +# --------------------------------------------------------------------------- +# Default exclude patterns (must match tooling/sbom/defs.bzl) +# --------------------------------------------------------------------------- +EXCLUDE_PATTERNS=( + "rules_rust" + "rules_cc" + "bazel_tools" + "platforms" + "bazel_skylib" + "rules_python" + "rules_proto" + "protobuf" + "local_config_" + "remote_" +) + +# --------------------------------------------------------------------------- +# Helper: should a repo name be excluded?
+# --------------------------------------------------------------------------- +should_exclude() { + local repo="$1" + # Always keep crate_universe repos + if [[ "$repo" == *"crate_index__"* ]] || \ + [[ "$repo" == *"crates_io__"* ]] || \ + [[ "$repo" == *"_crates__"* ]]; then + return 1 # keep + fi + for pattern in "${EXCLUDE_PATTERNS[@]}"; do + if [[ "$repo" == *"$pattern"* ]]; then + return 0 # exclude + fi + done + return 1 # keep +} + +# --------------------------------------------------------------------------- +# Helper: normalize a raw repo name to the component name that the SBOM +# generator would produce. Mirrors sbom_generator.py resolve_component(). +# --------------------------------------------------------------------------- +normalize_to_component_name() { + local repo="$1" + + # Strip bzlmod "+" suffix + repo="${repo%+}" + + # crate_universe (bzlmod): rules_rust++crate+crate_index__serde-1.0.228 + if [[ "$repo" == *"crate_index__"* ]]; then + local crate_part="${repo##*crate_index__}" + # Extract name: everything before the first segment that starts with a digit + # e.g. 
"serde-1.0.228" -> "serde", "iceoryx2-qnx8-0.7.0" -> "iceoryx2-qnx8" + local IFS='-' + read -ra parts <<< "$crate_part" + local name_parts=() + for p in "${parts[@]}"; do + if [[ "$p" =~ ^[0-9] ]]; then + break + fi + name_parts+=("$p") + done + local crate_name + crate_name="$(IFS='-'; echo "${name_parts[*]}")" + # Underscores -> hyphens (Rust convention) + crate_name="${crate_name//_/-}" + echo "$crate_name" + return + fi + + # Legacy crate format: crates_io__tokio-1.10.0 + if [[ "$repo" == crates_io__* ]] || [[ "$repo" == *"_crates__"* ]]; then + local crate_info="${repo##*__}" + # Name is everything before the last hyphen-digit segment + local crate_name="${crate_info%-*}" + crate_name="${crate_name//_/-}" + echo "$crate_name" + return + fi + + # Everything else: plain repo name + echo "$repo" +} + +# --------------------------------------------------------------------------- +# Step 1: Run bazel query to discover all external deps +# --------------------------------------------------------------------------- +echo "=== Step 1: Running bazel query for external deps ===" + +# Build the union query for multiple targets +if [ ${#BUILD_TARGETS[@]} -eq 1 ]; then + QUERY_EXPR="deps(${BUILD_TARGETS[0]})" +else + # Union of deps for all targets + PARTS="" + for t in "${BUILD_TARGETS[@]}"; do + if [ -n "$PARTS" ]; then + PARTS="$PARTS + deps($t)" + else + PARTS="deps($t)" + fi + done + QUERY_EXPR="$PARTS" +fi + +echo " Query: $QUERY_EXPR" +echo "" + +# Get all packages, extract external repo names (lines starting with @) +QUERY_RAW=$(bazel query "$QUERY_EXPR" --output=package 2>/dev/null || true) + +# Extract unique repo names from @repo//package lines +QUERY_REPOS_RAW=$(echo "$QUERY_RAW" | grep '^@' | sed 's|@\([^/]*\)//.*|\1|' | sort -u) + +# Filter and normalize +declare -A QUERY_COMPONENTS +QUERY_EXCLUDED=() + +while IFS= read -r repo; do + [ -z "$repo" ] && continue + if should_exclude "$repo"; then + QUERY_EXCLUDED+=("$repo") + continue + fi + 
comp_name=$(normalize_to_component_name "$repo") + QUERY_COMPONENTS["$comp_name"]=1 +done <<< "$QUERY_REPOS_RAW" + +echo " Found ${#QUERY_COMPONENTS[@]} external components (after filtering)" +echo " Excluded ${#QUERY_EXCLUDED[@]} build-tool repos" +echo "" + +# --------------------------------------------------------------------------- +# Step 2: Build SBOM and extract components from .spdx.json +# --------------------------------------------------------------------------- +echo "=== Step 2: Building SBOM target ===" +bazel build "$SBOM_TARGET" 2>&1 | tail -5 +echo "" + +SPDX_FILE="$(bazel info bazel-bin 2>/dev/null)/${SBOM_NAME}.spdx.json" + +if [ ! -f "$SPDX_FILE" ]; then + echo "ERROR: SPDX file not found at $SPDX_FILE" + exit 1 +fi + +echo " Parsing $SPDX_FILE" + +# Extract component names from SPDX packages (skip the root package which +# has SPDXID "SPDXRef-RootPackage") +SBOM_NAMES=$(python3 -c " +import json, sys +with open('$SPDX_FILE') as f: + data = json.load(f) +for pkg in data.get('packages', []): + spdx_id = pkg.get('SPDXID', '') + if spdx_id == 'SPDXRef-RootPackage': + continue + print(pkg['name']) +" | sort -u) + +declare -A SBOM_COMPONENTS +while IFS= read -r name; do + [ -z "$name" ] && continue + SBOM_COMPONENTS["$name"]=1 +done <<< "$SBOM_NAMES" + +echo " Found ${#SBOM_COMPONENTS[@]} components in SBOM" +echo "" + +# --------------------------------------------------------------------------- +# Step 3: Compare +# --------------------------------------------------------------------------- +echo "=== Step 3: Comparison ===" +echo "" + +MATCH=() +ONLY_QUERY=() +ONLY_SBOM=() + +# Check query components against SBOM +for comp in $(echo "${!QUERY_COMPONENTS[@]}" | tr ' ' '\n' | sort); do + if [[ -n "${SBOM_COMPONENTS[$comp]+x}" ]]; then + MATCH+=("$comp") + else + ONLY_QUERY+=("$comp") + fi +done + +# Check SBOM components not in query +for comp in $(echo "${!SBOM_COMPONENTS[@]}" | tr ' ' '\n' | sort); do + if [[ -z "${QUERY_COMPONENTS[$comp]+x}" ]]; 
then + ONLY_SBOM+=("$comp") + fi +done + +# --------------------------------------------------------------------------- +# Report +# --------------------------------------------------------------------------- +echo "--- MATCHING (${#MATCH[@]}) ---" +for c in "${MATCH[@]}"; do + echo " [OK] $c" +done +echo "" + +if [ ${#ONLY_QUERY[@]} -gt 0 ]; then + echo "--- IN QUERY BUT NOT IN SBOM (${#ONLY_QUERY[@]}) ---" + echo " (These deps were found by bazel query but are missing from SBOM)" + for c in "${ONLY_QUERY[@]}"; do + echo " [MISSING] $c" + done + echo "" +fi + +if [ ${#ONLY_SBOM[@]} -gt 0 ]; then + echo "--- IN SBOM BUT NOT IN QUERY (${#ONLY_SBOM[@]}) ---" + echo " (These appear in SBOM but were not found by bazel query)" + for c in "${ONLY_SBOM[@]}"; do + echo " [EXTRA] $c" + done + echo "" +fi + +echo "--- EXCLUDED BUILD-TOOL REPOS (${#QUERY_EXCLUDED[@]}) ---" +for c in "${QUERY_EXCLUDED[@]}"; do + echo " [SKIP] $c" +done +echo "" + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- +echo "=========================================" +echo " SUMMARY" +echo "=========================================" +echo " Matching: ${#MATCH[@]}" +echo " In query only: ${#ONLY_QUERY[@]} (missing from SBOM)" +echo " In SBOM only: ${#ONLY_SBOM[@]} (extra in SBOM)" +echo " Excluded: ${#QUERY_EXCLUDED[@]} (build tools filtered)" +echo "=========================================" + +if [ ${#ONLY_QUERY[@]} -eq 0 ] && [ ${#ONLY_SBOM[@]} -eq 0 ]; then + echo " RESULT: PASS - SBOM matches bazel query" + exit 0 +else + echo " RESULT: DIFFERENCES FOUND" + echo "" + echo " Notes:" + echo " - 'In query only' items may indicate the SBOM aspect is not" + echo " traversing certain dependency attributes, OR that bazel query" + echo " includes deps (e.g. toolchain, exec config) that the aspect" + echo " intentionally skips." 
+ echo " - 'In SBOM only' items may indicate the aspect discovers deps" + echo " through attributes that bazel query --output=package does not" + echo " surface the same way." + echo " - Some differences are expected: bazel query deps() includes" + echo " toolchain and platform deps that the SBOM aspect skips, and" + echo " the aspect may normalize names differently." + exit 1 +fi diff --git a/sbom/tests/test_cyclonedx_formatter.py b/sbom/tests/test_cyclonedx_formatter.py new file mode 100644 index 0000000..1af33b3 --- /dev/null +++ b/sbom/tests/test_cyclonedx_formatter.py @@ -0,0 +1,142 @@ +"""Tests for CycloneDX 1.6 formatter.""" + +import unittest +from datetime import datetime, timezone + +from sbom.internal.generator.cyclonedx_formatter import generate_cyclonedx + + +class TestCycloneDXFormatter(unittest.TestCase): + """Tests for CycloneDX 1.6 generation.""" + + def setUp(self): + """Set up test fixtures.""" + self.timestamp = datetime( + 2024, 1, 15, 12, 0, 0, tzinfo=timezone.utc + ).isoformat() + self.config = { + "component_name": "test-component", + "component_version": "1.0.0", + "producer_name": "Eclipse Foundation", + "producer_url": "https://eclipse.dev/score", + "namespace": "https://eclipse.dev/score", + } + self.components = [ + { + "name": "tokio", + "version": "1.10.0", + "purl": "pkg:cargo/tokio@1.10.0", + "type": "library", + "license": "MIT", + "source": "crates.io", + }, + { + "name": "serde", + "version": "1.0.0", + "purl": "pkg:cargo/serde@1.0.0", + "type": "library", + "license": "MIT OR Apache-2.0", + "source": "crates.io", + }, + ] + + def test_generate_cyclonedx_structure(self): + """Test that generated CycloneDX has correct structure.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + self.assertEqual(cdx["bomFormat"], "CycloneDX") + self.assertEqual(cdx["specVersion"], "1.6") + self.assertIn("serialNumber", cdx) + self.assertTrue(cdx["serialNumber"].startswith("urn:uuid:")) + 
self.assertEqual(cdx["version"], 1) + + def test_generate_cyclonedx_metadata(self): + """Test that CycloneDX metadata is correct.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + metadata = cdx["metadata"] + self.assertEqual(metadata["timestamp"], self.timestamp) + self.assertIn("tools", metadata) + self.assertIn("component", metadata) + + root_component = metadata["component"] + self.assertEqual(root_component["name"], "test-component") + self.assertEqual(root_component["version"], "1.0.0") + self.assertEqual(root_component["type"], "application") + + def test_generate_cyclonedx_components(self): + """Test that components are properly added.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + components = cdx["components"] + self.assertEqual(len(components), 2) + + component_names = {c["name"] for c in components} + self.assertEqual(component_names, {"tokio", "serde"}) + + def test_generate_cyclonedx_component_details(self): + """Test that component details are correct.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + tokio = next(c for c in cdx["components"] if c["name"] == "tokio") + + self.assertEqual(tokio["version"], "1.10.0") + self.assertEqual(tokio["type"], "library") + self.assertEqual(tokio["purl"], "pkg:cargo/tokio@1.10.0") + self.assertIn("bom-ref", tokio) + + def test_generate_cyclonedx_licenses(self): + """Test that licenses are properly set.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + tokio = next(c for c in cdx["components"] if c["name"] == "tokio") + + self.assertIn("licenses", tokio) + self.assertEqual(len(tokio["licenses"]), 1) + self.assertEqual(tokio["licenses"][0]["license"]["id"], "MIT") + + def test_generate_cyclonedx_dependencies(self): + """Test that dependencies are created.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + dependencies = cdx["dependencies"] + + # Should have root + 2 
component dependency entries + self.assertEqual(len(dependencies), 3) + + # Find root dependency + root_dep = next(d for d in dependencies if d["ref"] == "test-component@1.0.0") + self.assertEqual(len(root_dep["dependsOn"]), 2) + + def test_generate_cyclonedx_external_references(self): + """Test that external references are added for crates.io sources.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + tokio = next(c for c in cdx["components"] if c["name"] == "tokio") + + self.assertIn("externalReferences", tokio) + ext_refs = tokio["externalReferences"] + + distribution_ref = next( + (r for r in ext_refs if r["type"] == "distribution"), None + ) + self.assertIsNotNone(distribution_ref) + self.assertIn("crates.io", distribution_ref["url"]) + + def test_generate_cyclonedx_with_empty_components(self): + """Test generating CycloneDX with no components.""" + cdx = generate_cyclonedx([], self.config, self.timestamp) + + self.assertEqual(len(cdx["components"]), 0) + self.assertEqual(len(cdx["dependencies"]), 1) # Just root + + def test_generate_cyclonedx_bom_refs_unique(self): + """Test that bom-refs are unique across components.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + bom_refs = [c["bom-ref"] for c in cdx["components"]] + self.assertEqual(len(bom_refs), len(set(bom_refs))) + + +if __name__ == "__main__": + unittest.main() diff --git a/sbom/tests/test_purl.py b/sbom/tests/test_purl.py new file mode 100644 index 0000000..26dbe5a --- /dev/null +++ b/sbom/tests/test_purl.py @@ -0,0 +1,109 @@ +"""Tests for PURL generation utilities.""" + +import unittest + +from sbom.internal.generator.purl import ( + generate_cargo_purl, + generate_generic_purl, + generate_github_purl, + generate_bazel_module_purl, + parse_purl, + validate_purl, +) + + +class TestPurlGeneration(unittest.TestCase): + """Tests for PURL generation functions.""" + + def test_generate_cargo_purl(self): + """Test generating Cargo crate PURLs.""" 
+ purl = generate_cargo_purl("tokio", "1.10.0") + self.assertEqual(purl, "pkg:cargo/tokio@1.10.0") + + def test_generate_cargo_purl_with_underscore(self): + """Test that underscores are converted to hyphens for Cargo crates.""" + purl = generate_cargo_purl("tracing_subscriber", "0.3.18") + self.assertEqual(purl, "pkg:cargo/tracing-subscriber@0.3.18") + + def test_generate_github_purl(self): + """Test generating GitHub PURLs.""" + purl = generate_github_purl("eclipse-iceoryx", "iceoryx2", "0.5.0") + self.assertEqual(purl, "pkg:github/eclipse-iceoryx/iceoryx2@0.5.0") + + def test_generate_github_purl_with_commit(self): + """Test generating GitHub PURLs with commit SHA.""" + purl = generate_github_purl( + "eclipse-iceoryx", + "iceoryx2", + "main", + commit="d3d1c9a727d3dc405733081923be5dba8213d6d8", + ) + self.assertIn("vcs_commit=d3d1c9a727d3dc405733081923be5dba8213d6d8", purl) + + def test_generate_bazel_module_purl(self): + """Test generating Bazel module PURLs.""" + purl = generate_bazel_module_purl("rules_rust", "0.67.0") + self.assertEqual(purl, "pkg:bazel/rules_rust@0.67.0") + + def test_generate_generic_purl(self): + """Test generating generic PURLs.""" + purl = generate_generic_purl("boost", "1.87.0") + self.assertEqual(purl, "pkg:generic/boost@1.87.0") + + def test_generate_generic_purl_with_qualifiers(self): + """Test generating generic PURLs with qualifiers.""" + purl = generate_generic_purl( + "boost", + "1.87.0", + qualifiers={"download_url": "https://example.com/boost.tar.gz"}, + ) + self.assertIn("download_url=", purl) + + +class TestPurlParsing(unittest.TestCase): + """Tests for PURL parsing functions.""" + + def test_parse_cargo_purl(self): + """Test parsing a Cargo PURL.""" + parsed = parse_purl("pkg:cargo/tokio@1.10.0") + self.assertEqual(parsed["type"], "cargo") + self.assertEqual(parsed["name"], "tokio") + self.assertEqual(parsed["version"], "1.10.0") + + def test_parse_github_purl(self): + """Test parsing a GitHub PURL.""" + parsed = 
parse_purl("pkg:github/eclipse-score/orchestrator@0.5.0") + self.assertEqual(parsed["type"], "github") + self.assertEqual(parsed["namespace"], "eclipse-score") + self.assertEqual(parsed["name"], "orchestrator") + self.assertEqual(parsed["version"], "0.5.0") + + def test_parse_purl_with_qualifiers(self): + """Test parsing a PURL with qualifiers.""" + parsed = parse_purl("pkg:cargo/tokio@1.10.0?features=full") + self.assertEqual(parsed["qualifiers"], {"features": "full"}) + + def test_parse_purl_with_subpath(self): + """Test parsing a PURL with subpath.""" + parsed = parse_purl("pkg:github/org/repo@1.0.0#src/lib") + self.assertEqual(parsed["subpath"], "src/lib") + + +class TestPurlValidation(unittest.TestCase): + """Tests for PURL validation.""" + + def test_valid_purl(self): + """Test validating a valid PURL.""" + self.assertTrue(validate_purl("pkg:cargo/tokio@1.10.0")) + + def test_invalid_purl_no_prefix(self): + """Test that PURLs without pkg: prefix are invalid.""" + self.assertFalse(validate_purl("cargo/tokio@1.10.0")) + + def test_invalid_purl_no_type(self): + """Test that PURLs without type are invalid.""" + self.assertFalse(validate_purl("pkg:/tokio@1.10.0")) + + +if __name__ == "__main__": + unittest.main() diff --git a/sbom/tests/test_spdx_formatter.py b/sbom/tests/test_spdx_formatter.py new file mode 100644 index 0000000..2a0af5a --- /dev/null +++ b/sbom/tests/test_spdx_formatter.py @@ -0,0 +1,109 @@ +"""Tests for SPDX 2.3 formatter.""" + +import unittest +from datetime import datetime, timezone + +from sbom.internal.generator.spdx_formatter import generate_spdx + + +class TestSpdxFormatter(unittest.TestCase): + """Tests for SPDX 2.3 generation.""" + + def setUp(self): + """Set up test fixtures.""" + self.timestamp = datetime( + 2024, 1, 15, 12, 0, 0, tzinfo=timezone.utc + ).isoformat() + self.config = { + "component_name": "test-component", + "component_version": "1.0.0", + "producer_name": "Eclipse Foundation", + "producer_url": 
"https://eclipse.dev/score", + "namespace": "https://eclipse.dev/score", + } + self.components = [ + { + "name": "tokio", + "version": "1.10.0", + "purl": "pkg:cargo/tokio@1.10.0", + "type": "library", + "license": "MIT", + }, + { + "name": "serde", + "version": "1.0.0", + "purl": "pkg:cargo/serde@1.0.0", + "type": "library", + "license": "MIT OR Apache-2.0", + }, + ] + + def test_generate_spdx_structure(self): + """Test that generated SPDX has correct structure.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + self.assertEqual(spdx["spdxVersion"], "SPDX-2.3") + self.assertEqual(spdx["dataLicense"], "CC0-1.0") + self.assertEqual(spdx["SPDXID"], "SPDXRef-DOCUMENT") + self.assertIn("documentNamespace", spdx) + self.assertIn("packages", spdx) + self.assertIn("relationships", spdx) + + def test_generate_spdx_document_info(self): + """Test that SPDX document has correct metadata.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + self.assertEqual(spdx["name"], "SBOM for test-component") + creation_info = spdx["creationInfo"] + self.assertEqual(creation_info["created"], self.timestamp) + creators = creation_info["creators"] + self.assertIn("Organization: Eclipse Foundation", creators) + self.assertIn("Tool: score-sbom-generator", creators) + + def test_generate_spdx_components(self): + """Test that components are properly added to SPDX.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + packages = spdx["packages"] + # root package + 2 components + self.assertEqual(len(packages), 3) + + def test_generate_spdx_relationships(self): + """Test that dependency relationships are created.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + relationships = spdx["relationships"] + # DESCRIBES + 2 DEPENDS_ON + describes = [r for r in relationships if r["relationshipType"] == "DESCRIBES"] + depends_on = [r for r in relationships if r["relationshipType"] == "DEPENDS_ON"] + + 
self.assertEqual(len(describes), 1) + self.assertEqual(len(depends_on), 2) + + def test_generate_spdx_with_empty_components(self): + """Test generating SPDX with no components.""" + spdx = generate_spdx([], self.config, self.timestamp) + + packages = spdx["packages"] + # Only root package + self.assertEqual(len(packages), 1) + + def test_generate_spdx_component_purl(self): + """Test that component PURLs are properly set.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + packages = spdx["packages"] + tokio_pkg = next((p for p in packages if p["name"] == "tokio"), None) + + self.assertIsNotNone(tokio_pkg) + ext_refs = tokio_pkg.get("externalRefs", []) + purl_ref = next( + (r for r in ext_refs if r.get("referenceType") == "purl"), + None, + ) + self.assertIsNotNone(purl_ref) + self.assertEqual(purl_ref["referenceLocator"], "pkg:cargo/tokio@1.10.0") + + +if __name__ == "__main__": + unittest.main() From ffca1f1423153419fcc3a797d4d2488083b0f568 Mon Sep 17 00:00:00 2001 From: Lukasz Juranek Date: Fri, 6 Feb 2026 18:24:02 +0100 Subject: [PATCH 2/2] Add sbom generation tooling cargo lock integration for rust and cdxgen (#2232) --- sbom/BUILD.bazel | 2 + sbom/SBOM_Implementation_Approach_SCORE.md | 161 ---- sbom/SBOM_Readme.md | 144 +++- sbom/cpp_metadata.json | 55 ++ sbom/crates_licenses_common.json | 78 ++ sbom/crates_metadata.json | 806 ++++++++++++++++++ sbom/defs.bzl | 12 +- sbom/extensions.bzl | 118 +-- sbom/internal/generator/BUILD | 4 + .../internal/generator/cyclonedx_formatter.py | 10 + sbom/internal/generator/sbom_generator.py | 238 +++++- sbom/scripts/generate_cpp_metadata_cache.py | 112 +++ .../scripts/generate_crates_metadata_cache.py | 207 +++++ 13 files changed, 1600 insertions(+), 347 deletions(-) delete mode 100644 sbom/SBOM_Implementation_Approach_SCORE.md create mode 100644 sbom/cpp_metadata.json create mode 100644 sbom/crates_licenses_common.json create mode 100644 sbom/crates_metadata.json create mode 100644 
sbom/scripts/generate_cpp_metadata_cache.py create mode 100755 sbom/scripts/generate_crates_metadata_cache.py diff --git a/sbom/BUILD.bazel b/sbom/BUILD.bazel index 790e22d..fb677a7 100644 --- a/sbom/BUILD.bazel +++ b/sbom/BUILD.bazel @@ -16,6 +16,8 @@ exports_files([ "extensions.bzl", "repos.bzl", "repository_rules.bzl", + "crates_metadata.json", + "cpp_metadata.json", ]) # Filegroup for all SBOM-related bzl files diff --git a/sbom/SBOM_Implementation_Approach_SCORE.md b/sbom/SBOM_Implementation_Approach_SCORE.md deleted file mode 100644 index 5f5648b..0000000 --- a/sbom/SBOM_Implementation_Approach_SCORE.md +++ /dev/null @@ -1,161 +0,0 @@ -# Detailed SBOM Implementation Approach for Eclipse SCORE - -## Executive Summary - -This proposal addresses the existing backlog items ([#2144](https://github.com/eclipse-score/score/issues/2144), [#2232](https://github.com/eclipse-score/score/issues/2232), [#2060](https://github.com/eclipse-score/score/issues/2060), [#2103](https://github.com/eclipse-score/score/issues/2103)) and provides a comprehensive implementation roadmap for SBOM generation in Eclipse SCORE. 
- ---- - -## High-Level Architecture - - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ SCORE SBOM ARCHITECTURE │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ -│ │ Rust │ │ C++ │ │ Bazel │ │ -│ │ Cargo.toml │ │ http_archive│ │ MODULE.bazel│ │ -│ │ (metadata) │ │ git_override│ │ (bazel_dep) │ │ -│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ -│ │ │ │ │ -│ ▼ ▼ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ SBOM GENERATOR MODULE │ │ -│ │ ┌──────────────────────┐ ┌──────────────────────┐ │ │ -│ │ │ Bazel Aspect │ │ Metadata Extension │ │ │ -│ │ │ (dep graph traversal│ │ (license/supplier │ │ │ -│ │ │ via sbom_aspect) │ │ from MODULE.bazel)│ │ │ -│ │ └──────────────────────┘ └──────────────────────┘ │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌─────────────────┴─────────────────┐ │ -│ ▼ ▼ │ -│ ┌─────────────┐ ┌─────────────┐ │ -│ │ SPDX 2.3 │ │ CycloneDX │ │ -│ │ .spdx.json │ │ 1.6 .json │ │ -│ └─────────────┘ └─────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### 2.2 Integration with Existing SCORE Tooling - -Dash is a **license compliance checker** only (no SBOM output, no VEX). -SBOM generation is a new, separate module that complements Dash. 
- -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ eclipse-score/tooling │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ EXISTING NEW (IMPLEMENTED) │ -│ ──────── ────────────────── │ -│ ├── dash/ ├── sbom/ │ -│ │ └── dash_license_checker │ ├── defs.bzl │ -│ │ (license compliance) │ │ └── sbom() macro │ -│ ├── cr_checker/ │ ├── extensions.bzl │ -│ │ └── copyright_checker │ │ └── sbom.license() │ -│ │ (header validation) │ ├── internal/ │ -│ │ │ │ ├── aspect.bzl (dep traversal) │ -│ │ │ │ ├── rules.bzl (build rule) │ -│ │ │ │ └── generator/ │ -│ │ │ │ ├── sbom_generator.py │ -│ │ │ │ ├── spdx_formatter.py │ -│ │ │ │ ├── cyclonedx_formatter.py │ -│ │ │ │ └── purl.py │ -│ │ │ └── tests/ │ -│ │ │ │ -│ COMPLEMENTARY WORKFLOW │ │ -│ ────────────────────── │ │ -│ Dash: checks if dependency │ │ -│ licenses are allowed by policy │ │ -│ SBOM: generates .spdx.json / │ │ -│ .cdx.json listing all deps │ │ -│ with name, version, license, │ │ -│ supplier, PURL │ │ -│ │ │ -│ VALIDATION (external, optional) │ │ -│ ──────────────────────────── │ │ -│ pip install spdx-tools │ │ -│ pyspdxtools -i out.spdx.json │ │ -│ Or: https://tools.spdx.org │ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -## 3. 
SBOM Generation Chain - -When `bazel build //:my_sbom` is invoked, the following chain executes: - -``` - ┌──────────────────────────────────────────────────────────────────────┐ - │ PHASE 1: Loading (MODULE.bazel) │ - │ │ - │ sbom_metadata module extension iterates ALL modules in workspace: │ - │ - Collects sbom.license() tags (name, license, supplier, version) │ - │ - Collects sbom.license(type="cargo") tags (Rust crates) │ - │ - Writes metadata.json to @sbom_metadata repository │ - └──────────────────────────┬───────────────────────────────────────────┘ - │ - ▼ - ┌──────────────────────────────────────────────────────────────────────┐ - │ PHASE 2: Analysis (aspect.bzl) │ - │ │ - │ sbom_aspect is attached to `targets` attr of sbom_rule. │ - │ For each target in targets = ["//src:app"]: │ - │ - Traverses deps, srcs, proc_macro_deps, hdrs, etc. │ - │ - Recursively collects SbomDepsInfo from all transitive deps │ - │ - Builds depsets of: │ - │ * external_repos (e.g. "score_kyron", "crates__tokio-1.10") │ - │ * transitive_deps (all labels in the dep graph) │ - └──────────────────────────┬───────────────────────────────────────────┘ - │ - ▼ - ┌──────────────────────────────────────────────────────────────────────┐ - │ PHASE 3: Execution (rules.bzl → sbom_generator.py) │ - │ │ - │ _sbom_impl combines aspect output + extension metadata: │ - │ 1. Reads external_repos and transitive_deps from SbomDepsInfo │ - │ 2. Reads metadata.json from @sbom_metadata extension │ - │ 3. Writes _deps.json with all data + config │ - │ 4. Runs sbom_generator.py which: │ - │ a. Filters repos by exclude_patterns (removes build tools) │ - │ b. Resolves each repo to a component (name, version, PURL) │ - │ c. Merges extension metadata (license, supplier, version) │ - │ d. Calls spdx_formatter.py → {name}.spdx.json │ - │ e. 
Calls cyclonedx_formatter.py → {name}.cdx.json │ - └──────────────────────────────────────────────────────────────────────┘ -``` - -### Key files in the chain - -| File | Phase | Role | -|------|-------|------| -| `extensions.bzl` | Loading | Collects `sbom.license()` from all modules (all dep types) | -| `internal/aspect.bzl` | Analysis | Traverses target dep graph, returns `SbomDepsInfo` | -| `internal/providers.bzl` | Analysis | Defines `SbomDepsInfo` provider (external_repos, transitive_deps) | -| `internal/rules.bzl` | Execution | Joins aspect + extension data, invokes Python generator | -| `internal/generator/sbom_generator.py` | Execution | Resolves repos to components, calls formatters | -| `internal/generator/spdx_formatter.py` | Execution | Produces SPDX 2.3 JSON | -| `internal/generator/cyclonedx_formatter.py` | Execution | Produces CycloneDX 1.6 JSON | -| `internal/generator/purl.py` | Execution | Generates Package URLs for components | -| `defs.bzl` | Public API | `sbom()` macro | - -## 4. 
Tool Selection - -### 4.1 Implemented Tool Stack - -| Component | Tool Used | Status | Rationale | -|-----------|-----------|--------|-----------| -| SBOM Framework | Custom Bazel rules (aspects + module extension) | Implemented | Native Bazel integration, hermetic builds | -| Dependency Discovery | Bazel aspect (sbom_aspect) | Implemented | Traverses transitive deps of any target | -| Rust Crate Metadata | `sbom.license(type = "cargo")` in MODULE.bazel | Implemented | Manual license/supplier, auto PURL | -| SPDX Generation | Custom Python formatter (spdx_formatter.py) | Implemented | SPDX 2.3 JSON, validated at tools.spdx.org | -| CycloneDX Generation | Custom Python formatter (cyclonedx_formatter.py) | Implemented | CycloneDX 1.6 JSON | -| License Data | `sbom.license()` in MODULE.bazel | Implemented | Manual declaration per dependency | -| SPDX Validation | [spdx-tools](https://github.com/spdx/tools-python) (external) | Available | For offline validation | -| License Compliance | Existing Dash (separate tool) | Existing | Complements SBOM, not integrated | - ---- diff --git a/sbom/SBOM_Readme.md b/sbom/SBOM_Readme.md index 6bf8e4a..7418423 100644 --- a/sbom/SBOM_Readme.md +++ b/sbom/SBOM_Readme.md @@ -2,31 +2,17 @@ ## 1. Configure MODULE.bazel -Add the following at the end of your `MODULE.bazel`: +Add the SBOM metadata extension in your **root** MODULE.bazel (e.g. 
`reference_integration/MODULE.bazel`): ```starlark -# Load the SBOM extension and make the generated metadata repo available +# Enable SBOM metadata collection from all modules in the dependency graph sbom_ext = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") use_repo(sbom_ext, "sbom_metadata") - -# Declare license/supplier for each dependency: - -# For bazel_dep() modules — version is read from the module graph, no need to specify it: -sbom_ext.license(name = "googletest", license = "BSD-3-Clause", supplier = "Google LLC") - -# For http_archive deps — version is NOT in the module graph, must be specified: -sbom_ext.license(name = "boost", license = "BSL-1.0", version = "1.87.0", supplier = "Boost.org") - -# For git_override deps — specify version (commit) + remote so a PURL can be generated: -sbom_ext.license(name = "iceoryx2", license = "Apache-2.0", supplier = "Eclipse Foundation", - version = "d3d1c9a", remote = "https://github.com/eclipse-iceoryx/iceoryx2.git") - -# For Rust crates (type = "cargo" generates pkg:cargo/ PURL): -sbom_ext.license(name = "tokio", license = "MIT", version = "1.10", type = "cargo", - supplier = "Tokio Contributors") ``` -## 2. Add SBOM target in BUILD +No manual license entries are needed — all license metadata is collected automatically. + +## 2. Add SBOM Target in BUILD ```starlark load("@score_tooling//sbom:defs.bzl", "sbom") @@ -39,6 +25,15 @@ sbom( ) ``` +### Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `targets` | Yes | Bazel targets to include in SBOM | +| `component_name` | No | Main component name (defaults to rule name) | +| `component_version` | No | Version string | +| `output_formats` | No | `["spdx", "cyclonedx"]` (default: both) | + ## 3. 
Build ```bash @@ -49,37 +44,116 @@ bazel build //:my_sbom Two files in `bazel-bin/`: -- `my_sbom.spdx.json` -- SPDX 2.3 -- `my_sbom.cdx.json` -- CycloneDX 1.6 +- `my_sbom.spdx.json` — SPDX 2.3 +- `my_sbom.cdx.json` — CycloneDX 1.6 + +--- + +## Toolchain Components + +### Core Tools + +| Tool | Role | +|------|------| +| [Bazel](https://bazel.build) | Build system — rules, aspects, and module extensions drive dependency discovery and SBOM generation | +| [Python 3](https://www.python.org) | Runtime for the SBOM generator, formatters, and maintenance scripts | + +### Build-Time Components (Bazel-native, no external dependencies) + +| Component | File | Role | +|-----------|------|------| +| **Public API** | `defs.bzl` | `sbom()` macro — user-facing entry point | +| **Module Extension** | `extensions.bzl` | Collects metadata from all modules in dependency graph | +| **Aspect** | `internal/aspect.bzl` | Traverses transitive deps of targets (`SbomDepsInfo` provider) | +| **Rule** | `internal/rules.bzl` | Orchestrates SBOM generation action | +| **Repository Rules** | `repos.bzl`, `repository_rules.bzl` | SBOM-aware `http_archive`/`git_repository` replacements | +| **Generator** | `internal/generator/sbom_generator.py` | Main Python executable — resolves components, loads caches, calls formatters | +| **SPDX Formatter** | `internal/generator/spdx_formatter.py` | Produces SPDX 2.3 JSON output | +| **CycloneDX Formatter** | `internal/generator/cyclonedx_formatter.py` | Produces CycloneDX 1.6 JSON output | +| **PURL Utilities** | `internal/generator/purl.py` | Package URL generation and parsing (`pkg:cargo`, `pkg:github`, `pkg:bazel`, `pkg:generic`) | +| **Rust Cache** | `crates_metadata.json` | Bundled Rust crate metadata (license, version, checksum, PURL) | +| **C++ Cache** | `cpp_metadata.json` | Bundled C++ dependency metadata (license, supplier, version, PURL) | + +### Maintenance Scripts (offline, run by tooling developers) + +| Script | Purpose | External dependency 
| +|--------|---------|---------------------| +| `scripts/generate_crates_metadata_cache.py` | Regenerate Rust cache from Cargo.lock + crates.io API | Python 3.11+ (tomllib) | +| `scripts/generate_cpp_metadata_cache.py` | Regenerate C++ cache from cdxgen output | None | + +### External Tools (maintenance only, not needed at build time) + +| Tool | Purpose | Usage | +|------|---------|-------| +| [Node.js / npm](https://nodejs.org) | Runtime for cdxgen | Provides `npx` to run cdxgen | +| [@cyclonedx/cdxgen](https://github.com/CycloneDX/cdxgen) | C++ dependency license discovery (scans source tree, LICENSE files, package manifests) | `npx @cyclonedx/cdxgen -t cpp --deep -r -o cdxgen.json` | +| [crates.io API](https://crates.io) | Rust crate license lookup | Called by `generate_crates_metadata_cache.py` | + +### Output Standards + +| Format | Specification | +|--------|---------------| +| SPDX 2.3 | [spdx.github.io/spdx-spec/v2.3](https://spdx.github.io/spdx-spec/v2.3/) | +| CycloneDX 1.6 | [cyclonedx.org/docs/1.6](https://cyclonedx.org/docs/1.6/json/) | +| Package URL (PURL) | [github.com/package-url/purl-spec](https://github.com/package-url/purl-spec) | + +### Three-Phase Architecture + +``` +Phase 1: Loading Phase 2: Analysis Phase 3: Execution +(extensions.bzl) (aspect.bzl) (sbom_generator.py) + +MODULE.bazel Bazel targets _deps.json + metadata.json + | | | + v v v +sbom_metadata ext ---> SbomDepsInfo aspect ---> Python generator + | | | + v v v +metadata.json _deps.json .spdx.json + .cdx.json +``` --- -## Auto-extracted vs manual fields +## How License Metadata Is Collected + +All license metadata is collected **automatically** — no manual declarations needed. 
+ +| Ecosystem | License source | How it works | +|-----------|---------------|--------------| +| Rust crates | `crates_metadata.json` cache | Pre-generated from Cargo.lock + crates.io API (bundled with tooling) | +| C++ dependencies | `cpp_metadata.json` cache | Pre-generated from cdxgen scan (bundled with tooling) | +| Bazel modules | Auto-extracted | Version and PURL from module graph | -**Always auto-extracted:** +### Auto-Extracted Fields | Field | Source | |-------|--------| -| Dependency list | Aspect traverses transitive deps of your targets | +| Dependency list | Bazel aspect traverses transitive deps of your targets | | Version (bazel_dep) | From module graph | -| Version (crates) | From crate repo name | -| PURL | Generated from URLs/remotes | +| Version (Rust crates) | From crates_metadata.json cache | +| PURL | Generated from URLs, remotes, or crate names | +| License (Rust) | crates_metadata.json cache (~70% coverage) | +| License (C++) | cpp_metadata.json cache | +| Checksum (Rust) | SHA-256 from crates_metadata.json cache (100% coverage) | -**What is excluded from the SBOM:** +### What Is Excluded -- Dependencies not in the transitive dep graph of your `targets` (e.g. `dev_dependency = True` lint/formatting tools that your binary never links against) +- Dependencies not in the transitive dep graph of your `targets` - Build toolchain repos matching `exclude_patterns` (e.g. `rules_rust`, `rules_cc`, `bazel_tools`, `platforms`) -**What you must provide manually:** +--- + +## Maintenance (Tooling Developers) + +The metadata caches are bundled with `@score_tooling//sbom` and updated periodically by tooling maintainers. 
-| Field | Where | When | -|-------|-------|------| -| license | `sbom_ext.license()` | All dependencies | -| supplier | `sbom_ext.license()` | Recommended for NTIA compliance | -| version | `sbom_ext.license()` | For http_archive/git/crate deps (auto-extracted for bazel_dep) | +| Task | Frequency | Command | +|------|-----------|---------| +| Update Rust crate cache | When Rust deps change | `python3 scripts/generate_crates_metadata_cache.py crates_metadata.json` | +| Update C++ dependency cache | When C++ deps change | `npx @cyclonedx/cdxgen -t cpp --deep -r -o cdxgen.json && python3 scripts/generate_cpp_metadata_cache.py cdxgen.json cpp_metadata.json` | --- ## Example -See `reference_integration/BUILD:39-66` for working SBOM targets and `reference_integration/MODULE.bazel:69-77` for the metadata extension setup. +See [reference_integration/BUILD](../../reference_integration/BUILD) for working SBOM targets and [reference_integration/MODULE.bazel](../../reference_integration/MODULE.bazel) for the metadata extension setup. 
diff --git a/sbom/cpp_metadata.json b/sbom/cpp_metadata.json new file mode 100644 index 0000000..b6703c6 --- /dev/null +++ b/sbom/cpp_metadata.json @@ -0,0 +1,55 @@ +{ + "boost": { + "version": "1.87.0", + "license": "BSL-1.0", + "supplier": "Boost.org", + "purl": "pkg:conan/boost@1.87.0", + "url": "https://www.boost.org/" + }, + "nlohmann-json": { + "version": "3.11.3", + "license": "MIT", + "supplier": "Niels Lohmann", + "purl": "pkg:conan/nlohmann_json@3.11.3", + "url": "https://github.com/nlohmann/json" + }, + "googletest": { + "version": "1.17.0", + "license": "BSD-3-Clause", + "supplier": "Google LLC", + "purl": "pkg:github/google/googletest@1.17.0", + "url": "https://github.com/google/googletest" + }, + "google_benchmark": { + "version": "1.9.4", + "license": "Apache-2.0", + "supplier": "Google LLC", + "purl": "pkg:github/google/benchmark@1.9.4", + "url": "https://github.com/google/benchmark" + }, + "flatbuffers": { + "version": "25.2.10", + "license": "Apache-2.0", + "supplier": "Google LLC", + "purl": "pkg:github/google/flatbuffers@25.2.10", + "url": "https://github.com/google/flatbuffers" + }, + "vsomeip": { + "version": "3.6.0", + "license": "MPL-2.0", + "supplier": "COVESA", + "purl": "pkg:github/COVESA/vsomeip@3.6.0", + "url": "https://github.com/COVESA/vsomeip" + }, + "json_schema_validator": { + "version": "2.1.0", + "license": "MIT", + "supplier": "Patrick Boettcher", + "purl": "pkg:github/pboettch/json-schema-validator@2.1.0" + }, + "bazel_skylib": { + "version": "1.8.1", + "license": "Apache-2.0", + "purl": "pkg:github/bazelbuild/bazel-skylib@1.8.1" + } +} diff --git a/sbom/crates_licenses_common.json b/sbom/crates_licenses_common.json new file mode 100644 index 0000000..cf53b82 --- /dev/null +++ b/sbom/crates_licenses_common.json @@ -0,0 +1,78 @@ +{ + "serde": {"license": "MIT OR Apache-2.0"}, + "serde_derive": {"license": "MIT OR Apache-2.0"}, + "syn": {"license": "MIT OR Apache-2.0"}, + "quote": {"license": "MIT OR Apache-2.0"}, + 
"proc-macro2": {"license": "MIT OR Apache-2.0"}, + "libc": {"license": "MIT OR Apache-2.0"}, + "cfg-if": {"license": "MIT OR Apache-2.0"}, + "memchr": {"license": "Unlicense OR MIT"}, + "bitflags": {"license": "MIT OR Apache-2.0"}, + "lazy_static": {"license": "MIT OR Apache-2.0"}, + "once_cell": {"license": "MIT OR Apache-2.0"}, + "log": {"license": "MIT OR Apache-2.0"}, + "regex": {"license": "MIT OR Apache-2.0"}, + "tokio": {"license": "MIT"}, + "futures": {"license": "MIT OR Apache-2.0"}, + "futures-core": {"license": "MIT OR Apache-2.0"}, + "futures-util": {"license": "MIT OR Apache-2.0"}, + "futures-channel": {"license": "MIT OR Apache-2.0"}, + "futures-executor": {"license": "MIT OR Apache-2.0"}, + "futures-sink": {"license": "MIT OR Apache-2.0"}, + "futures-task": {"license": "MIT OR Apache-2.0"}, + "futures-io": {"license": "MIT OR Apache-2.0"}, + "futures-macro": {"license": "MIT OR Apache-2.0"}, + "pin-project-lite": {"license": "Apache-2.0 OR MIT"}, + "bytes": {"license": "MIT"}, + "async-trait": {"license": "MIT OR Apache-2.0"}, + "tracing": {"license": "MIT"}, + "tracing-core": {"license": "MIT"}, + "tracing-subscriber": {"license": "MIT"}, + "serde_json": {"license": "MIT OR Apache-2.0"}, + "toml": {"license": "MIT OR Apache-2.0"}, + "anyhow": {"license": "MIT OR Apache-2.0"}, + "thiserror": {"license": "MIT OR Apache-2.0"}, + "clap": {"license": "MIT OR Apache-2.0"}, + "rand": {"license": "MIT OR Apache-2.0"}, + "chrono": {"license": "MIT OR Apache-2.0"}, + "uuid": {"license": "Apache-2.0 OR MIT"}, + "url": {"license": "MIT OR Apache-2.0"}, + "itertools": {"license": "MIT OR Apache-2.0"}, + "either": {"license": "MIT OR Apache-2.0"}, + "crossbeam": {"license": "MIT OR Apache-2.0"}, + "crossbeam-channel": {"license": "MIT OR Apache-2.0"}, + "crossbeam-utils": {"license": "MIT OR Apache-2.0"}, + "byteorder": {"license": "Unlicense OR MIT"}, + "nom": {"license": "MIT"}, + "bindgen": {"license": "BSD-3-Clause"}, + "cc": {"license": "MIT OR Apache-2.0"}, 
+ "indexmap": {"license": "Apache-2.0 OR MIT"}, + "hashbrown": {"license": "MIT OR Apache-2.0"}, + "aho-corasick": {"license": "Unlicense OR MIT"}, + "unicode-ident": {"license": "(MIT OR Apache-2.0) AND Unicode-3.0"}, + "enum-iterator": {"license": "0BSD OR MIT OR Apache-2.0"}, + "enum-iterator-derive": {"license": "0BSD OR MIT OR Apache-2.0"}, + "itoa": {"license": "MIT OR Apache-2.0"}, + "libloading": {"license": "ISC"}, + "cexpr": {"license": "Apache-2.0 OR MIT"}, + "clang-sys": {"license": "Apache-2.0"}, + "glob": {"license": "MIT OR Apache-2.0"}, + "shlex": {"license": "MIT OR Apache-2.0"}, + "prettyplease": {"license": "MIT OR Apache-2.0"}, + "rustc-hash": {"license": "Apache-2.0 OR MIT"}, + "nu-ansi-term": {"license": "MIT"}, + "matchers": {"license": "MIT"}, + "minimal-lexical": {"license": "MIT OR Apache-2.0"}, + "num-conv": {"license": "MIT OR Apache-2.0"}, + "deranged": {"license": "MIT OR Apache-2.0"}, + "time": {"license": "MIT OR Apache-2.0"}, + "time-core": {"license": "MIT OR Apache-2.0"}, + "time-macros": {"license": "MIT OR Apache-2.0"}, + "equivalent": {"license": "Apache-2.0 OR MIT"}, + "loom": {"license": "MIT"}, + "generator": {"license": "Apache-2.0 OR MIT"}, + "embedded-io": {"license": "MIT OR Apache-2.0"}, + "cobs": {"license": "MIT OR Apache-2.0"}, + "cdr": {"license": "Apache-2.0 OR MIT"}, + "find-msvc-tools": {"license": "MIT OR Apache-2.0"} +} diff --git a/sbom/crates_metadata.json b/sbom/crates_metadata.json new file mode 100644 index 0000000..2f1b7b6 --- /dev/null +++ b/sbom/crates_metadata.json @@ -0,0 +1,806 @@ +{ + "aho-corasick": { + "checksum": "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301", + "license": "Unlicense OR MIT", + "name": "aho-corasick", + "purl": "pkg:cargo/aho-corasick@1.1.4", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.1.4" + }, + "bindgen": { + "checksum": "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895", + "license": "BSD-3-Clause", +
"name": "bindgen", + "purl": "pkg:cargo/bindgen@0.72.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.72.1" + }, + "bitflags": { + "checksum": "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3", + "license": "MIT OR Apache-2.0", + "name": "bitflags", + "purl": "pkg:cargo/bitflags@2.10.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.10.0" + }, + "byteorder": { + "checksum": "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b", + "license": "Unlicense OR MIT", + "name": "byteorder", + "purl": "pkg:cargo/byteorder@1.5.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.5.0" + }, + "cc": { + "checksum": "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215", + "license": "MIT OR Apache-2.0", + "name": "cc", + "purl": "pkg:cargo/cc@1.2.49", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.2.49" + }, + "cdr": { + "checksum": "9617422bf43fde9280707a7e90f8f7494389c182f5c70b0f67592d0f06d41dfa", + "license": "Apache-2.0 OR MIT", + "name": "cdr", + "purl": "pkg:cargo/cdr@0.2.4", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.4" + }, + "cexpr": { + "checksum": "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766", + "license": "Apache-2.0 OR MIT", + "name": "cexpr", + "purl": "pkg:cargo/cexpr@0.6.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.0" + }, + "cfg-if": { + "checksum": "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801", + "license": "MIT OR Apache-2.0", + "name": "cfg-if", + "purl": "pkg:cargo/cfg-if@1.0.4", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.4" + }, + "clang-sys": { + "checksum": "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4", + "license": "Apache-2.0", + "name": "clang-sys", + "purl": 
"pkg:cargo/clang-sys@1.8.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.8.1" + }, + "cobs": { + "checksum": "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1", + "license": "MIT OR Apache-2.0", + "name": "cobs", + "purl": "pkg:cargo/cobs@0.3.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.0" + }, + "crossbeam-channel": { + "checksum": "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2", + "license": "MIT OR Apache-2.0", + "name": "crossbeam-channel", + "purl": "pkg:cargo/crossbeam-channel@0.5.15", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.5.15" + }, + "crossbeam-utils": { + "checksum": "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28", + "license": "MIT OR Apache-2.0", + "name": "crossbeam-utils", + "purl": "pkg:cargo/crossbeam-utils@0.8.21", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.21" + }, + "deranged": { + "checksum": "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587", + "license": "MIT OR Apache-2.0", + "name": "deranged", + "purl": "pkg:cargo/deranged@0.5.5", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.5.5" + }, + "either": { + "checksum": "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719", + "license": "MIT OR Apache-2.0", + "name": "either", + "purl": "pkg:cargo/either@1.15.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.15.0" + }, + "embedded-io": { + "checksum": "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d", + "license": "MIT OR Apache-2.0", + "name": "embedded-io", + "purl": "pkg:cargo/embedded-io@0.6.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.1" + }, + "enum-iterator": { + "checksum": "a4549325971814bda7a44061bf3fe7e487d447cba01e4220a4b454d630d7a016", + 
"license": "0BSD OR MIT OR Apache-2.0", + "name": "enum-iterator", + "purl": "pkg:cargo/enum-iterator@2.3.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.3.0" + }, + "enum-iterator-derive": { + "checksum": "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842", + "license": "0BSD OR MIT OR Apache-2.0", + "name": "enum-iterator-derive", + "purl": "pkg:cargo/enum-iterator-derive@1.5.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.5.0" + }, + "equivalent": { + "checksum": "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f", + "license": "Apache-2.0 OR MIT", + "name": "equivalent", + "purl": "pkg:cargo/equivalent@1.0.2", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.2" + }, + "find-msvc-tools": { + "checksum": "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844", + "license": "MIT OR Apache-2.0", + "name": "find-msvc-tools", + "purl": "pkg:cargo/find-msvc-tools@0.1.5", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.5" + }, + "futures": { + "checksum": "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876", + "license": "MIT OR Apache-2.0", + "name": "futures", + "purl": "pkg:cargo/futures@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-channel": { + "checksum": "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10", + "license": "MIT OR Apache-2.0", + "name": "futures-channel", + "purl": "pkg:cargo/futures-channel@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-core": { + "checksum": "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e", + "license": "MIT OR Apache-2.0", + "name": "futures-core", + "purl": "pkg:cargo/futures-core@0.3.31", + "source": 
"registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-executor": { + "checksum": "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f", + "license": "MIT OR Apache-2.0", + "name": "futures-executor", + "purl": "pkg:cargo/futures-executor@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-io": { + "checksum": "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6", + "license": "MIT OR Apache-2.0", + "name": "futures-io", + "purl": "pkg:cargo/futures-io@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-macro": { + "checksum": "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650", + "license": "MIT OR Apache-2.0", + "name": "futures-macro", + "purl": "pkg:cargo/futures-macro@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-sink": { + "checksum": "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7", + "license": "MIT OR Apache-2.0", + "name": "futures-sink", + "purl": "pkg:cargo/futures-sink@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-task": { + "checksum": "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988", + "license": "MIT OR Apache-2.0", + "name": "futures-task", + "purl": "pkg:cargo/futures-task@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-util": { + "checksum": "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81", + "license": "MIT OR Apache-2.0", + "name": "futures-util", + "purl": "pkg:cargo/futures-util@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "generator": { + "checksum": "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2", + 
"license": "Apache-2.0 OR MIT", + "name": "generator", + "purl": "pkg:cargo/generator@0.8.7", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.7" + }, + "glob": { + "checksum": "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280", + "license": "MIT OR Apache-2.0", + "name": "glob", + "purl": "pkg:cargo/glob@0.3.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.3" + }, + "hashbrown": { + "checksum": "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100", + "license": "MIT OR Apache-2.0", + "name": "hashbrown", + "purl": "pkg:cargo/hashbrown@0.16.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.16.1" + }, + "indexmap": { + "checksum": "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2", + "license": "Apache-2.0 OR MIT", + "name": "indexmap", + "purl": "pkg:cargo/indexmap@2.12.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.12.1" + }, + "itertools": { + "checksum": "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186", + "license": "MIT OR Apache-2.0", + "name": "itertools", + "purl": "pkg:cargo/itertools@0.13.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.13.0" + }, + "itoa": { + "checksum": "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c", + "license": "MIT OR Apache-2.0", + "name": "itoa", + "purl": "pkg:cargo/itoa@1.0.15", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.15" + }, + "lazy_static": { + "checksum": "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe", + "license": "MIT OR Apache-2.0", + "name": "lazy_static", + "purl": "pkg:cargo/lazy_static@1.5.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.5.0" + }, + "libc": { + "checksum": 
"37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091", + "license": "MIT OR Apache-2.0", + "name": "libc", + "purl": "pkg:cargo/libc@0.2.178", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.178" + }, + "libloading": { + "checksum": "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55", + "license": "ISC", + "name": "libloading", + "purl": "pkg:cargo/libloading@0.8.9", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.9" + }, + "log": { + "checksum": "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897", + "license": "MIT OR Apache-2.0", + "name": "log", + "purl": "pkg:cargo/log@0.4.29", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.29" + }, + "loom": { + "checksum": "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca", + "license": "MIT", + "name": "loom", + "purl": "pkg:cargo/loom@0.7.2", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.7.2" + }, + "matchers": { + "checksum": "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9", + "license": "MIT", + "name": "matchers", + "purl": "pkg:cargo/matchers@0.2.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "memchr": { + "checksum": "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273", + "license": "Unlicense OR MIT", + "name": "memchr", + "purl": "pkg:cargo/memchr@2.7.6", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.7.6" + }, + "minimal-lexical": { + "checksum": "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a", + "license": "MIT OR Apache-2.0", + "name": "minimal-lexical", + "purl": "pkg:cargo/minimal-lexical@0.2.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.1" + }, + "nom": { + "checksum": 
"d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a", + "license": "MIT", + "name": "nom", + "purl": "pkg:cargo/nom@7.1.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "7.1.3" + }, + "nu-ansi-term": { + "checksum": "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5", + "license": "MIT", + "name": "nu-ansi-term", + "purl": "pkg:cargo/nu-ansi-term@0.50.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.50.3" + }, + "num-conv": { + "checksum": "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9", + "license": "MIT OR Apache-2.0", + "name": "num-conv", + "purl": "pkg:cargo/num-conv@0.1.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.0" + }, + "once_cell": { + "checksum": "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d", + "license": "MIT OR Apache-2.0", + "name": "once_cell", + "purl": "pkg:cargo/once_cell@1.21.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.21.3" + }, + "pin-project-lite": { + "checksum": "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b", + "license": "Apache-2.0 OR MIT", + "name": "pin-project-lite", + "purl": "pkg:cargo/pin-project-lite@0.2.16", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.16" + }, + "pin-utils": { + "checksum": "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184", + "name": "pin-utils", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.0" + }, + "postcard": { + "checksum": "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24", + "name": "postcard", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.1.3" + }, + "powerfmt": { + "checksum": "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391", + "name": "powerfmt", + "source": 
"registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "prettyplease": { + "checksum": "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b", + "license": "MIT OR Apache-2.0", + "name": "prettyplease", + "purl": "pkg:cargo/prettyplease@0.2.37", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.37" + }, + "proc-macro2": { + "checksum": "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8", + "license": "MIT OR Apache-2.0", + "name": "proc-macro2", + "purl": "pkg:cargo/proc-macro2@1.0.103", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.103" + }, + "quote": { + "checksum": "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f", + "license": "MIT OR Apache-2.0", + "name": "quote", + "purl": "pkg:cargo/quote@1.0.42", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.42" + }, + "regex": { + "checksum": "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4", + "license": "MIT OR Apache-2.0", + "name": "regex", + "purl": "pkg:cargo/regex@1.12.2", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.12.2" + }, + "regex-automata": { + "checksum": "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c", + "name": "regex-automata", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.13" + }, + "regex-syntax": { + "checksum": "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58", + "name": "regex-syntax", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.8" + }, + "rustc-hash": { + "checksum": "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d", + "license": "Apache-2.0 OR MIT", + "name": "rustc-hash", + "purl": "pkg:cargo/rustc-hash@2.1.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.1.1" + }, + 
"rustversion": { + "checksum": "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d", + "name": "rustversion", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.22" + }, + "ryu": { + "checksum": "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f", + "name": "ryu", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.20" + }, + "scoped-tls": { + "checksum": "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294", + "name": "scoped-tls", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.1" + }, + "serde": { + "checksum": "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e", + "license": "MIT OR Apache-2.0", + "name": "serde", + "purl": "pkg:cargo/serde@1.0.228", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.228" + }, + "serde_core": { + "checksum": "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad", + "name": "serde_core", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.228" + }, + "serde_derive": { + "checksum": "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79", + "license": "MIT OR Apache-2.0", + "name": "serde_derive", + "purl": "pkg:cargo/serde_derive@1.0.228", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.228" + }, + "serde_json": { + "checksum": "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c", + "license": "MIT OR Apache-2.0", + "name": "serde_json", + "purl": "pkg:cargo/serde_json@1.0.145", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.145" + }, + "serde_spanned": { + "checksum": "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3", + "name": "serde_spanned", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.9" + }, + "sha1_smol": { + 
"checksum": "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d", + "name": "sha1_smol", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.1" + }, + "sharded-slab": { + "checksum": "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6", + "name": "sharded-slab", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.7" + }, + "shlex": { + "checksum": "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64", + "license": "MIT OR Apache-2.0", + "name": "shlex", + "purl": "pkg:cargo/shlex@1.3.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.3.0" + }, + "slab": { + "checksum": "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589", + "name": "slab", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.11" + }, + "smallvec": { + "checksum": "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03", + "name": "smallvec", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.15.1" + }, + "syn": { + "checksum": "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87", + "license": "MIT OR Apache-2.0", + "name": "syn", + "purl": "pkg:cargo/syn@2.0.111", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.0.111" + }, + "thiserror": { + "checksum": "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8", + "license": "MIT OR Apache-2.0", + "name": "thiserror", + "purl": "pkg:cargo/thiserror@2.0.17", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.0.17" + }, + "thiserror-impl": { + "checksum": "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913", + "name": "thiserror-impl", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.0.17" + }, + "thread_local": { + "checksum": 
"f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185", + "name": "thread_local", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.1.9" + }, + "time": { + "checksum": "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d", + "license": "MIT OR Apache-2.0", + "name": "time", + "purl": "pkg:cargo/time@0.3.44", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.44" + }, + "time-core": { + "checksum": "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b", + "license": "MIT OR Apache-2.0", + "name": "time-core", + "purl": "pkg:cargo/time-core@0.1.6", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.6" + }, + "time-macros": { + "checksum": "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3", + "license": "MIT OR Apache-2.0", + "name": "time-macros", + "purl": "pkg:cargo/time-macros@0.2.24", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.24" + }, + "tiny-fn": { + "checksum": "9659b108631d1e1cf3e8e489f894bee40bc9d68fd6cc67ec4d4ce9b72d565228", + "name": "tiny-fn", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.9" + }, + "toml": { + "checksum": "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362", + "license": "MIT OR Apache-2.0", + "name": "toml", + "purl": "pkg:cargo/toml@0.8.23", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.23" + }, + "toml_datetime": { + "checksum": "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c", + "name": "toml_datetime", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.11" + }, + "toml_edit": { + "checksum": "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a", + "name": "toml_edit", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.22.27" + }, + 
"toml_write": { + "checksum": "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801", + "name": "toml_write", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.2" + }, + "tracing": { + "checksum": "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647", + "license": "MIT", + "name": "tracing", + "purl": "pkg:cargo/tracing@0.1.43", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.43" + }, + "tracing-appender": { + "checksum": "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf", + "name": "tracing-appender", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.4" + }, + "tracing-attributes": { + "checksum": "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da", + "name": "tracing-attributes", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.31" + }, + "tracing-core": { + "checksum": "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c", + "license": "MIT", + "name": "tracing-core", + "purl": "pkg:cargo/tracing-core@0.1.35", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.35" + }, + "tracing-log": { + "checksum": "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3", + "name": "tracing-log", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "tracing-serde": { + "checksum": "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1", + "name": "tracing-serde", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "tracing-subscriber": { + "checksum": "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e", + "license": "MIT", + "name": "tracing-subscriber", + "purl": "pkg:cargo/tracing-subscriber@0.3.22", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.22" + }, + 
"unicode-ident": { + "checksum": "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5", + "license": "(MIT OR Apache-2.0) AND Unicode-3.0", + "name": "unicode-ident", + "purl": "pkg:cargo/unicode-ident@1.0.22", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.22" + }, + "valuable": { + "checksum": "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65", + "name": "valuable", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.1" + }, + "windows": { + "checksum": "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893", + "name": "windows", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.61.3" + }, + "windows-collections": { + "checksum": "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8", + "name": "windows-collections", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "windows-core": { + "checksum": "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3", + "name": "windows-core", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.61.2" + }, + "windows-future": { + "checksum": "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e", + "name": "windows-future", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.1" + }, + "windows-implement": { + "checksum": "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf", + "name": "windows-implement", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.60.2" + }, + "windows-interface": { + "checksum": "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358", + "name": "windows-interface", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.59.3" + }, + "windows-link": { + "checksum":
"f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5", + "name": "windows-link", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.1" + }, + "windows-numerics": { + "checksum": "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1", + "name": "windows-numerics", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "windows-result": { + "checksum": "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6", + "name": "windows-result", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.4" + }, + "windows-strings": { + "checksum": "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57", + "name": "windows-strings", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.2" + }, + "windows-sys": { + "checksum": "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc", + "name": "windows-sys", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.61.2" + }, + "windows-targets": { + "checksum": "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c", + "name": "windows-targets", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows-threading": { + "checksum": "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6", + "name": "windows-threading", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.0" + }, + "windows_aarch64_gnullvm": { + "checksum": "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8", + "name": "windows_aarch64_gnullvm", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_aarch64_msvc": { + "checksum": "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc", + "name": "windows_aarch64_msvc", + "source": 
"registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_i686_gnu": { + "checksum": "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e", + "name": "windows_i686_gnu", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_i686_msvc": { + "checksum": "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406", + "name": "windows_i686_msvc", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_x86_64_gnu": { + "checksum": "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e", + "name": "windows_x86_64_gnu", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_x86_64_gnullvm": { + "checksum": "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc", + "name": "windows_x86_64_gnullvm", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_x86_64_msvc": { + "checksum": "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538", + "name": "windows_x86_64_msvc", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "winnow": { + "checksum": "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829", + "name": "winnow", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.7.14" + } +} \ No newline at end of file diff --git a/sbom/defs.bzl b/sbom/defs.bzl index bbabe7d..b4b32c2 100644 --- a/sbom/defs.bzl +++ b/sbom/defs.bzl @@ -37,6 +37,11 @@ def sbom( targets, traversing their transitive dependencies and generating output in SPDX 2.3 and/or CycloneDX 1.6 format. 
+ License metadata is collected automatically: + - Rust crates: from crates_metadata.json cache (bundled with tooling) + - C++ deps: from cpp_metadata.json cache (bundled with tooling) + - Bazel modules: version/PURL auto-extracted from module graph + Prerequisites: In your MODULE.bazel, you must enable the sbom_metadata extension: ``` @@ -79,13 +84,6 @@ def sbom( component_name = "score_reference_integration", component_version = "0.5.0-beta", ) - - # SPDX only - sbom( - name = "my_spdx_sbom", - targets = ["//src:my_app"], - output_formats = ["spdx"], - ) """ default_exclude_patterns = [ "rules_rust", diff --git a/sbom/extensions.bzl b/sbom/extensions.bzl index 99aec09..2dbb055 100644 --- a/sbom/extensions.bzl +++ b/sbom/extensions.bzl @@ -1,25 +1,13 @@ """Module extension to collect dependency metadata from bzlmod. -This extension collects version and metadata information for all modules, -crates, and other dependencies in the workspace, making it available for -SBOM generation. +This extension collects version and metadata information for all modules +and other dependencies in the workspace, making it available for +SBOM generation. License metadata is collected automatically from +bundled caches (crates_metadata.json, cpp_metadata.json). 
Usage in MODULE.bazel: - sbom = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") - - # For bazel_dep modules (version auto-extracted from module): - sbom.license(name = "flatbuffers", license = "Apache-2.0", supplier = "Google LLC") - - # For http_archive dependencies (provide version explicitly): - sbom.license(name = "boost", license = "BSL-1.0", version = "1.87.0", supplier = "Boost.org") - - # For git_repository dependencies: - sbom.license(name = "iceoryx2", license = "Apache-2.0", supplier = "Eclipse Foundation", - version = "0.5.0", remote = "https://github.com/eclipse-iceoryx/iceoryx2.git") - - # For Rust crates (type = "cargo" generates pkg:cargo/ PURL): - sbom.license(name = "tokio", license = "MIT", version = "1.10", type = "cargo", - supplier = "Tokio Contributors") + sbom_ext = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") + use_repo(sbom_ext, "sbom_metadata") """ def _generate_purl_from_url(url, name, version): @@ -126,9 +114,7 @@ def _sbom_metadata_impl(module_ctx): """Collects SBOM metadata from all modules in dependency graph.""" all_http_archives = {} all_git_repos = {} - all_licenses = {} all_modules = {} - all_crates = {} for mod in module_ctx.modules: module_name = mod.name @@ -173,86 +159,11 @@ def _sbom_metadata_impl(module_ctx): "declared_by": module_name, } - # Collect license info for bazel_dep modules, http_archive, git_repository, and crate deps - for tag in mod.tags.license: - # Check dependency type - dep_type = tag.type if hasattr(tag, "type") and tag.type else "" - - # Check if this has URL info (http_archive dependency) - url = "" - if hasattr(tag, "urls") and tag.urls: - url = tag.urls[0] - elif hasattr(tag, "url") and tag.url: - url = tag.url - - # Check if this has remote info (git_repository dependency) - remote = tag.remote if hasattr(tag, "remote") and tag.remote else "" - - # Get explicit version if provided - explicit_version = tag.version if hasattr(tag, "version") and 
tag.version else "" - - # Get supplier if provided - supplier = tag.supplier if hasattr(tag, "supplier") and tag.supplier else "" - - if dep_type == "cargo": - # Rust crate - version = explicit_version if explicit_version else "unknown" - all_crates[tag.name] = { - "version": version, - "purl": tag.purl if tag.purl else "pkg:cargo/{}@{}".format(tag.name, version), - "license": tag.license, - "supplier": supplier, - } - elif url or (explicit_version and not remote): - # http_archive dependency - version = explicit_version if explicit_version else _extract_version_from_url(url) - purl = tag.purl if tag.purl else _generate_purl_from_url(url, tag.name, version) - all_http_archives[tag.name] = { - "version": version or "unknown", - "url": url, - "purl": purl, - "license": tag.license, - "supplier": supplier, - "declared_by": module_name, - } - elif remote: - # git_repository dependency - version = explicit_version if explicit_version else "unknown" - purl = tag.purl if tag.purl else _generate_purl_from_git(remote, tag.name, version) - all_git_repos[tag.name] = { - "version": version, - "remote": remote, - "commit": "", - "tag": "", - "purl": purl, - "license": tag.license, - "supplier": supplier, - "declared_by": module_name, - } - else: - # bazel_dep module license override - all_licenses[tag.name] = { - "license": tag.license, - "supplier": supplier, - "purl": tag.purl if tag.purl else "", - } - - # Apply license/supplier overrides to modules - for name, license_info in all_licenses.items(): - if name in all_modules: - all_modules[name]["license"] = license_info["license"] - if license_info.get("supplier"): - all_modules[name]["supplier"] = license_info["supplier"] - if license_info["purl"]: - all_modules[name]["purl"] = license_info["purl"] - # Generate metadata JSON metadata_content = json.encode({ "modules": all_modules, "http_archives": all_http_archives, "git_repositories": all_git_repos, - "crates": all_crates, - "licenses": all_licenses, }) _sbom_metadata_repo( 
@@ -289,28 +200,11 @@ _git_repository_tag = tag_class( }, ) -# Tag to add license info to any dependency (bazel_dep, http_archive, git_repository, or crate) -_license_tag = tag_class( - doc = "Add license/supplier metadata for any dependency", - attrs = { - "name": attr.string(mandatory = True, doc = "Dependency name"), - "license": attr.string(mandatory = True, doc = "SPDX license identifier"), - "supplier": attr.string(doc = "Supplier/organization name (e.g., 'Boost.org', 'Google LLC')"), - "version": attr.string(doc = "Version string (for http_archive/git_repository/crate; auto-extracted for bazel_dep)"), - "type": attr.string(doc = "Dependency type: 'cargo' for Rust crates (affects PURL generation). Leave empty for auto-detection."), - "purl": attr.string(doc = "Override Package URL"), - "url": attr.string(doc = "Download URL for http_archive (for PURL generation)"), - "urls": attr.string_list(doc = "Download URLs for http_archive (for PURL generation)"), - "remote": attr.string(doc = "Git remote URL for git_repository (for PURL generation)"), - }, -) - sbom_metadata = module_extension( implementation = _sbom_metadata_impl, tag_classes = { "http_archive": _http_archive_tag, "git_repository": _git_repository_tag, - "license": _license_tag, }, doc = "Collects SBOM metadata from dependency declarations", ) diff --git a/sbom/internal/generator/BUILD b/sbom/internal/generator/BUILD index d22cd19..3158a3a 100644 --- a/sbom/internal/generator/BUILD +++ b/sbom/internal/generator/BUILD @@ -10,6 +10,10 @@ package(default_visibility = ["//sbom:__subpackages__"]) py_binary( name = "sbom_generator", srcs = ["sbom_generator.py"], + data = [ + "//sbom:crates_metadata.json", + "//sbom:cpp_metadata.json", + ], main = "sbom_generator.py", deps = [ ":cyclonedx_formatter", diff --git a/sbom/internal/generator/cyclonedx_formatter.py b/sbom/internal/generator/cyclonedx_formatter.py index 67d6fd2..329e0d1 100644 --- a/sbom/internal/generator/cyclonedx_formatter.py +++ 
b/sbom/internal/generator/cyclonedx_formatter.py @@ -129,6 +129,7 @@ def _create_cdx_component(component: dict[str, Any]) -> dict[str, Any]: comp_type = component.get("type", "library") source = component.get("source", "") url = component.get("url", "") + checksum = component.get("checksum", "") cdx_comp: dict[str, Any] = { "type": _map_type_to_cdx_type(comp_type), @@ -157,6 +158,15 @@ def _create_cdx_component(component: dict[str, Any]) -> dict[str, Any]: "name": supplier, } + # Add hashes (SHA-256 from Cargo.lock) + if checksum: + cdx_comp["hashes"] = [ + { + "alg": "SHA-256", + "content": checksum, + } + ] + # Add external references external_refs = [] diff --git a/sbom/internal/generator/sbom_generator.py b/sbom/internal/generator/sbom_generator.py index 4c79960..3b18470 100644 --- a/sbom/internal/generator/sbom_generator.py +++ b/sbom/internal/generator/sbom_generator.py @@ -17,6 +17,171 @@ from sbom.internal.generator.cyclonedx_formatter import generate_cyclonedx +def load_crates_cache() -> dict[str, Any]: + """Load pre-generated crates metadata cache. + + Returns: + Dict mapping crate name to metadata (license, checksum, etc.) + """ + # Try multiple paths for cache file + possible_paths = [ + # Bazel runfiles location + Path(__file__).parent.parent.parent.parent.parent / "crates_metadata.json", + # Development/source tree location + Path(__file__).parent.parent.parent / "crates_metadata.json", + # Same directory as script + Path(__file__).parent / "crates_metadata.json", + ] + + for cache_path in possible_paths: + if cache_path.exists(): + try: + with open(cache_path, encoding="utf-8") as f: + return json.load(f) + except (OSError, json.JSONDecodeError): + continue + + # No cache found + return {} + + +def load_cpp_cache() -> dict[str, Any]: + """Load pre-generated C++ dependency metadata cache. + + Returns: + Dict mapping dependency name to metadata (license, supplier, version, etc.) 
+ """ + possible_paths = [ + Path(__file__).parent.parent.parent.parent.parent / "cpp_metadata.json", + Path(__file__).parent.parent.parent / "cpp_metadata.json", + Path(__file__).parent / "cpp_metadata.json", + ] + + for cache_path in possible_paths: + if cache_path.exists(): + try: + with open(cache_path, encoding="utf-8") as f: + return json.load(f) + except (OSError, json.JSONDecodeError): + continue + + return {} + + +def cpp_cache_to_components(cpp_cache: dict[str, Any]) -> list[dict[str, Any]]: + """Convert C++ metadata cache to component list for enrichment. + + Args: + cpp_cache: Dict mapping dep name to metadata + + Returns: + List of component dicts in internal format + """ + components = [] + for name, data in cpp_cache.items(): + version = data.get("version", "unknown") + component = { + "name": name, + "version": version, + "purl": data.get("purl", f"pkg:generic/{name}@{version}"), + "type": "library", + "license": data.get("license", ""), + "supplier": data.get("supplier", ""), + } + if data.get("url"): + component["url"] = data["url"] + components.append(component) + return components + + +def normalize_name(name: str) -> str: + """Normalize a dependency name for fuzzy matching. + + Handles naming differences between Bazel repos and C++ metadata cache: + e.g. nlohmann_json vs nlohmann-json, libfmt vs fmt. + + Args: + name: Dependency name to normalize + + Returns: + Normalized name string for comparison + """ + n = name.lower().strip() + for prefix in ("lib", "lib_"): + if n.startswith(prefix) and len(n) > len(prefix): + n = n[len(prefix):] + n = n.replace("-", "").replace("_", "").replace(".", "") + return n + + +def enrich_components_from_cpp_cache( + components: list[dict[str, Any]], + cpp_components: list[dict[str, Any]], + metadata: dict[str, Any], +) -> list[dict[str, Any]]: + """Enrich Bazel-discovered components with C++ metadata cache. 
+ + For each Bazel component, finds a matching C++ cache entry by normalized + name and fills in missing fields (license, supplier, version, purl). + Unmatched cache entries are appended. + + Args: + components: Bazel-discovered components to enrich + cpp_components: Components from C++ metadata cache + metadata: Metadata dict + + Returns: + Enriched list of components + """ + # Build lookup: normalized_name -> cache component + cpp_by_name: dict[str, dict[str, Any]] = {} + for cc in cpp_components: + norm = normalize_name(cc["name"]) + cpp_by_name[norm] = cc + cpp_by_name[cc["name"].lower()] = cc + + matched_norms: set[str] = set() + + for comp in components: + comp_name = comp.get("name", "") + norm_name = normalize_name(comp_name) + + cpp_match = cpp_by_name.get(norm_name) or cpp_by_name.get(comp_name.lower()) + if not cpp_match: + continue + + matched_norms.add(normalize_name(cpp_match["name"])) + + # Enrich missing fields only + if not comp.get("license") and cpp_match.get("license"): + comp["license"] = cpp_match["license"] + + if not comp.get("supplier") and cpp_match.get("supplier"): + comp["supplier"] = cpp_match["supplier"] + + if comp.get("version") in ("unknown", "") and cpp_match.get("version") not in ("unknown", ""): + comp["version"] = cpp_match["version"] + + if comp.get("purl", "").endswith("@unknown") and cpp_match.get("purl"): + comp["purl"] = cpp_match["purl"] + + if not comp.get("url") and cpp_match.get("url"): + comp["url"] = cpp_match["url"] + + if not comp.get("checksum") and cpp_match.get("checksum"): + comp["checksum"] = cpp_match["checksum"] + + # Append unmatched cache components not already in Bazel's graph + existing_norms = {normalize_name(c.get("name", "")) for c in components} + for cc in cpp_components: + norm = normalize_name(cc["name"]) + if norm not in existing_norms and norm not in matched_norms: + cc["source"] = "cdxgen" + components.append(cc) + + return components + + def main() -> int: """Main entry point for SBOM 
generation.""" parser = argparse.ArgumentParser(description="Generate SBOM from Bazel deps") @@ -51,6 +216,20 @@ def main() -> int: except (OSError, json.JSONDecodeError): pass # Skip files that can't be read + # Load crates metadata cache (licenses + checksums + versions) + crates_cache = load_crates_cache() + + # Add crates cache to metadata + if crates_cache: + if "crates" not in metadata: + metadata["crates"] = {} + for name, cache_data in crates_cache.items(): + metadata["crates"].setdefault(name, cache_data) + + # Load C++ metadata cache (auto-discovered, like crates cache) + cpp_cache = load_cpp_cache() + cpp_components = cpp_cache_to_components(cpp_cache) if cpp_cache else [] + # Filter external repos (exclude build tools) external_repos = data.get("external_repos", []) exclude_patterns = data.get("exclude_patterns", []) @@ -67,6 +246,13 @@ def main() -> int: # Deduplicate components by name components = deduplicate_components(components) + # Enrich components with C++ metadata cache + if cpp_components: + components = enrich_components_from_cpp_cache( + components, cpp_components, metadata + ) + components = deduplicate_components(components) + # Generate timestamp in SPDX-compliant format (YYYY-MM-DDTHH:MM:SSZ) timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") @@ -132,24 +318,20 @@ def resolve_component( # Normalize repo name - bzlmod adds "+" suffix to module repos normalized_name = repo_name.rstrip("+") - # Get license overrides - licenses = metadata.get("licenses", {}) - # Check if it's a bazel_dep module modules = metadata.get("modules", {}) if normalized_name in modules: mod = modules[normalized_name] - license_info = licenses.get(normalized_name, {}) return { "name": normalized_name, "version": mod.get("version", "unknown"), "purl": mod.get("purl", f"pkg:bazel/{normalized_name}@unknown"), "type": "library", - "supplier": mod.get("supplier", "") or license_info.get("supplier", ""), - "license": mod.get("license", "") or 
license_info.get("license", ""), + "supplier": mod.get("supplier", ""), + "license": mod.get("license", ""), } - # Check if it's an http_archive dependency (declared via sbom.http_archive or sbom.license) + # Check if it's an http_archive dependency http_archives = metadata.get("http_archives", {}) if normalized_name in http_archives: archive = http_archives[normalized_name] @@ -163,7 +345,7 @@ def resolve_component( "supplier": archive.get("supplier", ""), } - # Check if it's a git_repository dependency (declared via sbom.git_repository or sbom.license) + # Check if it's a git_repository dependency git_repos = metadata.get("git_repositories", {}) if normalized_name in git_repos: repo = git_repos[normalized_name] @@ -177,11 +359,11 @@ def resolve_component( "supplier": repo.get("supplier", ""), } - # Check if it's a crate declared via sbom.license(type="cargo") + # Check if it's a crate from the metadata cache crates = metadata.get("crates", {}) if normalized_name in crates: crate = crates[normalized_name] - return { + result = { "name": normalized_name, "version": crate.get("version", "unknown"), "purl": crate.get("purl", f"pkg:cargo/{normalized_name}@unknown"), @@ -189,37 +371,25 @@ def resolve_component( "license": crate.get("license", ""), "supplier": crate.get("supplier", ""), } - - # Check if it's a bazel_dep module with license info (third-party modules - # that don't use the sbom extension won't appear in 'modules', but their - # license/supplier info is stored in 'licenses' via sbom.license() tags) - if normalized_name in licenses: - license_info = licenses[normalized_name] - return { - "name": normalized_name, - "version": "unknown", - "purl": license_info.get("purl") or f"pkg:bazel/{normalized_name}@unknown", - "type": "library", - "license": license_info.get("license", ""), - "supplier": license_info.get("supplier", ""), - } + if crate.get("checksum"): + result["checksum"] = crate["checksum"] + return result # Handle score_ prefixed repos that might be 
modules if normalized_name.startswith("score_"): - license_info = licenses.get(normalized_name, {}) return { "name": normalized_name, "version": "unknown", "purl": f"pkg:github/eclipse-score/{normalized_name}@unknown", "type": "library", - "supplier": license_info.get("supplier", "") or "Eclipse Foundation", - "license": license_info.get("license", ""), + "supplier": "Eclipse Foundation", + "license": "", } # Handle crate universe repos - bzlmod format # e.g., rules_rust++crate+crate_index__serde-1.0.228 # e.g., rules_rust++crate+crate_index__iceoryx2-qnx8-0.7.0 - manual_crates = metadata.get("crates", {}) + cached_crates = metadata.get("crates", {}) if "crate_index__" in repo_name or "crate+" in repo_name: # Extract the crate info part after crate_index__ @@ -243,8 +413,8 @@ def resolve_component( crate_name = "-".join(parts[:version_idx]).replace("_", "-") version = "-".join(parts[version_idx:]) - # Look up manual sbom.license(type="cargo") declarations - crate_meta = manual_crates.get(crate_name, {}) + # Look up crate metadata from cache + crate_meta = cached_crates.get(crate_name, {}) result = { "name": crate_name, @@ -258,6 +428,8 @@ def resolve_component( result["supplier"] = crate_meta["supplier"] if crate_meta.get("repository"): result["url"] = crate_meta["repository"] + if crate_meta.get("checksum"): + result["checksum"] = crate_meta["checksum"] return result # Handle legacy crate universe format (e.g., crates_io__tokio-1.10.0) @@ -271,8 +443,8 @@ def resolve_component( crate_name = crate_info[:last_hyphen].replace("_", "-") version = crate_info[last_hyphen + 1 :] - # Look up manual sbom.license(type="cargo") declarations - crate_meta = manual_crates.get(crate_name, {}) + # Look up crate metadata from cache + crate_meta = cached_crates.get(crate_name, {}) result = { "name": crate_name, @@ -286,6 +458,8 @@ def resolve_component( result["supplier"] = crate_meta["supplier"] if crate_meta.get("repository"): result["url"] = crate_meta["repository"] + if 
crate_meta.get("checksum"): + result["checksum"] = crate_meta["checksum"] return result # Unknown repository - return with unknown version diff --git a/sbom/scripts/generate_cpp_metadata_cache.py b/sbom/scripts/generate_cpp_metadata_cache.py new file mode 100644 index 0000000..cbc0ea1 --- /dev/null +++ b/sbom/scripts/generate_cpp_metadata_cache.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +"""Generate cpp_metadata.json cache from cdxgen CycloneDX output. + +Usage: + # Generate from cdxgen output: + npx @cyclonedx/cdxgen -t cpp --deep -r -o cdxgen_output.cdx.json + python3 generate_cpp_metadata_cache.py cdxgen_output.cdx.json ../cpp_metadata.json + + # Or pipe directly: + npx @cyclonedx/cdxgen -t cpp --deep -r | python3 generate_cpp_metadata_cache.py - ../cpp_metadata.json +""" + +import argparse +import json +import sys + + +def convert_cdxgen_to_cache(cdxgen_path: str) -> dict: + """Convert CycloneDX JSON from cdxgen to internal cache format.""" + if cdxgen_path == "-": + cdx_data = json.load(sys.stdin) + else: + with open(cdxgen_path, encoding="utf-8") as f: + cdx_data = json.load(f) + + if cdx_data.get("bomFormat") != "CycloneDX": + print("Error: Input is not a CycloneDX JSON file", file=sys.stderr) + sys.exit(1) + + cache = {} + for comp in cdx_data.get("components", []): + name = comp.get("name", "") + if not name: + continue + + entry = { + "version": comp.get("version", "unknown"), + } + + # License + licenses = comp.get("licenses", []) + if licenses: + first = licenses[0] + lic_obj = first.get("license", {}) + lic_id = lic_obj.get("id", "") or lic_obj.get("name", "") + if not lic_id: + lic_id = first.get("expression", "") + if lic_id: + entry["license"] = lic_id + + # Supplier + supplier = comp.get("supplier", {}) + if supplier and supplier.get("name"): + entry["supplier"] = supplier["name"] + elif comp.get("publisher"): + entry["supplier"] = comp["publisher"] + + # PURL + if comp.get("purl"): + entry["purl"] = comp["purl"] + + # URL from externalReferences 
+ for ref in comp.get("externalReferences", []): + if ref.get("type") in ("website", "distribution", "vcs") and ref.get("url"): + entry["url"] = ref["url"] + break + + cache[name] = entry + + return cache + + +def main(): + parser = argparse.ArgumentParser( + description="Convert cdxgen CycloneDX output to cpp_metadata.json cache" + ) + parser.add_argument("input", help="cdxgen CycloneDX JSON file (or - for stdin)") + parser.add_argument( + "output", + nargs="?", + default="cpp_metadata.json", + help="Output cache file (default: cpp_metadata.json)", + ) + parser.add_argument( + "--merge", + help="Merge with existing cache file (existing entries take precedence)", + ) + args = parser.parse_args() + + cache = convert_cdxgen_to_cache(args.input) + + if args.merge: + try: + with open(args.merge, encoding="utf-8") as f: + existing = json.load(f) + # Existing entries take precedence + for name, data in cache.items(): + if name not in existing: + existing[name] = data + cache = existing + except (OSError, json.JSONDecodeError): + pass + + with open(args.output, "w", encoding="utf-8") as f: + json.dump(cache, f, indent=2, sort_keys=True) + f.write("\n") + + print(f"Generated {args.output} with {len(cache)} C++ dependencies") + + +if __name__ == "__main__": + main() diff --git a/sbom/scripts/generate_crates_metadata_cache.py b/sbom/scripts/generate_crates_metadata_cache.py new file mode 100755 index 0000000..11eb4c3 --- /dev/null +++ b/sbom/scripts/generate_crates_metadata_cache.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +"""Generate crates.io metadata cache for SBOM generation. + +This script parses Cargo.lock files and fetches license metadata from crates.io, +creating a cache file that can be used during SBOM generation without requiring +network access at build time. 
+ +Usage: + python3 generate_crates_metadata_cache.py [output.json] + +Example: + python3 generate_crates_metadata_cache.py ../../orchestrator/Cargo.lock crates_metadata.json +""" + +import argparse +import json +import sys +import urllib.request +import urllib.error +from pathlib import Path +from typing import Dict, Any + + +def parse_cargo_lock(lockfile_path: str) -> Dict[str, Dict[str, Any]]: + """Parse Cargo.lock and extract crate information. + + Args: + lockfile_path: Path to Cargo.lock file + + Returns: + Dict mapping crate name to {version, checksum, source} + """ + try: + import tomllib as tomli # Python 3.11+ + except ImportError: + try: + import tomli + except ImportError: + print("ERROR: tomli/tomllib library not found. Use Python 3.11+ or install tomli", file=sys.stderr) + sys.exit(1) + + with open(lockfile_path, 'rb') as f: + lock_data = tomli.load(f) + + crates = {} + for package in lock_data.get('package', []): + name = package['name'] + source = package.get('source', '') + + # Only include crates from crates.io + if 'registry+https://github.com/rust-lang/crates.io-index' in source: + crates[name] = { + 'name': name, + 'version': package['version'], + 'checksum': package.get('checksum', ''), + 'source': source, + } + + return crates + + +def fetch_crate_metadata_from_crates_io(crate_name: str) -> Dict[str, Any]: + """Fetch crate metadata from crates.io API. + + Args: + crate_name: Name of the crate + + Returns: + Dict with license, repository, description, etc. 
+ """ + url = f"https://crates.io/api/v1/crates/{crate_name}" + + try: + req = urllib.request.Request(url) + req.add_header('User-Agent', 'SCORE-SBOM-Generator/1.0') + + with urllib.request.urlopen(req, timeout=10) as response: + data = json.loads(response.read().decode('utf-8')) + crate = data.get('crate', {}) + + return { + 'license': crate.get('license'), + 'repository': crate.get('repository'), + 'description': crate.get('description'), + 'homepage': crate.get('homepage'), + 'documentation': crate.get('documentation'), + } + except urllib.error.HTTPError as e: + if e.code == 404: + print(f" WARNING: Crate '{crate_name}' not found on crates.io", file=sys.stderr) + else: + print(f" WARNING: HTTP error {e.code} fetching '{crate_name}'", file=sys.stderr) + return {} + except urllib.error.URLError as e: + print(f" WARNING: Network error fetching '{crate_name}': {e}", file=sys.stderr) + return {} + except Exception as e: + print(f" WARNING: Error fetching '{crate_name}': {e}", file=sys.stderr) + return {} + + +def generate_cache(cargo_lock_path: str, use_network: bool = True) -> Dict[str, Dict[str, Any]]: + """Generate metadata cache from Cargo.lock with optional crates.io lookup. + + Args: + cargo_lock_path: Path to Cargo.lock file + use_network: If True, fetch metadata from crates.io; if False, use checksums only + + Returns: + Dict mapping crate name to metadata + """ + print(f"Parsing {cargo_lock_path}...") + crates = parse_cargo_lock(cargo_lock_path) + + print(f"Found {len(crates)} crates from crates.io") + + if not use_network: + print("Network lookups disabled. 
Using checksums only.") + return crates + + print("Fetching license metadata from crates.io...") + cache = {} + + for i, (name, info) in enumerate(crates.items(), 1): + print(f" [{i}/{len(crates)}] {name} {info['version']}...", end='', flush=True) + + metadata = fetch_crate_metadata_from_crates_io(name) + + # Merge Cargo.lock data with crates.io metadata + cache[name] = { + 'version': info['version'], + 'checksum': info['checksum'], + 'purl': f"pkg:cargo/{name}@{info['version']}", + 'license': metadata.get('license', ''), + 'repository': metadata.get('repository', ''), + 'description': metadata.get('description', ''), + 'homepage': metadata.get('homepage', ''), + } + + if cache[name]['license']: + print(f" ✓ {cache[name]['license']}") + else: + print(" (no license)") + + return cache + + +def main(): + parser = argparse.ArgumentParser( + description='Generate crates.io metadata cache for SBOM generation' + ) + parser.add_argument( + 'cargo_lock', + help='Path to Cargo.lock file' + ) + parser.add_argument( + 'output', + nargs='?', + default='crates_metadata.json', + help='Output JSON file (default: crates_metadata.json)' + ) + parser.add_argument( + '--no-network', + action='store_true', + help='Skip network lookups (checksums only)' + ) + parser.add_argument( + '--merge', + help='Merge with existing cache file instead of overwriting' + ) + + args = parser.parse_args() + + # Generate new cache + cache = generate_cache(args.cargo_lock, use_network=not args.no_network) + + # Merge with existing cache if requested + if args.merge and Path(args.merge).exists(): + print(f"\nMerging with existing cache: {args.merge}") + with open(args.merge) as f: + existing = json.load(f) + + # Prefer new data, but keep entries not in current Cargo.lock + merged = existing.copy() + merged.update(cache) + cache = merged + print(f"Merged cache now contains {len(cache)} entries") + + # Write cache + print(f"\nWriting cache to {args.output}...") + with open(args.output, 'w') as f: + 
json.dump(cache, f, indent=2, sort_keys=True) + + # Print statistics + with_license = sum(1 for c in cache.values() if c.get('license')) + with_checksum = sum(1 for c in cache.values() if c.get('checksum')) + + print(f"\n✓ Cache generated successfully!") + print(f" Total crates: {len(cache)}") + print(f" With licenses: {with_license} ({with_license/len(cache)*100:.1f}%)") + print(f" With checksums: {with_checksum} ({with_checksum/len(cache)*100:.1f}%)") + + return 0 + + +if __name__ == '__main__': + sys.exit(main())