From 6e8fc61c719180428175254db2a3aeacf7cc098a Mon Sep 17 00:00:00 2001 From: Jim Crossley Date: Thu, 15 Jan 2026 18:16:03 -0500 Subject: [PATCH 1/6] feat: Better support for CycloneDX component types Specifically files, machine learning models, and cryptographic assets. We've introduced place holders for the latter 2 until migrations for new tables are added in a subsequent commit. This indirectly fixes #2205 because it prevents cryptographic assets from being displayed as packages for CBOM's, but there's not yet a service API that would allow you to fetch those assets. That'll come in a separate commit. (cherry picked from commit 06364ad8e30aa21d7df39fcb4d60ed275f5d8645) --- .../src/endpoints/tests/latest_filters.rs | 6 +- .../graph/sbom/common/cryptographic_asset.rs | 42 +++++ .../sbom/common/machine_learning_model.rs | 42 +++++ modules/ingestor/src/graph/sbom/common/mod.rs | 4 + modules/ingestor/src/graph/sbom/cyclonedx.rs | 175 ++++++++++++++++-- 5 files changed, 251 insertions(+), 18 deletions(-) create mode 100644 modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs create mode 100644 modules/ingestor/src/graph/sbom/common/machine_learning_model.rs diff --git a/modules/analysis/src/endpoints/tests/latest_filters.rs b/modules/analysis/src/endpoints/tests/latest_filters.rs index 937ba9c86..916eaa352 100644 --- a/modules/analysis/src/endpoints/tests/latest_filters.rs +++ b/modules/analysis/src/endpoints/tests/latest_filters.rs @@ -518,7 +518,7 @@ async fn parse_ids_find_only_exact_matches( #[test_context(TrustifyContext)] #[rstest] #[test_log::test(actix_web::test)] -async fn test_tc2578( +async fn test_tc2758( ctx: &TrustifyContext, #[values(false, true)] prime_cache: bool, ) -> Result<(), anyhow::Error> { @@ -579,8 +579,8 @@ async fn test_tc2578( "relationship": "package", "descendants": [ { - "node_id": "pkg:maven/org.jboss.eap/wildfly-ee-aggregate-javadocs@7.4.0.GA-redhat-00005?classifier=javadocs&type=jar", - "name": "wildfly-ee-aggregate-javadocs", + "node_id": "pkg:generic/pom.xml?checksum=sha256%3A974823188145bdb517f9692341a237bdee75c8312d3c86ae0fc4d390225bb923", + "name": "pom.xml", "relationship": "dependency", }] }] diff --git a/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs new file mode 100644 index 000000000..b0fc255a4 --- /dev/null +++ b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs @@ -0,0 +1,42 @@ +use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; +use sea_orm::{ConnectionTrait, DbErr}; +use uuid::Uuid; + +// Creator of files and relationships. +pub struct CryptographicAssetCreator { + nodes: NodeCreator, +} + +impl CryptographicAssetCreator { + pub fn new(sbom_id: Uuid) -> Self { + Self { + nodes: NodeCreator::new(sbom_id), + } + } + + pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { + Self { + nodes: NodeCreator::with_capacity(sbom_id, capacity_files), + } + } + + pub fn add(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.nodes.add(node_id.clone(), name, checksums); + } + + pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { + self.nodes.create(db).await?; + + Ok(()) + } +} + +impl<'a> ReferenceSource<'a> for CryptographicAssetCreator { + fn references(&'a self) -> impl IntoIterator { + self.nodes.references() + } +} diff --git a/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs new file mode 100644 index 000000000..f9862f62a --- /dev/null +++ b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs @@ -0,0 +1,42 @@ +use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; +use sea_orm::{ConnectionTrait, DbErr}; +use uuid::Uuid; + +// Creator of files and relationships. +pub struct MachineLearningModelCreator { + nodes: NodeCreator, +} + +impl MachineLearningModelCreator { + pub fn new(sbom_id: Uuid) -> Self { + Self { + nodes: NodeCreator::new(sbom_id), + } + } + + pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { + Self { + nodes: NodeCreator::with_capacity(sbom_id, capacity_files), + } + } + + pub fn add(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.nodes.add(node_id.clone(), name, checksums); + } + + pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { + self.nodes.create(db).await?; + + Ok(()) + } +} + +impl<'a> ReferenceSource<'a> for MachineLearningModelCreator { + fn references(&'a self) -> impl IntoIterator { + self.nodes.references() + } +} diff --git a/modules/ingestor/src/graph/sbom/common/mod.rs b/modules/ingestor/src/graph/sbom/common/mod.rs index 8ed48e4ea..33e4e6f55 100644 --- a/modules/ingestor/src/graph/sbom/common/mod.rs +++ b/modules/ingestor/src/graph/sbom/common/mod.rs @@ -1,16 +1,20 @@ mod checksum; +mod cryptographic_asset; mod external; mod file; mod license; mod licensing_info; +mod machine_learning_model; mod node; mod package; mod relationship; pub use checksum::*; +pub use cryptographic_asset::*; pub use external::*; pub use file::*; pub use license::*; pub use licensing_info::*; +pub use machine_learning_model::*; pub use package::*; pub use relationship::*; diff --git a/modules/ingestor/src/graph/sbom/cyclonedx.rs b/modules/ingestor/src/graph/sbom/cyclonedx.rs index 31f0e5954..ceb64b648 100644 --- a/modules/ingestor/src/graph/sbom/cyclonedx.rs +++ b/modules/ingestor/src/graph/sbom/cyclonedx.rs @@ -4,9 +4,10 @@ use crate::{ product::ProductInformation, purl::creator::PurlCreator, sbom::{ - CycloneDx as CycloneDxProcessor, LicenseCreator, LicenseInfo, NodeInfoParam, - PackageCreator, PackageLicensenInfo, PackageReference, References, RelationshipCreator, - SbomContext, SbomInformation, + CryptographicAssetCreator, CycloneDx as CycloneDxProcessor, LicenseCreator, + LicenseInfo, MachineLearningModelCreator, NodeInfoParam, PackageCreator, + PackageLicensenInfo, PackageReference, References, RelationshipCreator, SbomContext, + SbomInformation, processor::{ InitContext, PostContext, Processor, RedHatProductComponentRelationships, RunProcessors, @@ -31,6 +32,8 @@ use trustify_common::{cpe::Cpe, purl::Purl}; use trustify_entity::relationship::Relationship; use uuid::Uuid; +use super::FileCreator; + /// Marker we use for identifying the document itself. /// /// Similar to the SPDX doc id, which is attached to the document itself. CycloneDX doesn't have @@ -289,6 +292,9 @@ impl<'a> Creator<'a> { let mut purls = PurlCreator::new(); let mut cpes = CpeCreator::new(); let mut packages = PackageCreator::with_capacity(self.sbom_id, self.components.len()); + let mut files = FileCreator::new(self.sbom_id); + let mut models = MachineLearningModelCreator::new(self.sbom_id); + let mut crypto = CryptographicAssetCreator::new(self.sbom_id); let mut relationships = RelationshipCreator::with_capacity( self.sbom_id, self.relations.len(), @@ -302,6 +308,9 @@ impl<'a> Creator<'a> { &mut purls, &mut licenses, &mut packages, + &mut files, + &mut models, + &mut crypto, &mut relationships, ); creator.create(comp); @@ -326,7 +335,10 @@ impl<'a> Creator<'a> { let sources = References::new() .add_source(&[CYCLONEDX_DOC_REF]) - .add_source(&packages); + .add_source(&packages) + .add_source(&files) + .add_source(&models) + .add_source(&crypto); relationships .validate(sources) .map_err(Error::InvalidContent)?; @@ -338,6 +350,9 @@ impl<'a> Creator<'a> { purls.create(db).await?; cpes.create(db).await?; packages.create(db).await?; + files.create(db).await?; + models.create(db).await?; + crypto.create(db).await?; relationships.create(db).await?; // done @@ -351,6 +366,9 @@ struct ComponentCreator<'a> { purls: &'a mut PurlCreator, licenses: &'a mut LicenseCreator, packages: &'a mut PackageCreator, + files: &'a mut FileCreator, + models: &'a mut MachineLearningModelCreator, + crypto: &'a mut CryptographicAssetCreator, relationships: &'a mut RelationshipCreator, refs: Vec, @@ -362,6 +380,9 @@ impl<'a> ComponentCreator<'a> { purls: &'a mut PurlCreator, licenses: &'a mut LicenseCreator, packages: &'a mut PackageCreator, + files: &'a mut FileCreator, + models: &'a mut MachineLearningModelCreator, + crypto: &'a mut CryptographicAssetCreator, relationships: &'a mut RelationshipCreator, ) -> Self { Self { @@ -370,6 +391,9 @@ impl<'a> ComponentCreator<'a> { licenses, refs: Default::default(), packages, + files, + models, + crypto, relationships, } } @@ -438,17 +462,50 @@ impl<'a> ComponentCreator<'a> { }) .collect::>(); - self.packages.add( - NodeInfoParam { - node_id: node_id.clone(), - name: comp.name.to_string(), - group: comp.group.as_ref().map(|v| v.to_string()), - version: comp.version.as_ref().map(|v| v.to_string()), - package_license_info: cyclone_licenses, - }, - self.refs, - comp.hashes.clone().into_iter().flatten(), - ); + match ComponentType::from_str(&comp.type_) { + Ok(ty) => { + use ComponentType::*; + match ty { + // We treat all these types as "packages" + Application | Framework | Library | Container | OperatingSystem => { + self.packages.add( + NodeInfoParam { + node_id: node_id.clone(), + name: comp.name.to_string(), + group: comp.group.as_ref().map(|v| v.to_string()), + version: comp.version.as_ref().map(|v| v.to_string()), + package_license_info: cyclone_licenses, + }, + self.refs, + comp.hashes.clone().into_iter().flatten(), + ) + } + File => { + self.files.add( + node_id.clone(), + comp.name.to_string(), + comp.hashes.clone().into_iter().flatten(), + ); + } + MachineLearningModel => { + self.models.add( + node_id.clone(), + comp.name.to_string(), + comp.hashes.clone().into_iter().flatten(), + ); + } + CryptographicAsset => { + self.crypto.add( + node_id.clone(), + comp.name.to_string(), + comp.hashes.clone().into_iter().flatten(), + ); + } + _ => log::error!("Unsupported component type: '{ty}'"), + } + } + Err(e) => log::error!("Invalid component type: {e}"), + } for ancestor in comp .pedigree @@ -467,6 +524,9 @@ impl<'a> ComponentCreator<'a> { self.purls, self.licenses, self.packages, + self.files, + self.models, + self.crypto, self.relationships, )); @@ -494,6 +554,9 @@ impl<'a> ComponentCreator<'a> { self.purls, self.licenses, self.packages, + self.files, + self.models, + self.crypto, self.relationships, )); @@ -550,3 +613,85 @@ impl<'a> ComponentCreator<'a> { license_uuid } } + +/// Type of the components within an SBOM, mostly based on +/// https://cyclonedx.org/docs/1.6/json/#components_items_type +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + serde::Serialize, + serde::Deserialize, + strum::EnumString, + strum::Display, +)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case", ascii_case_insensitive)] +pub enum ComponentType { + /// A software application + Application, + /// A software framework + Framework, + /// A software library + Library, + /// A packaging and/or runtime format + Container, + /// A runtime environment which interprets or executes software + Platform, + /// A software operating system without regard to deployment model + OperatingSystem, + /// A hardware device such as a processor or chip-set + Device, + /// A special type of software that operates or controls a particular type of device + DeviceDriver, + /// A special type of software that provides low-level control over a device's hardware + Firmware, + /// A computer file + File, + /// A model based on training data that can make predictions or decisions without being explicitly programmed to do so + MachineLearningModel, + /// A collection of discrete values that convey information + Data, + /// A cryptographic asset including algorithms, protocols, certificates, keys, tokens, and secrets + CryptographicAsset, +} + +#[cfg(test)] +mod test { + use super::*; + use serde_json::json; + use std::str::FromStr; + use test_log::test; + + #[test] + fn component_types() { + use ComponentType::*; + + // The standard conversions + for (s, t) in [ + ("application", Application), + ("framework", Framework), + ("library", Library), + ("container", Container), + ("platform", Platform), + ("operating-system", OperatingSystem), + ("device", Device), + ("device-driver", DeviceDriver), + ("firmware", Firmware), + ("file", File), + ("machine-learning-model", MachineLearningModel), + ("data", Data), + ("cryptographic-asset", CryptographicAsset), + ] { + assert_eq!(ComponentType::from_str(s), Ok(t)); + assert_eq!(t.to_string(), s); + assert_eq!(json!(t), json!(s)); + } + + // Error handling + assert!(ComponentType::from_str("missing").is_err()); + assert_eq!(ComponentType::from_str("FiLe"), Ok(File)); + } +} From 2ffd92e4528426b5ff8b596a7711bda62c1f52bf Mon Sep 17 00:00:00 2001 From: Jim Crossley Date: Thu, 15 Jan 2026 19:03:03 -0500 Subject: [PATCH 2/6] Minor fixes (cherry picked from commit cb0107b47ff87608bde4139e061118695baacb0e) --- modules/fundamental/tests/vuln/mod.rs | 2 +- modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs | 1 - .../ingestor/src/graph/sbom/common/machine_learning_model.rs | 1 - modules/ingestor/src/graph/sbom/cyclonedx.rs | 1 + rustfmt.toml | 1 + 5 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/fundamental/tests/vuln/mod.rs b/modules/fundamental/tests/vuln/mod.rs index 99ed65bf7..cb14ce373 100644 --- a/modules/fundamental/tests/vuln/mod.rs +++ b/modules/fundamental/tests/vuln/mod.rs @@ -18,7 +18,7 @@ async fn issue_1840(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { .analyze_purls(["pkg:rpm/redhat/gnutls@3.7.6-23.el9?arch=aarch64"], &ctx.db) .await?; - println!("{:#?}", result); + log::debug!("{:#?}", result); // check number of PURLs diff --git a/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs index b0fc255a4..efba7a7ac 100644 --- a/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs +++ b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs @@ -2,7 +2,6 @@ use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ConnectionTrait, DbErr}; use uuid::Uuid; -// Creator of files and relationships. pub struct CryptographicAssetCreator { nodes: NodeCreator, } diff --git a/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs index f9862f62a..223d3c5d2 100644 --- a/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs +++ b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs @@ -2,7 +2,6 @@ use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ConnectionTrait, DbErr}; use uuid::Uuid; -// Creator of files and relationships. pub struct MachineLearningModelCreator { nodes: NodeCreator, } diff --git a/modules/ingestor/src/graph/sbom/cyclonedx.rs b/modules/ingestor/src/graph/sbom/cyclonedx.rs index ceb64b648..88030a6a4 100644 --- a/modules/ingestor/src/graph/sbom/cyclonedx.rs +++ b/modules/ingestor/src/graph/sbom/cyclonedx.rs @@ -375,6 +375,7 @@ struct ComponentCreator<'a> { } impl<'a> ComponentCreator<'a> { + #[allow(clippy::too_many_arguments)] pub fn new( cpes: &'a mut CpeCreator, purls: &'a mut PurlCreator, diff --git a/rustfmt.toml b/rustfmt.toml index 350113681..f3e454b61 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1 +1,2 @@ +edition = "2024" style_edition = "2024" From 98b2dfd29b60522e18230c51d790dbf4291f7b01 Mon Sep 17 00:00:00 2001 From: Jim Crossley Date: Sun, 18 Jan 2026 22:04:15 -0500 Subject: [PATCH 3/6] refactor: encapsulate ComponentCreator a bit Also made the use of the `refs` field a bit more clear and reduced some cloning. (cherry picked from commit 6c841717c997ff492f709483a31fd93f5eb4df29) --- .../ingestor/src/graph/sbom/common/package.rs | 6 +- modules/ingestor/src/graph/sbom/cyclonedx.rs | 220 ++++++++---------- modules/ingestor/src/graph/sbom/mod.rs | 7 +- modules/ingestor/src/graph/sbom/spdx.rs | 2 +- 4 files changed, 99 insertions(+), 136 deletions(-) diff --git a/modules/ingestor/src/graph/sbom/common/package.rs b/modules/ingestor/src/graph/sbom/common/package.rs index 4236ab752..a8c45c6b5 100644 --- a/modules/ingestor/src/graph/sbom/common/package.rs +++ b/modules/ingestor/src/graph/sbom/common/package.rs @@ -62,10 +62,10 @@ impl PackageCreator { } } - pub fn add( + pub fn add<'a, I, C>( &mut self, node_info: NodeInfoParam, - refs: impl IntoIterator, + refs: impl Iterator, checksums: I, ) where I: IntoIterator, @@ -77,7 +77,7 @@ impl PackageCreator { self.cpe_refs.push(sbom_package_cpe_ref::ActiveModel { sbom_id: Set(self.sbom_id), node_id: Set(node_info.node_id.clone()), - cpe_id: Set(cpe), + cpe_id: Set(*cpe), }); } PackageReference::Purl(purl) => { diff --git a/modules/ingestor/src/graph/sbom/cyclonedx.rs b/modules/ingestor/src/graph/sbom/cyclonedx.rs index 88030a6a4..5ca53ac5e 100644 --- a/modules/ingestor/src/graph/sbom/cyclonedx.rs +++ b/modules/ingestor/src/graph/sbom/cyclonedx.rs @@ -25,7 +25,7 @@ use sea_orm::ConnectionTrait; use serde_cyclonedx::cyclonedx::v_1_6::{ Component, ComponentEvidenceIdentity, CycloneDx, LicenseChoiceUrl, OrganizationalContact, }; -use std::{borrow::Cow, str::FromStr}; +use std::{borrow::Cow, collections::HashMap, str::FromStr}; use time::{OffsetDateTime, format_description::well_known::Iso8601}; use tracing::instrument; use trustify_common::{cpe::Cpe, purl::Purl}; @@ -289,71 +289,24 @@ impl<'a> Creator<'a> { db: &impl ConnectionTrait, processors: &mut [Box], ) -> Result<(), Error> { - let mut purls = PurlCreator::new(); - let mut cpes = CpeCreator::new(); - let mut packages = PackageCreator::with_capacity(self.sbom_id, self.components.len()); - let mut files = FileCreator::new(self.sbom_id); - let mut models = MachineLearningModelCreator::new(self.sbom_id); - let mut crypto = CryptographicAssetCreator::new(self.sbom_id); - let mut relationships = RelationshipCreator::with_capacity( - self.sbom_id, - self.relations.len(), - CycloneDxProcessor, - ); - let mut licenses = LicenseCreator::new(); + let mut creator = ComponentCreator::new(self.sbom_id, self.components.len()); for comp in self.components { - let creator = ComponentCreator::new( - &mut cpes, - &mut purls, - &mut licenses, - &mut packages, - &mut files, - &mut models, - &mut crypto, - &mut relationships, - ); - creator.create(comp); + creator.add(comp); } for (left, rel, right) in self.relations { - relationships.relate(left, rel, right); + creator.add_relation(left, rel, right); } // post process - - PostContext { - cpes: &cpes, - purls: &purls, - packages: &mut packages, - relationships: &mut relationships.rels, - externals: &mut relationships.externals, - } - .run(processors); + creator.post_process(processors); // validate relationships before inserting + creator.validate().map_err(Error::InvalidContent)?; - let sources = References::new() - .add_source(&[CYCLONEDX_DOC_REF]) - .add_source(&packages) - .add_source(&files) - .add_source(&models) - .add_source(&crypto); - relationships - .validate(sources) - .map_err(Error::InvalidContent)?; - - // create - order matters to prevent cross-table deadlocks when running concurrent - // SBOM ingestions. All SBOM loaders must use the same table insertion order. - - licenses.create(db).await?; - purls.create(db).await?; - cpes.create(db).await?; - packages.create(db).await?; - files.create(db).await?; - models.create(db).await?; - crypto.create(db).await?; - relationships.create(db).await?; + // write to db + creator.create(db).await?; // done @@ -361,45 +314,35 @@ impl<'a> Creator<'a> { } } -struct ComponentCreator<'a> { - cpes: &'a mut CpeCreator, - purls: &'a mut PurlCreator, - licenses: &'a mut LicenseCreator, - packages: &'a mut PackageCreator, - files: &'a mut FileCreator, - models: &'a mut MachineLearningModelCreator, - crypto: &'a mut CryptographicAssetCreator, - relationships: &'a mut RelationshipCreator, - - refs: Vec, +struct ComponentCreator { + cpes: CpeCreator, + purls: PurlCreator, + licenses: LicenseCreator, + packages: PackageCreator, + files: FileCreator, + models: MachineLearningModelCreator, + crypto: CryptographicAssetCreator, + relationships: RelationshipCreator, + // Map each node to a collection of references + refs: HashMap>, } -impl<'a> ComponentCreator<'a> { - #[allow(clippy::too_many_arguments)] - pub fn new( - cpes: &'a mut CpeCreator, - purls: &'a mut PurlCreator, - licenses: &'a mut LicenseCreator, - packages: &'a mut PackageCreator, - files: &'a mut FileCreator, - models: &'a mut MachineLearningModelCreator, - crypto: &'a mut CryptographicAssetCreator, - relationships: &'a mut RelationshipCreator, - ) -> Self { +impl ComponentCreator { + pub fn new(sbom_id: Uuid, capacity: usize) -> Self { Self { - cpes, - purls, - licenses, + cpes: CpeCreator::new(), + purls: PurlCreator::new(), + licenses: LicenseCreator::new(), + packages: PackageCreator::with_capacity(sbom_id, capacity), + files: FileCreator::new(sbom_id), + models: MachineLearningModelCreator::new(sbom_id), + crypto: CryptographicAssetCreator::new(sbom_id), + relationships: RelationshipCreator::new(sbom_id, CycloneDxProcessor), refs: Default::default(), - packages, - files, - models, - crypto, - relationships, } } - pub fn create(mut self, comp: &Component) { + pub fn add(&mut self, comp: &Component) { let node_id = comp .bom_ref .clone() @@ -410,7 +353,7 @@ impl<'a> ComponentCreator<'a> { if let Some(cpe) = &comp.cpe { match Cpe::from_str(cpe.as_ref()) { Ok(cpe) => { - self.add_cpe(cpe); + self.add_cpe(node_id.clone(), cpe); } Err(err) => { log::info!("Skipping CPE due to parsing error: {err}"); @@ -421,7 +364,7 @@ impl<'a> ComponentCreator<'a> { if let Some(purl) = &comp.purl { match Purl::from_str(purl.as_ref()) { Ok(purl) => { - self.add_purl(purl); + self.add_purl(node_id.clone(), purl); } Err(err) => { log::info!("Skipping PURL due to parsing error: {err}"); @@ -442,12 +385,12 @@ impl<'a> ComponentCreator<'a> { match (identity.field.as_str(), &identity.concluded_value) { ("cpe", Some(cpe)) => { if let Ok(cpe) = Cpe::from_str(cpe.as_ref()) { - self.add_cpe(cpe); + self.add_cpe(node_id.clone(), cpe); } } ("purl", Some(purl)) => { if let Ok(purl) = Purl::from_str(purl.as_ref()) { - self.add_purl(purl); + self.add_purl(node_id.clone(), purl); } } @@ -469,6 +412,7 @@ impl<'a> ComponentCreator<'a> { match ty { // We treat all these types as "packages" Application | Framework | Library | Container | OperatingSystem => { + const EMPTY: Vec = vec![]; self.packages.add( NodeInfoParam { node_id: node_id.clone(), @@ -477,7 +421,7 @@ impl<'a> ComponentCreator<'a> { version: comp.version.as_ref().map(|v| v.to_string()), package_license_info: cyclone_licenses, }, - self.refs, + self.refs.get(&node_id).unwrap_or(&EMPTY).iter(), comp.hashes.clone().into_iter().flatten(), ) } @@ -518,24 +462,9 @@ impl<'a> ComponentCreator<'a> { .clone() .unwrap_or_else(|| Uuid::new_v4().to_string()); - // create the component + self.add(ancestor); - let creator = Box::new(ComponentCreator::new( - self.cpes, - self.purls, - self.licenses, - self.packages, - self.files, - self.models, - self.crypto, - self.relationships, - )); - - creator.create(ancestor); - - // and store a relationship - self.relationships - .relate(target, Relationship::AncestorOf, node_id.clone()); + self.add_relation(target, Relationship::AncestorOf, node_id.clone()); } for variant in comp @@ -548,34 +477,30 @@ impl<'a> ComponentCreator<'a> { .clone() .unwrap_or_else(|| Uuid::new_v4().to_string()); - // create the component - - let creator = Box::new(ComponentCreator::new( - self.cpes, - self.purls, - self.licenses, - self.packages, - self.files, - self.models, - self.crypto, - self.relationships, - )); + self.add(variant); - creator.create(variant); - - self.relationships - .relate(node_id.clone(), Relationship::Variant, target); + self.add_relation(node_id.clone(), Relationship::Variant, target); } } - pub fn add_cpe(&mut self, cpe: Cpe) { + fn add_relation(&mut self, left: String, rel: Relationship, right: String) { + self.relationships.relate(left, rel, right); + } + + fn add_cpe(&mut self, node_id: String, cpe: Cpe) { let id = cpe.uuid(); - self.refs.push(PackageReference::Cpe(id)); + self.refs + .entry(node_id) + .or_default() + .push(PackageReference::Cpe(id)); self.cpes.add(cpe); } - pub fn add_purl(&mut self, purl: Purl) { - self.refs.push(PackageReference::Purl(purl.clone())); + fn add_purl(&mut self, node_id: String, purl: Purl) { + self.refs + .entry(node_id) + .or_default() + .push(PackageReference::Purl(purl.clone())); self.purls.add(purl); } @@ -613,6 +538,43 @@ impl<'a> ComponentCreator<'a> { } license_uuid } + + fn post_process(&mut self, processors: &mut [Box]) { + PostContext { + cpes: &self.cpes, + purls: &self.purls, + packages: &mut self.packages, + relationships: &mut self.relationships.rels, + externals: &mut self.relationships.externals, + } + .run(processors); + } + + fn validate(&self) -> Result<(), anyhow::Error> { + let sources = References::new() + .add_source(&[CYCLONEDX_DOC_REF]) + .add_source(&self.packages) + .add_source(&self.files) + .add_source(&self.models) + .add_source(&self.crypto); + self.relationships.validate(sources) + } + + // order matters to prevent cross-table deadlocks when running + // concurrent SBOM ingestions. All SBOM loaders must use the same + // table insertion order. + async fn create(self, db: &impl ConnectionTrait) -> Result<(), Error> { + self.licenses.create(db).await?; + self.purls.create(db).await?; + self.cpes.create(db).await?; + self.packages.create(db).await?; + self.files.create(db).await?; + self.models.create(db).await?; + self.crypto.create(db).await?; + self.relationships.create(db).await?; + + Ok(()) + } } /// Type of the components within an SBOM, mostly based on diff --git a/modules/ingestor/src/graph/sbom/mod.rs b/modules/ingestor/src/graph/sbom/mod.rs index dcc3fca1e..a3eaff80e 100644 --- a/modules/ingestor/src/graph/sbom/mod.rs +++ b/modules/ingestor/src/graph/sbom/mod.rs @@ -602,10 +602,11 @@ impl SbomContext { ) -> Result<(), Error> { let mut creator = PackageCreator::new(self.sbom.sbom_id); - let refs = purls + let refs: Vec = purls .into_iter() .map(PackageReference::Purl) - .chain(cpes.into_iter().map(PackageReference::Cpe)); + .chain(cpes.into_iter().map(PackageReference::Cpe)) + .collect(); creator.add( NodeInfoParam { node_id, @@ -614,7 +615,7 @@ impl SbomContext { version, package_license_info: vec![], }, - refs, + refs.iter(), Checksum::NONE, ); diff --git a/modules/ingestor/src/graph/sbom/spdx.rs b/modules/ingestor/src/graph/sbom/spdx.rs index 106e84251..334905d4d 100644 --- a/modules/ingestor/src/graph/sbom/spdx.rs +++ b/modules/ingestor/src/graph/sbom/spdx.rs @@ -301,7 +301,7 @@ impl SbomContext { version: package.package_version, package_license_info, }, - refs, + refs.iter(), package.package_checksum, ); } From 34169f70acd70e867f92e70e23f45cd4be01631c Mon Sep 17 00:00:00 2001 From: Jim Crossley Date: Mon, 19 Jan 2026 12:13:32 -0500 Subject: [PATCH 4/6] Fail the ingestion if we encounter an invalid component type Also a bit of renaming and commentary (cherry picked from commit 948f9aa00ebf329267b8b471e9693c65423eba72) --- modules/ingestor/src/graph/sbom/cyclonedx.rs | 26 ++++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/modules/ingestor/src/graph/sbom/cyclonedx.rs b/modules/ingestor/src/graph/sbom/cyclonedx.rs index 5ca53ac5e..d01c38dd3 100644 --- a/modules/ingestor/src/graph/sbom/cyclonedx.rs +++ b/modules/ingestor/src/graph/sbom/cyclonedx.rs @@ -292,7 +292,7 @@ impl<'a> Creator<'a> { let mut creator = ComponentCreator::new(self.sbom_id, self.components.len()); for comp in self.components { - creator.add(comp); + creator.add_component(comp)?; } for (left, rel, right) in self.relations { @@ -303,7 +303,7 @@ impl<'a> Creator<'a> { creator.post_process(processors); // validate relationships before inserting - creator.validate().map_err(Error::InvalidContent)?; + creator.validate()?; // write to db creator.create(db).await?; @@ -342,7 +342,7 @@ impl ComponentCreator { } } - pub fn add(&mut self, comp: &Component) { + pub fn add_component(&mut self, comp: &Component) -> Result<(), Error> { let node_id = comp .bom_ref .clone() @@ -433,6 +433,7 @@ impl ComponentCreator { ); } MachineLearningModel => { + // TODO: store the model card data self.models.add( node_id.clone(), comp.name.to_string(), @@ -440,6 +441,7 @@ impl ComponentCreator { ); } CryptographicAsset => { + // TODO: store the crypto properties data self.crypto.add( node_id.clone(), comp.name.to_string(), @@ -449,7 +451,11 @@ impl ComponentCreator { _ => log::error!("Unsupported component type: '{ty}'"), } } - Err(e) => log::error!("Invalid component type: {e}"), + Err(e) => { + return Err(Error::InvalidContent(anyhow::anyhow!( + "Invalid component type: {e}" + ))); + } } for ancestor in comp @@ -462,7 +468,7 @@ impl ComponentCreator { .clone() .unwrap_or_else(|| Uuid::new_v4().to_string()); - self.add(ancestor); + self.add_component(ancestor)?; self.add_relation(target, Relationship::AncestorOf, node_id.clone()); } @@ -477,10 +483,12 @@ impl ComponentCreator { .clone() .unwrap_or_else(|| Uuid::new_v4().to_string()); - self.add(variant); + self.add_component(variant)?; self.add_relation(node_id.clone(), Relationship::Variant, target); } + + Ok(()) } fn add_relation(&mut self, left: String, rel: Relationship, right: String) { @@ -550,14 +558,16 @@ impl ComponentCreator { .run(processors); } - fn validate(&self) -> Result<(), anyhow::Error> { + fn validate(&self) -> Result<(), Error> { let sources = References::new() .add_source(&[CYCLONEDX_DOC_REF]) .add_source(&self.packages) .add_source(&self.files) .add_source(&self.models) .add_source(&self.crypto); - self.relationships.validate(sources) + self.relationships + .validate(sources) + .map_err(Error::InvalidContent) } // order matters to prevent cross-table deadlocks when running From a8149c153dda30fd5ebc19bf6fa8637be8e0e303 Mon Sep 17 00:00:00 2001 From: Jim Crossley Date: Mon, 19 Jan 2026 18:37:21 -0500 Subject: [PATCH 5/6] refactor: give NodeCreator responsibility for creating all subtypes Weirdly, I had to revert the tc2758 test expectation. Still not sure why. (cherry picked from commit 44fab35a0557060f9c56111ea9a4bd33f84df41c) --- .../src/endpoints/tests/latest_filters.rs | 4 +- .../graph/sbom/common/cryptographic_asset.rs | 37 ++-------- .../ingestor/src/graph/sbom/common/file.rs | 24 +------ .../sbom/common/machine_learning_model.rs | 37 ++-------- modules/ingestor/src/graph/sbom/common/mod.rs | 1 + .../ingestor/src/graph/sbom/common/node.rs | 72 ++++++++++++++++++- .../ingestor/src/graph/sbom/common/package.rs | 25 +------ modules/ingestor/src/graph/sbom/cyclonedx.rs | 51 ++++++------- modules/ingestor/src/graph/sbom/mod.rs | 12 ++-- modules/ingestor/src/graph/sbom/spdx.rs | 34 ++++----- 10 files changed, 127 insertions(+), 170 deletions(-) diff --git a/modules/analysis/src/endpoints/tests/latest_filters.rs b/modules/analysis/src/endpoints/tests/latest_filters.rs index 916eaa352..024f5688c 100644 --- a/modules/analysis/src/endpoints/tests/latest_filters.rs +++ b/modules/analysis/src/endpoints/tests/latest_filters.rs @@ -579,8 +579,8 @@ async fn test_tc2758( "relationship": "package", "descendants": [ { - "node_id": "pkg:generic/pom.xml?checksum=sha256%3A974823188145bdb517f9692341a237bdee75c8312d3c86ae0fc4d390225bb923", - "name": "pom.xml", + "node_id": "pkg:maven/org.jboss.eap/wildfly-ee-aggregate-javadocs@7.4.0.GA-redhat-00005?classifier=javadocs&type=jar", + "name": "wildfly-ee-aggregate-javadocs", "relationship": "dependency", }] }] diff --git a/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs index efba7a7ac..4ecd67b10 100644 --- a/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs +++ b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs @@ -1,41 +1,14 @@ -use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ConnectionTrait, DbErr}; -use uuid::Uuid; -pub struct CryptographicAssetCreator { - nodes: NodeCreator, -} +#[derive(Default)] +pub struct CryptographicAssetCreator {} impl CryptographicAssetCreator { - pub fn new(sbom_id: Uuid) -> Self { - Self { - nodes: NodeCreator::new(sbom_id), - } - } - - pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { - Self { - nodes: NodeCreator::with_capacity(sbom_id, capacity_files), - } - } - - pub fn add(&mut self, node_id: String, name: String, checksums: I) - where - I: IntoIterator, - C: Into, - { - self.nodes.add(node_id.clone(), name, checksums); + pub fn add(&mut self, _node_id: String) { + // TODO } - pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { - self.nodes.create(db).await?; - + pub async fn create(self, _db: &impl ConnectionTrait) -> Result<(), DbErr> { Ok(()) } } - -impl<'a> ReferenceSource<'a> for CryptographicAssetCreator { - fn references(&'a self) -> impl IntoIterator { - self.nodes.references() - } -} diff --git a/modules/ingestor/src/graph/sbom/common/file.rs b/modules/ingestor/src/graph/sbom/common/file.rs index 646ed2901..6892b25b6 100644 --- a/modules/ingestor/src/graph/sbom/common/file.rs +++ b/modules/ingestor/src/graph/sbom/common/file.rs @@ -1,4 +1,3 @@ -use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ActiveValue::Set, ConnectionTrait, DbErr, EntityTrait}; use sea_query::OnConflict; use tracing::instrument; @@ -9,7 +8,6 @@ use uuid::Uuid; // Creator of files and relationships. pub struct FileCreator { sbom_id: Uuid, - nodes: NodeCreator, files: Vec, } @@ -17,26 +15,18 @@ impl FileCreator { pub fn new(sbom_id: Uuid) -> Self { Self { sbom_id, - nodes: NodeCreator::new(sbom_id), files: Vec::new(), } } - pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { + pub fn with_capacity(sbom_id: Uuid, capacity: usize) -> Self { Self { sbom_id, - nodes: NodeCreator::with_capacity(sbom_id, capacity_files), - files: Vec::with_capacity(capacity_files), + files: Vec::with_capacity(capacity), } } - pub fn add(&mut self, node_id: String, name: String, checksums: I) - where - I: IntoIterator, - C: Into, - { - self.nodes.add(node_id.clone(), name, checksums); - + pub fn add(&mut self, node_id: String) { self.files.push(sbom_file::ActiveModel { sbom_id: Set(self.sbom_id), node_id: Set(node_id), @@ -45,8 +35,6 @@ impl FileCreator { #[instrument(skip_all, fields(num=self.files.len()), err(level=tracing::Level::INFO))] pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { - self.nodes.create(db).await?; - for batch in &self.files.into_iter().chunked() { sbom_file::Entity::insert_many(batch) .on_conflict( @@ -62,9 +50,3 @@ impl FileCreator { Ok(()) } } - -impl<'a> ReferenceSource<'a> for FileCreator { - fn references(&'a self) -> impl IntoIterator { - self.nodes.references() - } -} diff --git a/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs index 223d3c5d2..705978ee0 100644 --- a/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs +++ b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs @@ -1,41 +1,14 @@ -use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ConnectionTrait, DbErr}; -use uuid::Uuid; -pub struct MachineLearningModelCreator { - nodes: NodeCreator, -} +#[derive(Default)] +pub struct MachineLearningModelCreator {} impl MachineLearningModelCreator { - pub fn new(sbom_id: Uuid) -> Self { - Self { - nodes: NodeCreator::new(sbom_id), - } - } - - pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { - Self { - nodes: NodeCreator::with_capacity(sbom_id, capacity_files), - } - } - - pub fn add(&mut self, node_id: String, name: String, checksums: I) - where - I: IntoIterator, - C: Into, - { - self.nodes.add(node_id.clone(), name, checksums); + pub fn add(&mut self, _node_id: String) { + // TODO } - pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { - self.nodes.create(db).await?; - + pub async fn create(self, _db: &impl ConnectionTrait) -> Result<(), DbErr> { Ok(()) } } - -impl<'a> ReferenceSource<'a> for MachineLearningModelCreator { - fn references(&'a self) -> impl IntoIterator { - self.nodes.references() - } -} diff --git a/modules/ingestor/src/graph/sbom/common/mod.rs b/modules/ingestor/src/graph/sbom/common/mod.rs index 33e4e6f55..de582d020 100644 --- a/modules/ingestor/src/graph/sbom/common/mod.rs +++ b/modules/ingestor/src/graph/sbom/common/mod.rs @@ -16,5 +16,6 @@ pub use file::*; pub use license::*; pub use licensing_info::*; pub use machine_learning_model::*; +pub use node::*; pub use package::*; pub use relationship::*; diff --git a/modules/ingestor/src/graph/sbom/common/node.rs b/modules/ingestor/src/graph/sbom/common/node.rs index 8bbdc55f2..da1a02db1 100644 --- a/modules/ingestor/src/graph/sbom/common/node.rs +++ b/modules/ingestor/src/graph/sbom/common/node.rs @@ -6,11 +6,21 @@ use trustify_common::db::chunk::EntityChunkedIter; use trustify_entity::{sbom_node, sbom_node_checksum}; use uuid::Uuid; +use super::{ + CryptographicAssetCreator, FileCreator, MachineLearningModelCreator, NodeInfoParam, + PackageCreator, PackageReference, +}; + // Base node creator pub struct NodeCreator { sbom_id: Uuid, nodes: Vec, checksums: Vec, + + packages: PackageCreator, + files: FileCreator, + models: MachineLearningModelCreator, + crypto: CryptographicAssetCreator, } impl NodeCreator { @@ -19,14 +29,22 @@ impl NodeCreator { sbom_id, nodes: Vec::new(), checksums: Vec::new(), + packages: PackageCreator::new(sbom_id), + files: FileCreator::new(sbom_id), + models: MachineLearningModelCreator::default(), + crypto: CryptographicAssetCreator::default(), } } - pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { + pub fn with_capacity(sbom_id: Uuid, capacity: usize) -> Self { Self { sbom_id, - nodes: Vec::with_capacity(capacity_files), - checksums: Vec::with_capacity(capacity_files), + nodes: Vec::with_capacity(capacity), + checksums: Vec::with_capacity(capacity), + packages: PackageCreator::with_capacity(sbom_id, capacity), + files: FileCreator::with_capacity(sbom_id, capacity), + models: MachineLearningModelCreator::default(), + crypto: CryptographicAssetCreator::default(), } } @@ -52,6 +70,47 @@ impl NodeCreator { }); } + pub fn add_package<'a, I, C>( + &mut self, + info: NodeInfoParam, + name: String, + checksums: I, + refs: impl Iterator, + ) where + I: IntoIterator, + C: Into, + { + self.add(info.node_id.clone(), name, checksums); + self.packages.add(info, refs); + } + + pub fn add_file(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.add(node_id.clone(), name, checksums); + self.files.add(node_id); + } + + pub fn add_model(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.add(node_id.clone(), name, checksums); + self.models.add(node_id); + } + + pub fn add_crypto(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.add(node_id.clone(), name, checksums); + self.crypto.add(node_id); + } + #[instrument(skip_all, fields(num=self.nodes.len()), err(level=tracing::Level::INFO))] pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { for batch in &self.nodes.into_iter().chunked() { @@ -82,8 +141,15 @@ impl NodeCreator { .await?; } + self.packages.create(db).await?; + self.files.create(db).await?; + Ok(()) } + + pub fn get_packages_mut(&mut self) -> &mut PackageCreator { + &mut self.packages + } } impl<'a> ReferenceSource<'a> for NodeCreator { diff --git a/modules/ingestor/src/graph/sbom/common/package.rs b/modules/ingestor/src/graph/sbom/common/package.rs index a8c45c6b5..92f8f8dca 100644 --- a/modules/ingestor/src/graph/sbom/common/package.rs +++ b/modules/ingestor/src/graph/sbom/common/package.rs @@ -1,5 +1,3 @@ -use crate::graph::sbom::common::node::NodeCreator; -use crate::graph::sbom::{Checksum, ReferenceSource}; use sea_orm::{ActiveValue::Set, ConnectionTrait, DbErr, EntityTrait}; use sea_query::OnConflict; use tracing::instrument; @@ -14,7 +12,6 @@ use uuid::Uuid; // Creator of packages and relationships. pub struct PackageCreator { sbom_id: Uuid, - pub(crate) nodes: NodeCreator, pub(crate) packages: Vec, pub(crate) purl_refs: Vec, pub(crate) cpe_refs: Vec, @@ -23,7 +20,6 @@ pub struct PackageCreator { pub struct NodeInfoParam { pub node_id: String, - pub name: String, pub group: Option, pub version: Option, pub package_license_info: Vec, @@ -43,7 +39,6 @@ impl PackageCreator { pub fn new(sbom_id: Uuid) -> Self { Self { sbom_id, - nodes: NodeCreator::new(sbom_id), packages: Vec::new(), purl_refs: Vec::new(), cpe_refs: Vec::new(), @@ -54,7 +49,6 @@ impl PackageCreator { pub fn with_capacity(sbom_id: Uuid, capacity_packages: usize) -> Self { Self { sbom_id, - nodes: NodeCreator::with_capacity(sbom_id, capacity_packages), packages: Vec::with_capacity(capacity_packages), purl_refs: Vec::with_capacity(capacity_packages), cpe_refs: Vec::new(), // most packages won't have a CPE, so we start with a low number @@ -62,15 +56,11 @@ impl PackageCreator { } } - pub fn add<'a, I, C>( + pub fn add<'a>( &mut self, node_info: NodeInfoParam, refs: impl Iterator, - checksums: I, - ) where - I: IntoIterator, - C: Into, - { + ) { for r#ref in refs { match r#ref { PackageReference::Cpe(cpe) => { @@ -90,9 +80,6 @@ impl PackageCreator { } } - self.nodes - .add(node_info.node_id.clone(), node_info.name, checksums); - self.packages.push(sbom_package::ActiveModel { sbom_id: Set(self.sbom_id), group: Set(node_info.group), @@ -121,8 +108,6 @@ impl PackageCreator { err(level=tracing::Level::INFO) )] pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { - self.nodes.create(db).await?; - for batch in &self.packages.into_iter().chunked() { sbom_package::Entity::insert_many(batch) .on_conflict( @@ -190,9 +175,3 @@ impl PackageCreator { Ok(()) } } - -impl<'a> ReferenceSource<'a> for PackageCreator { - fn references(&'a self) -> impl IntoIterator { - self.nodes.references() - } -} diff --git a/modules/ingestor/src/graph/sbom/cyclonedx.rs b/modules/ingestor/src/graph/sbom/cyclonedx.rs index d01c38dd3..7861f9ca8 100644 --- a/modules/ingestor/src/graph/sbom/cyclonedx.rs +++ b/modules/ingestor/src/graph/sbom/cyclonedx.rs @@ -5,7 +5,7 @@ use crate::{ purl::creator::PurlCreator, sbom::{ CryptographicAssetCreator, CycloneDx as CycloneDxProcessor, LicenseCreator, - LicenseInfo, MachineLearningModelCreator, NodeInfoParam, PackageCreator, + LicenseInfo, MachineLearningModelCreator, NodeCreator, NodeInfoParam, PackageLicensenInfo, PackageReference, References, RelationshipCreator, SbomContext, SbomInformation, processor::{ @@ -32,8 +32,6 @@ use trustify_common::{cpe::Cpe, purl::Purl}; use trustify_entity::relationship::Relationship; use uuid::Uuid; -use super::FileCreator; - /// Marker we use for identifying the document itself. /// /// Similar to the SPDX doc id, which is attached to the document itself. CycloneDX doesn't have @@ -318,8 +316,7 @@ struct ComponentCreator { cpes: CpeCreator, purls: PurlCreator, licenses: LicenseCreator, - packages: PackageCreator, - files: FileCreator, + nodes: NodeCreator, models: MachineLearningModelCreator, crypto: CryptographicAssetCreator, relationships: RelationshipCreator, @@ -333,10 +330,9 @@ impl ComponentCreator { cpes: CpeCreator::new(), purls: PurlCreator::new(), licenses: LicenseCreator::new(), - packages: PackageCreator::with_capacity(sbom_id, capacity), - files: FileCreator::new(sbom_id), - models: MachineLearningModelCreator::new(sbom_id), - crypto: CryptographicAssetCreator::new(sbom_id), + nodes: NodeCreator::with_capacity(sbom_id, capacity), + models: MachineLearningModelCreator::default(), + crypto: CryptographicAssetCreator::default(), relationships: RelationshipCreator::new(sbom_id, CycloneDxProcessor), refs: Default::default(), } @@ -406,6 +402,7 @@ impl ComponentCreator { }) .collect::>(); + // Deal with various Component types match ComponentType::from_str(&comp.type_) { Ok(ty) => { use ComponentType::*; @@ -413,40 +410,38 @@ impl ComponentCreator { // We treat all these types as "packages" Application | Framework | Library | Container | OperatingSystem => { const EMPTY: Vec = vec![]; - self.packages.add( + self.nodes.add_package( NodeInfoParam { node_id: node_id.clone(), - name: comp.name.to_string(), group: comp.group.as_ref().map(|v| v.to_string()), version: comp.version.as_ref().map(|v| v.to_string()), package_license_info: cyclone_licenses, }, - self.refs.get(&node_id).unwrap_or(&EMPTY).iter(), - comp.hashes.clone().into_iter().flatten(), - ) - } - File => { - self.files.add( - node_id.clone(), comp.name.to_string(), comp.hashes.clone().into_iter().flatten(), - ); + self.refs.get(&node_id).unwrap_or(&EMPTY).iter(), + ) } + File => self.nodes.add_file( + node_id.clone(), + comp.name.to_string(), + comp.hashes.clone().into_iter().flatten(), + ), MachineLearningModel => { // TODO: store the model card data - self.models.add( + self.nodes.add_model( node_id.clone(), comp.name.to_string(), comp.hashes.clone().into_iter().flatten(), - ); + ) } CryptographicAsset => { // TODO: store the crypto properties data - self.crypto.add( + self.nodes.add_crypto( node_id.clone(), comp.name.to_string(), comp.hashes.clone().into_iter().flatten(), - ); + ) } _ => log::error!("Unsupported component type: '{ty}'"), } @@ -551,7 +546,7 @@ impl ComponentCreator { PostContext { cpes: &self.cpes, purls: &self.purls, - packages: &mut self.packages, + packages: self.nodes.get_packages_mut(), relationships: &mut self.relationships.rels, externals: &mut self.relationships.externals, } @@ -561,10 +556,7 @@ impl ComponentCreator { fn validate(&self) -> Result<(), Error> { let sources = References::new() .add_source(&[CYCLONEDX_DOC_REF]) - .add_source(&self.packages) - .add_source(&self.files) - .add_source(&self.models) - .add_source(&self.crypto); + .add_source(&self.nodes); self.relationships .validate(sources) .map_err(Error::InvalidContent) @@ -577,8 +569,7 @@ impl ComponentCreator { self.licenses.create(db).await?; self.purls.create(db).await?; self.cpes.create(db).await?; - self.packages.create(db).await?; - self.files.create(db).await?; + self.nodes.create(db).await?; self.models.create(db).await?; self.crypto.create(db).await?; self.relationships.create(db).await?; diff --git a/modules/ingestor/src/graph/sbom/mod.rs b/modules/ingestor/src/graph/sbom/mod.rs index a3eaff80e..a5416bdb7 100644 --- a/modules/ingestor/src/graph/sbom/mod.rs +++ b/modules/ingestor/src/graph/sbom/mod.rs @@ -600,26 +600,26 @@ impl SbomContext { cpes: Vec, connection: &C, ) -> Result<(), Error> { - let mut creator = PackageCreator::new(self.sbom.sbom_id); + let mut nodes = NodeCreator::new(self.sbom.sbom_id); + let mut packages = PackageCreator::new(self.sbom.sbom_id); let refs: Vec = purls .into_iter() .map(PackageReference::Purl) .chain(cpes.into_iter().map(PackageReference::Cpe)) .collect(); - creator.add( + nodes.add(node_id.clone(), name, Checksum::NONE); + packages.add( NodeInfoParam { node_id, - name, group: None, version, package_license_info: vec![], }, refs.iter(), - Checksum::NONE, ); - - creator.create(connection).await?; + nodes.create(connection).await?; + packages.create(connection).await?; // done diff --git a/modules/ingestor/src/graph/sbom/spdx.rs b/modules/ingestor/src/graph/sbom/spdx.rs index 334905d4d..824fcebb5 100644 --- a/modules/ingestor/src/graph/sbom/spdx.rs +++ b/modules/ingestor/src/graph/sbom/spdx.rs @@ -4,9 +4,9 @@ use crate::{ product::ProductInformation, purl::creator::PurlCreator, sbom::{ - FileCreator, LicenseCreator, LicenseInfo, LicensingInfo, LicensingInfoCreator, - NodeInfoParam, PackageCreator, PackageLicensenInfo, PackageReference, References, - RelationshipCreator, SbomContext, SbomInformation, Spdx, + LicenseCreator, LicenseInfo, LicensingInfo, LicensingInfoCreator, NodeCreator, + NodeInfoParam, PackageLicensenInfo, PackageReference, References, RelationshipCreator, + SbomContext, SbomInformation, Spdx, processor::{ InitContext, PostContext, Processor, RedHatProductComponentRelationships, RunProcessors, @@ -198,8 +198,7 @@ impl SbomContext { license_extracted_refs.add(extracted_licensing_info); } - let mut packages = - PackageCreator::with_capacity(self.sbom.sbom_id, sbom_data.package_information.len()); + let mut nodes = NodeCreator::new(self.sbom.sbom_id); for package in sbom_data.package_information { let declared_license_info = package.declared_license.as_ref().map(|e| LicenseInfo { @@ -293,26 +292,24 @@ impl SbomContext { }) .collect::>(), ); - packages.add( + + nodes.add_package( NodeInfoParam { node_id: package.package_spdx_identifier, - name: package.package_name, group: None, version: package.package_version, package_license_info, }, - refs.iter(), + package.package_name, package.package_checksum, + refs.iter(), ); } // prepare files - let mut files = - FileCreator::with_capacity(self.sbom.sbom_id, sbom_data.file_information.len()); - for file in sbom_data.file_information { - files.add( + nodes.add_file( file.file_spdx_identifier, file.file_name, file.file_checksum, @@ -324,7 +321,7 @@ impl SbomContext { PostContext { cpes: &cpes, purls: &purls, - packages: &mut packages, + packages: nodes.get_packages_mut(), relationships: &mut relationships.rels, externals: &mut relationships.externals, } @@ -343,18 +340,13 @@ impl SbomContext { .document_creation_information .spdx_identifier .as_str()]; - let sources = References::new() - .add_source(&doc_id) - .add_source(&packages) - .add_source(&files); + let sources = References::new().add_source(&doc_id).add_source(&nodes); relationships .validate(sources) .map_err(Error::InvalidContent)?; - // create packages, files, and relationships - - packages.create(db).await?; - files.create(db).await?; + // create nodes, packages, files, and relationships + nodes.create(db).await?; relationships.create(db).await?; // done From 22405250edac855d880610106d18de8c3c1ce5e2 Mon Sep 17 00:00:00 2001 From: Jim Crossley Date: Tue, 20 Jan 2026 13:21:00 -0500 Subject: [PATCH 6/6] Revert "refactor: give NodeCreator responsibility for creating all subtypes" This reverts commit 470cd79893f9b54e8fde9088921acfa8fff19758. (cherry picked from commit c3dc0fe9a2f43141332fcc4820b2cda39be92bd5) --- .../src/endpoints/tests/latest_filters.rs | 4 +- .../graph/sbom/common/cryptographic_asset.rs | 37 ++++++++-- .../ingestor/src/graph/sbom/common/file.rs | 24 ++++++- .../sbom/common/machine_learning_model.rs | 37 ++++++++-- modules/ingestor/src/graph/sbom/common/mod.rs | 1 - .../ingestor/src/graph/sbom/common/node.rs | 72 +------------------ .../ingestor/src/graph/sbom/common/package.rs | 25 ++++++- modules/ingestor/src/graph/sbom/cyclonedx.rs | 51 +++++++------ modules/ingestor/src/graph/sbom/mod.rs | 12 ++-- modules/ingestor/src/graph/sbom/spdx.rs | 34 +++++---- 10 files changed, 170 insertions(+), 127 deletions(-) diff --git a/modules/analysis/src/endpoints/tests/latest_filters.rs b/modules/analysis/src/endpoints/tests/latest_filters.rs index 024f5688c..916eaa352 100644 --- a/modules/analysis/src/endpoints/tests/latest_filters.rs +++ b/modules/analysis/src/endpoints/tests/latest_filters.rs @@ -579,8 +579,8 @@ async fn test_tc2758( "relationship": "package", "descendants": [ { - "node_id": "pkg:maven/org.jboss.eap/wildfly-ee-aggregate-javadocs@7.4.0.GA-redhat-00005?classifier=javadocs&type=jar", - "name": "wildfly-ee-aggregate-javadocs", + "node_id": "pkg:generic/pom.xml?checksum=sha256%3A974823188145bdb517f9692341a237bdee75c8312d3c86ae0fc4d390225bb923", + "name": "pom.xml", "relationship": "dependency", }] }] diff --git a/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs index 4ecd67b10..efba7a7ac 100644 --- a/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs +++ b/modules/ingestor/src/graph/sbom/common/cryptographic_asset.rs @@ -1,14 +1,41 @@ +use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ConnectionTrait, DbErr}; +use uuid::Uuid; -#[derive(Default)] -pub struct CryptographicAssetCreator {} +pub struct CryptographicAssetCreator { + nodes: NodeCreator, +} impl CryptographicAssetCreator { - pub fn add(&mut self, _node_id: String) { - // TODO + pub fn new(sbom_id: Uuid) -> Self { + Self { + nodes: NodeCreator::new(sbom_id), + } + } + + pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { + Self { + nodes: NodeCreator::with_capacity(sbom_id, capacity_files), + } + } + + pub fn add(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.nodes.add(node_id.clone(), name, checksums); } - pub async fn create(self, _db: &impl ConnectionTrait) -> Result<(), DbErr> { + pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { + self.nodes.create(db).await?; + Ok(()) } } + +impl<'a> ReferenceSource<'a> for CryptographicAssetCreator { + fn references(&'a self) -> impl IntoIterator { + self.nodes.references() + } +} diff --git a/modules/ingestor/src/graph/sbom/common/file.rs b/modules/ingestor/src/graph/sbom/common/file.rs index 6892b25b6..646ed2901 100644 --- a/modules/ingestor/src/graph/sbom/common/file.rs +++ b/modules/ingestor/src/graph/sbom/common/file.rs @@ -1,3 +1,4 @@ +use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ActiveValue::Set, ConnectionTrait, DbErr, EntityTrait}; use sea_query::OnConflict; use tracing::instrument; @@ -8,6 +9,7 @@ use uuid::Uuid; // Creator of files and relationships. pub struct FileCreator { sbom_id: Uuid, + nodes: NodeCreator, files: Vec, } @@ -15,18 +17,26 @@ impl FileCreator { pub fn new(sbom_id: Uuid) -> Self { Self { sbom_id, + nodes: NodeCreator::new(sbom_id), files: Vec::new(), } } - pub fn with_capacity(sbom_id: Uuid, capacity: usize) -> Self { + pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { Self { sbom_id, - files: Vec::with_capacity(capacity), + nodes: NodeCreator::with_capacity(sbom_id, capacity_files), + files: Vec::with_capacity(capacity_files), } } - pub fn add(&mut self, node_id: String) { + pub fn add(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.nodes.add(node_id.clone(), name, checksums); + self.files.push(sbom_file::ActiveModel { sbom_id: Set(self.sbom_id), node_id: Set(node_id), @@ -35,6 +45,8 @@ impl FileCreator { #[instrument(skip_all, fields(num=self.files.len()), err(level=tracing::Level::INFO))] pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { + self.nodes.create(db).await?; + for batch in &self.files.into_iter().chunked() { sbom_file::Entity::insert_many(batch) .on_conflict( @@ -50,3 +62,9 @@ impl FileCreator { Ok(()) } } + +impl<'a> ReferenceSource<'a> for FileCreator { + fn references(&'a self) -> impl IntoIterator { + self.nodes.references() + } +} diff --git a/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs index 705978ee0..223d3c5d2 100644 --- a/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs +++ b/modules/ingestor/src/graph/sbom/common/machine_learning_model.rs @@ -1,14 +1,41 @@ +use crate::graph::sbom::{Checksum, ReferenceSource, common::node::NodeCreator}; use sea_orm::{ConnectionTrait, DbErr}; +use uuid::Uuid; -#[derive(Default)] -pub struct MachineLearningModelCreator {} +pub struct MachineLearningModelCreator { + nodes: NodeCreator, +} impl MachineLearningModelCreator { - pub fn add(&mut self, _node_id: String) { - // TODO + pub fn new(sbom_id: Uuid) -> Self { + Self { + nodes: NodeCreator::new(sbom_id), + } + } + + pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { + Self { + nodes: NodeCreator::with_capacity(sbom_id, capacity_files), + } + } + + pub fn add(&mut self, node_id: String, name: String, checksums: I) + where + I: IntoIterator, + C: Into, + { + self.nodes.add(node_id.clone(), name, checksums); } - pub async fn create(self, _db: &impl ConnectionTrait) -> Result<(), DbErr> { + pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { + self.nodes.create(db).await?; + Ok(()) } } + +impl<'a> ReferenceSource<'a> for MachineLearningModelCreator { + fn references(&'a self) -> impl IntoIterator { + self.nodes.references() + } +} diff --git a/modules/ingestor/src/graph/sbom/common/mod.rs b/modules/ingestor/src/graph/sbom/common/mod.rs index de582d020..33e4e6f55 100644 --- a/modules/ingestor/src/graph/sbom/common/mod.rs +++ b/modules/ingestor/src/graph/sbom/common/mod.rs @@ -16,6 +16,5 @@ pub use file::*; pub use license::*; pub use licensing_info::*; pub use machine_learning_model::*; -pub use node::*; pub use package::*; pub use relationship::*; diff --git a/modules/ingestor/src/graph/sbom/common/node.rs b/modules/ingestor/src/graph/sbom/common/node.rs index da1a02db1..8bbdc55f2 100644 --- a/modules/ingestor/src/graph/sbom/common/node.rs +++ b/modules/ingestor/src/graph/sbom/common/node.rs @@ -6,21 +6,11 @@ use trustify_common::db::chunk::EntityChunkedIter; use trustify_entity::{sbom_node, sbom_node_checksum}; use uuid::Uuid; -use super::{ - CryptographicAssetCreator, FileCreator, MachineLearningModelCreator, NodeInfoParam, - PackageCreator, PackageReference, -}; - // Base node creator pub struct NodeCreator { sbom_id: Uuid, nodes: Vec, checksums: Vec, - - packages: PackageCreator, - files: FileCreator, - models: MachineLearningModelCreator, - crypto: CryptographicAssetCreator, } impl NodeCreator { @@ -29,22 +19,14 @@ impl NodeCreator { sbom_id, nodes: Vec::new(), checksums: Vec::new(), - packages: PackageCreator::new(sbom_id), - files: FileCreator::new(sbom_id), - models: MachineLearningModelCreator::default(), - crypto: CryptographicAssetCreator::default(), } } - pub fn with_capacity(sbom_id: Uuid, capacity: usize) -> Self { + pub fn with_capacity(sbom_id: Uuid, capacity_files: usize) -> Self { Self { sbom_id, - nodes: Vec::with_capacity(capacity), - checksums: Vec::with_capacity(capacity), - packages: PackageCreator::with_capacity(sbom_id, capacity), - files: FileCreator::with_capacity(sbom_id, capacity), - models: MachineLearningModelCreator::default(), - crypto: CryptographicAssetCreator::default(), + nodes: Vec::with_capacity(capacity_files), + checksums: Vec::with_capacity(capacity_files), } } @@ -70,47 +52,6 @@ impl NodeCreator { }); } - pub fn add_package<'a, I, C>( - &mut self, - info: NodeInfoParam, - name: String, - checksums: I, - refs: impl Iterator, - ) where - I: IntoIterator, - C: Into, - { - self.add(info.node_id.clone(), name, checksums); - self.packages.add(info, refs); - } - - pub fn add_file(&mut self, node_id: String, name: String, checksums: I) - where - I: IntoIterator, - C: Into, - { - self.add(node_id.clone(), name, checksums); - self.files.add(node_id); - } - - pub fn add_model(&mut self, node_id: String, name: String, checksums: I) - where - I: IntoIterator, - C: Into, - { - self.add(node_id.clone(), name, checksums); - self.models.add(node_id); - } - - pub fn add_crypto(&mut self, node_id: String, name: String, checksums: I) - where - I: IntoIterator, - C: Into, - { - self.add(node_id.clone(), name, checksums); - self.crypto.add(node_id); - } - #[instrument(skip_all, fields(num=self.nodes.len()), err(level=tracing::Level::INFO))] pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { for batch in &self.nodes.into_iter().chunked() { @@ -141,15 +82,8 @@ impl NodeCreator { .await?; } - self.packages.create(db).await?; - self.files.create(db).await?; - Ok(()) } - - pub fn get_packages_mut(&mut self) -> &mut PackageCreator { - &mut self.packages - } } impl<'a> ReferenceSource<'a> for NodeCreator { diff --git a/modules/ingestor/src/graph/sbom/common/package.rs b/modules/ingestor/src/graph/sbom/common/package.rs index 92f8f8dca..a8c45c6b5 100644 --- a/modules/ingestor/src/graph/sbom/common/package.rs +++ b/modules/ingestor/src/graph/sbom/common/package.rs @@ -1,3 +1,5 @@ +use crate::graph::sbom::common::node::NodeCreator; +use crate::graph::sbom::{Checksum, ReferenceSource}; use sea_orm::{ActiveValue::Set, ConnectionTrait, DbErr, EntityTrait}; use sea_query::OnConflict; use tracing::instrument; @@ -12,6 +14,7 @@ use uuid::Uuid; // Creator of packages and relationships. pub struct PackageCreator { sbom_id: Uuid, + pub(crate) nodes: NodeCreator, pub(crate) packages: Vec, pub(crate) purl_refs: Vec, pub(crate) cpe_refs: Vec, @@ -20,6 +23,7 @@ pub struct PackageCreator { pub struct NodeInfoParam { pub node_id: String, + pub name: String, pub group: Option, pub version: Option, pub package_license_info: Vec, @@ -39,6 +43,7 @@ impl PackageCreator { pub fn new(sbom_id: Uuid) -> Self { Self { sbom_id, + nodes: NodeCreator::new(sbom_id), packages: Vec::new(), purl_refs: Vec::new(), cpe_refs: Vec::new(), @@ -49,6 +54,7 @@ impl PackageCreator { pub fn with_capacity(sbom_id: Uuid, capacity_packages: usize) -> Self { Self { sbom_id, + nodes: NodeCreator::with_capacity(sbom_id, capacity_packages), packages: Vec::with_capacity(capacity_packages), purl_refs: Vec::with_capacity(capacity_packages), cpe_refs: Vec::new(), // most packages won't have a CPE, so we start with a low number @@ -56,11 +62,15 @@ impl PackageCreator { } } - pub fn add<'a>( + pub fn add<'a, I, C>( &mut self, node_info: NodeInfoParam, refs: impl Iterator, - ) { + checksums: I, + ) where + I: IntoIterator, + C: Into, + { for r#ref in refs { match r#ref { PackageReference::Cpe(cpe) => { @@ -80,6 +90,9 @@ impl PackageCreator { } } + self.nodes + .add(node_info.node_id.clone(), node_info.name, checksums); + self.packages.push(sbom_package::ActiveModel { sbom_id: Set(self.sbom_id), group: Set(node_info.group), @@ -108,6 +121,8 @@ impl PackageCreator { err(level=tracing::Level::INFO) )] pub async fn create(self, db: &impl ConnectionTrait) -> Result<(), DbErr> { + self.nodes.create(db).await?; + for batch in &self.packages.into_iter().chunked() { sbom_package::Entity::insert_many(batch) .on_conflict( @@ -175,3 +190,9 @@ impl PackageCreator { Ok(()) } } + +impl<'a> ReferenceSource<'a> for PackageCreator { + fn references(&'a self) -> impl IntoIterator { + self.nodes.references() + } +} diff --git a/modules/ingestor/src/graph/sbom/cyclonedx.rs b/modules/ingestor/src/graph/sbom/cyclonedx.rs index 7861f9ca8..d01c38dd3 100644 --- a/modules/ingestor/src/graph/sbom/cyclonedx.rs +++ b/modules/ingestor/src/graph/sbom/cyclonedx.rs @@ -5,7 +5,7 @@ use crate::{ purl::creator::PurlCreator, sbom::{ CryptographicAssetCreator, CycloneDx as CycloneDxProcessor, LicenseCreator, - LicenseInfo, MachineLearningModelCreator, NodeCreator, NodeInfoParam, + LicenseInfo, MachineLearningModelCreator, NodeInfoParam, PackageCreator, PackageLicensenInfo, PackageReference, References, RelationshipCreator, SbomContext, SbomInformation, processor::{ @@ -32,6 +32,8 @@ use trustify_common::{cpe::Cpe, purl::Purl}; use trustify_entity::relationship::Relationship; use uuid::Uuid; +use super::FileCreator; + /// Marker we use for identifying the document itself. /// /// Similar to the SPDX doc id, which is attached to the document itself. CycloneDX doesn't have @@ -316,7 +318,8 @@ struct ComponentCreator { cpes: CpeCreator, purls: PurlCreator, licenses: LicenseCreator, - nodes: NodeCreator, + packages: PackageCreator, + files: FileCreator, models: MachineLearningModelCreator, crypto: CryptographicAssetCreator, relationships: RelationshipCreator, @@ -330,9 +333,10 @@ impl ComponentCreator { cpes: CpeCreator::new(), purls: PurlCreator::new(), licenses: LicenseCreator::new(), - nodes: NodeCreator::with_capacity(sbom_id, capacity), - models: MachineLearningModelCreator::default(), - crypto: CryptographicAssetCreator::default(), + packages: PackageCreator::with_capacity(sbom_id, capacity), + files: FileCreator::new(sbom_id), + models: MachineLearningModelCreator::new(sbom_id), + crypto: CryptographicAssetCreator::new(sbom_id), relationships: RelationshipCreator::new(sbom_id, CycloneDxProcessor), refs: Default::default(), } @@ -402,7 +406,6 @@ impl ComponentCreator { }) .collect::>(); - // Deal with various Component types match ComponentType::from_str(&comp.type_) { Ok(ty) => { use ComponentType::*; @@ -410,38 +413,40 @@ impl ComponentCreator { // We treat all these types as "packages" Application | Framework | Library | Container | OperatingSystem => { const EMPTY: Vec = vec![]; - self.nodes.add_package( + self.packages.add( NodeInfoParam { node_id: node_id.clone(), + name: comp.name.to_string(), group: comp.group.as_ref().map(|v| v.to_string()), version: comp.version.as_ref().map(|v| v.to_string()), package_license_info: cyclone_licenses, }, - comp.name.to_string(), - comp.hashes.clone().into_iter().flatten(), self.refs.get(&node_id).unwrap_or(&EMPTY).iter(), + comp.hashes.clone().into_iter().flatten(), ) } - File => self.nodes.add_file( - node_id.clone(), - comp.name.to_string(), - comp.hashes.clone().into_iter().flatten(), - ), + File => { + self.files.add( + node_id.clone(), + comp.name.to_string(), + comp.hashes.clone().into_iter().flatten(), + ); + } MachineLearningModel => { // TODO: store the model card data - self.nodes.add_model( + self.models.add( node_id.clone(), comp.name.to_string(), comp.hashes.clone().into_iter().flatten(), - ) + ); } CryptographicAsset => { // TODO: store the crypto properties data - self.nodes.add_crypto( + self.crypto.add( node_id.clone(), comp.name.to_string(), comp.hashes.clone().into_iter().flatten(), - ) + ); } _ => log::error!("Unsupported component type: '{ty}'"), } @@ -546,7 +551,7 @@ impl ComponentCreator { PostContext { cpes: &self.cpes, purls: &self.purls, - packages: self.nodes.get_packages_mut(), + packages: &mut self.packages, relationships: &mut self.relationships.rels, externals: &mut self.relationships.externals, } @@ -556,7 +561,10 @@ impl ComponentCreator { fn validate(&self) -> Result<(), Error> { let sources = References::new() .add_source(&[CYCLONEDX_DOC_REF]) - .add_source(&self.nodes); + .add_source(&self.packages) + .add_source(&self.files) + .add_source(&self.models) + .add_source(&self.crypto); self.relationships .validate(sources) .map_err(Error::InvalidContent) @@ -569,7 +577,8 @@ impl ComponentCreator { self.licenses.create(db).await?; self.purls.create(db).await?; self.cpes.create(db).await?; - self.nodes.create(db).await?; + self.packages.create(db).await?; + self.files.create(db).await?; self.models.create(db).await?; self.crypto.create(db).await?; self.relationships.create(db).await?; diff --git a/modules/ingestor/src/graph/sbom/mod.rs b/modules/ingestor/src/graph/sbom/mod.rs index a5416bdb7..a3eaff80e 100644 --- a/modules/ingestor/src/graph/sbom/mod.rs +++ b/modules/ingestor/src/graph/sbom/mod.rs @@ -600,26 +600,26 @@ impl SbomContext { cpes: Vec, connection: &C, ) -> Result<(), Error> { - let mut nodes = NodeCreator::new(self.sbom.sbom_id); - let mut packages = PackageCreator::new(self.sbom.sbom_id); + let mut creator = PackageCreator::new(self.sbom.sbom_id); let refs: Vec = purls .into_iter() .map(PackageReference::Purl) .chain(cpes.into_iter().map(PackageReference::Cpe)) .collect(); - nodes.add(node_id.clone(), name, Checksum::NONE); - packages.add( + creator.add( NodeInfoParam { node_id, + name, group: None, version, package_license_info: vec![], }, refs.iter(), + Checksum::NONE, ); - nodes.create(connection).await?; - packages.create(connection).await?; + + creator.create(connection).await?; // done diff --git a/modules/ingestor/src/graph/sbom/spdx.rs b/modules/ingestor/src/graph/sbom/spdx.rs index 824fcebb5..334905d4d 100644 --- a/modules/ingestor/src/graph/sbom/spdx.rs +++ b/modules/ingestor/src/graph/sbom/spdx.rs @@ -4,9 +4,9 @@ use crate::{ product::ProductInformation, purl::creator::PurlCreator, sbom::{ - LicenseCreator, LicenseInfo, LicensingInfo, LicensingInfoCreator, NodeCreator, - NodeInfoParam, PackageLicensenInfo, PackageReference, References, RelationshipCreator, - SbomContext, SbomInformation, Spdx, + FileCreator, LicenseCreator, LicenseInfo, LicensingInfo, LicensingInfoCreator, + NodeInfoParam, PackageCreator, PackageLicensenInfo, PackageReference, References, + RelationshipCreator, SbomContext, SbomInformation, Spdx, processor::{ InitContext, PostContext, Processor, RedHatProductComponentRelationships, RunProcessors, @@ -198,7 +198,8 @@ impl SbomContext { license_extracted_refs.add(extracted_licensing_info); } - let mut nodes = NodeCreator::new(self.sbom.sbom_id); + let mut packages = + PackageCreator::with_capacity(self.sbom.sbom_id, sbom_data.package_information.len()); for package in sbom_data.package_information { let declared_license_info = package.declared_license.as_ref().map(|e| LicenseInfo { @@ -292,24 +293,26 @@ impl SbomContext { }) .collect::>(), ); - - nodes.add_package( + packages.add( NodeInfoParam { node_id: package.package_spdx_identifier, + name: package.package_name, group: None, version: package.package_version, package_license_info, }, - package.package_name, - package.package_checksum, refs.iter(), + package.package_checksum, ); } // prepare files + let mut files = + FileCreator::with_capacity(self.sbom.sbom_id, sbom_data.file_information.len()); + for file in sbom_data.file_information { - nodes.add_file( + files.add( file.file_spdx_identifier, file.file_name, file.file_checksum, @@ -321,7 +324,7 @@ impl SbomContext { PostContext { cpes: &cpes, purls: &purls, - packages: nodes.get_packages_mut(), + packages: &mut packages, relationships: &mut relationships.rels, externals: &mut relationships.externals, } @@ -340,13 +343,18 @@ impl SbomContext { .document_creation_information .spdx_identifier .as_str()]; - let sources = References::new().add_source(&doc_id).add_source(&nodes); + let sources = References::new() + .add_source(&doc_id) + .add_source(&packages) + .add_source(&files); relationships .validate(sources) .map_err(Error::InvalidContent)?; - // create nodes, packages, files, and relationships - nodes.create(db).await?; + // create packages, files, and relationships + + packages.create(db).await?; + files.create(db).await?; relationships.create(db).await?; // done