diff --git a/etc/test-data/cyclonedx/fan-out-external/container.json b/etc/test-data/cyclonedx/fan-out-external/container.json new file mode 100644 index 000000000..adf304bb5 --- /dev/null +++ b/etc/test-data/cyclonedx/fan-out-external/container.json @@ -0,0 +1,55 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:cccccccc-cccc-cccc-cccc-cccccccccccc", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "bom-ref": "root", + "name": "fan-out-container", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "c1", + "name": "C1", + "version": "1", + "purl": "pkg:rpm/redhat/C1@1.0.0?arch=src", + "type": "library" + }, + { + "bom-ref": "c2", + "name": "C2", + "version": "1", + "purl": "pkg:rpm/redhat/C2@1.0.0?arch=src", + "type": "library" + }, + { + "bom-ref": "c3", + "name": "C3", + "version": "1", + "purl": "pkg:rpm/redhat/C3@1.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "root", + "dependsOn": ["c1", "c2", "c3"] + }, + { + "ref": "c1", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + }, + { + "ref": "c2", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + }, + { + "ref": "c3", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + } + ] +} diff --git a/etc/test-data/cyclonedx/fan-out-external/target.json b/etc/test-data/cyclonedx/fan-out-external/target.json new file mode 100644 index 000000000..d9c5837be --- /dev/null +++ b/etc/test-data/cyclonedx/fan-out-external/target.json @@ -0,0 +1,29 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "bom-ref": "root", + "name": "fan-out-target", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "t1", + "name": "T1", + "version": "1", + "purl": "pkg:rpm/redhat/T1@1.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "root", + "dependsOn": ["t1"] + } + ] +} diff --git a/etc/test-data/cyclonedx/loop-external/a.json b/etc/test-data/cyclonedx/loop-external/a.json new file mode 100644 index 000000000..dd7d7a3b1 --- /dev/null +++ b/etc/test-data/cyclonedx/loop-external/a.json @@ -0,0 +1,28 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "name": "loop-external-a", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "x", + "name": "X", + "version": "1", + "purl": "pkg:rpm/redhat/X@0.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "x", + "dependsOn": ["urn:cdx:bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/1#y"] + } + ] +} diff --git a/etc/test-data/cyclonedx/loop-external/b.json b/etc/test-data/cyclonedx/loop-external/b.json new file mode 100644 index 000000000..8efa5d019 --- /dev/null +++ b/etc/test-data/cyclonedx/loop-external/b.json @@ -0,0 +1,28 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "name": "loop-external-b", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "y", + "name": "Y", + "version": "1", + "purl": "pkg:rpm/redhat/Y@0.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "y", + "dependsOn": ["urn:cdx:aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/1#x"] + } + ] +} diff --git a/modules/analysis/src/service/collector.rs b/modules/analysis/src/service/collector.rs index d1c467fac..04b858fe1 100644 --- a/modules/analysis/src/service/collector.rs +++ b/modules/analysis/src/service/collector.rs @@ -2,20 +2,22 @@ use super::*; use crate::model::graph::{ExternalNode, PackageNode}; use futures::stream::{self, StreamExt}; use parking_lot::Mutex; -use std::{collections::HashMap, sync::Arc}; +use std::{ + collections::{HashMap, hash_map::Entry}, + sync::Arc, +}; /// Tracker for visited nodes, across graphs. #[derive(Default, Clone)] pub struct DiscoveredTracker { - cache: Arc>>, + cache: Arc>>, } impl DiscoveredTracker { - pub fn visit(&self, graph: &NodeGraph, node: NodeIndex) -> bool { + /// Check if a node was already visited, marking it as visited if not. + pub fn visit(&self, sbom_id: Uuid, graph: &NodeGraph, node: NodeIndex) -> bool { let mut maps = self.cache.lock(); - let map = maps - .entry(graph as *const Graph<_, _>) - .or_insert_with(|| graph.visit_map()); + let map = maps.entry(sbom_id).or_insert_with(|| graph.visit_map()); map.visit(node) } @@ -28,11 +30,13 @@ impl DiscoveredTracker { pub struct Collector<'a, C: ConnectionTrait> { graph_cache: &'a Arc, graphs: &'a [(Uuid, Arc)], + sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex, direction: Direction, depth: u64, discovered: DiscoveredTracker, + loaded_graphs: Arc>>>, relationships: &'a HashSet, connection: &'a C, concurrency: usize, @@ -44,11 +48,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Collector { graph_cache: self.graph_cache, graphs: self.graphs, + sbom_id: self.sbom_id, graph: self.graph, node: self.node, direction: self.direction, depth: self.depth, discovered: self.discovered.clone(), + loaded_graphs: self.loaded_graphs.clone(), relationships: self.relationships, connection: self.connection, concurrency: self.concurrency, @@ -61,6 +67,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { pub fn new( graph_cache: &'a Arc, graphs: &'a [(Uuid, Arc)], + sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex, direction: Direction, @@ -73,11 +80,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Self { graph_cache, graphs, + sbom_id, graph, node, direction, depth, discovered: Default::default(), + loaded_graphs: Default::default(), relationships, connection, concurrency, @@ -88,8 +97,9 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { /// Continue with another graph and node as an entry point. /// /// Shares the visited set. - pub fn with(self, graph: &'a NodeGraph, node: NodeIndex) -> Self { + pub fn with(self, sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex) -> Self { Self { + sbom_id, graph, node, ..self @@ -103,11 +113,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Self { graph_cache: self.graph_cache, graphs: self.graphs, + sbom_id: self.sbom_id, graph: self.graph, node, direction: self.direction, depth: self.depth - 1, discovered: self.discovered.clone(), + loaded_graphs: self.loaded_graphs.clone(), relationships: self.relationships, connection: self.connection, concurrency: self.concurrency, @@ -115,6 +127,27 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { } } + /// Load an external SBOM graph, checking the local cache first. + async fn load_external_graph(&self, sbom_id: Uuid) -> Result>, Error> { + if let Some(graph) = self.loaded_graphs.lock().get(&sbom_id).cloned() { + return Ok(Some(graph)); + } + + let Some(graph) = self.loader.load(self.connection, sbom_id).await? else { + return Ok(None); + }; + + let mut map = self.loaded_graphs.lock(); + match map.entry(sbom_id) { + Entry::Occupied(e) => { + log::debug!("concurrent load of external SBOM {sbom_id}, reusing existing handle"); + self.loader.redundant_loads.add(1, &[]); + Ok(Some(e.get().clone())) + } + Entry::Vacant(e) => Ok(Some(e.insert(graph).clone())), + } + } + /// Collect related nodes in the provided direction. /// /// If the depth is zero, or the node was already processed, it will return [`None`], indicating @@ -130,7 +163,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { let node = self.graph.node_weight(self.node); - if !self.discovered.visit(self.graph, self.node) { + if !self.discovered.visit(self.sbom_id, self.graph, self.node) { log::trace!("node got visited already"); return Ok((None, vec!["This node was already visited. Possible relationship loop. Skipping further processing.".into()])); } @@ -169,9 +202,8 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { )); }; - // retrieve external sbom graph from graph_cache - let Some(external_graph) = self.loader.load(self.connection, external_sbom_id).await? - else { + // retrieve external sbom graph, checking local cache first + let Some(external_graph) = self.load_external_graph(external_sbom_id).await? else { return Ok(( None, vec![format!( @@ -196,9 +228,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // recurse into those descendent nodes Ok(( Some( - self.with(external_graph.as_ref(), external_node_index) - .collect_graph() - .await?, + self.with( + external_sbom_id, + external_graph.as_ref(), + external_node_index, + ) + .collect_graph() + .await?, ), vec![], )) @@ -244,9 +280,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // get the external sbom graph - let Some(external_graph) = - self.loader.load(self.connection, matched.sbom_id).await? - else { + let Some(external_graph) = self.load_external_graph(matched.sbom_id).await? else { log::warn!( "external sbom graph {} not found in graph cache or database", matched.sbom_id @@ -268,7 +302,11 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // recurse into those external sbom nodes and save collector - .with(external_graph.as_ref(), external_node_index) + .with( + matched.sbom_id, + external_graph.as_ref(), + external_node_index, + ) .collect_graph() .await }) @@ -344,13 +382,19 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { #[derive(Clone)] pub struct GraphLoader { service: AnalysisService, + pub(crate) redundant_loads: Counter, } impl GraphLoader { pub fn new(service: AnalysisService) -> Self { - Self { service } + let meter = global::meter("AnalysisService"); + Self { + service, + redundant_loads: meter.u64_counter("collector_redundant_loads").build(), + } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn load( &self, connection: &impl ConnectionTrait, diff --git a/modules/analysis/src/service/mod.rs b/modules/analysis/src/service/mod.rs index 433d36dc7..ce0acddcb 100644 --- a/modules/analysis/src/service/mod.rs +++ b/modules/analysis/src/service/mod.rs @@ -504,6 +504,7 @@ impl AnalysisService { let ancestors = Collector::new( &graph_cache, graphs, + node.sbom_id, graph, node_index, Direction::Incoming, @@ -518,6 +519,7 @@ impl AnalysisService { let descendants = Collector::new( &graph_cache, graphs, + node.sbom_id, graph, node_index, Direction::Outgoing, diff --git a/modules/analysis/src/service/test/cache_eviction.rs b/modules/analysis/src/service/test/cache_eviction.rs new file mode 100644 index 000000000..100cdb6c2 --- /dev/null +++ b/modules/analysis/src/service/test/cache_eviction.rs @@ -0,0 +1,177 @@ +use crate::{ + config::AnalysisConfig, + service::{ + AnalysisService, ComponentReference, QueryOptions, test::warnings::collect_warnings, + }, +}; +use std::collections::BTreeMap; +use test_context::test_context; +use trustify_common::model::{BinaryByteSize, Paginated, PaginatedResults}; +use trustify_test_context::TrustifyContext; + +/// Given a set of ingested documents and a component query, +/// when the same query is run against a large cache and a tiny (1-byte) cache, +/// then the result counts and warning counts must be identical. +/// +/// The 1-byte cache guarantees that every SBOM is evicted immediately after +/// loading, forcing the collector to re-load graphs from the database on +/// every access that isn't covered by its own local cache. +async fn assert_cache_pressure_invariant( + ctx: &TrustifyContext, + documents: &[&str], + component: &str, + options: QueryOptions, +) -> PaginatedResults { + ctx.ingest_documents(documents.iter().copied()).await.ok(); + + let paginated = Paginated { + total: true, + ..Paginated::default() + }; + + // --- large cache: both SBOMs fit comfortably --- + + let service_large = AnalysisService::new(AnalysisConfig::default(), ctx.db.clone()); + + let result_large = service_large + .retrieve( + ComponentReference::Name(component), + options.clone(), + paginated, + &ctx.db, + ) + .await + .expect("large-cache retrieve failed"); + + let warnings_large = collect_warnings(&result_large.items); + + log::info!( + "large cache: total={:?}, warnings={}", + result_large.total, + warnings_large.len() + ); + + // --- tiny cache: every SBOM is evicted immediately --- + + let service_tiny = AnalysisService::new( + AnalysisConfig { + max_cache_size: BinaryByteSize::from(1u64), + ..Default::default() + }, + ctx.db.clone(), + ); + + let result_tiny = service_tiny + .retrieve( + ComponentReference::Name(component), + options, + paginated, + &ctx.db, + ) + .await + .expect("tiny-cache retrieve failed"); + + let warnings_tiny = collect_warnings(&result_tiny.items); + + log::info!( + "tiny cache: total={:?}, warnings={}", + result_tiny.total, + warnings_tiny.len() + ); + + assert_eq!( + result_large.total, result_tiny.total, + "result count diverged under cache pressure", + ); + + let sorted_values = |warnings: &BTreeMap<_, &[String]>| -> Vec { + let mut v: Vec<_> = warnings.values().flat_map(|w| w.iter().cloned()).collect(); + v.sort(); + v + }; + + assert_eq!( + sorted_values(&warnings_large), + sorted_values(&warnings_tiny), + "warnings diverged under cache pressure", + ); + + result_large +} + +/// Two CycloneDX SBOMs form a cross-SBOM loop: A's component X has an +/// external dependency on B's component Y, and B's Y has an external +/// dependency back on A's X. With a large cache both graphs stay resident +/// and `DiscoveredTracker` detects the cycle. With a tiny cache the graphs +/// get evicted and reloaded — the UUID-based tracker must still detect +/// the revisit. +#[test_context(TrustifyContext)] +#[test_log::test(tokio::test)] +async fn cross_sbom_cycle(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + assert_cache_pressure_invariant( + ctx, + &[ + "cyclonedx/loop-external/a.json", + "cyclonedx/loop-external/b.json", + ], + "X", + QueryOptions { + descendants: u64::MAX, + ..Default::default() + }, + ) + .await; + + Ok(()) +} + +/// Three components in one SBOM (C1, C2, C3) each have an external +/// dependency on the same component T1 in another SBOM. Verifies that +/// all three external references resolve correctly under cache pressure +/// and produce the same results as with a large cache. +#[test_context(TrustifyContext)] +#[test_log::test(tokio::test)] +async fn fan_out_same_target(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + let result = assert_cache_pressure_invariant( + ctx, + &[ + "cyclonedx/fan-out-external/container.json", + "cyclonedx/fan-out-external/target.json", + ], + "fan-out-container", + QueryOptions { + descendants: u64::MAX, + ..Default::default() + }, + ) + .await; + + let container = result + .items + .iter() + .find(|n| n.node_id == "root") + .expect("should have the root component node"); + + let descendants = container + .descendants + .as_ref() + .expect("root should have descendants"); + + assert_eq!(descendants.len(), 3); + + let resolved_count = descendants + .iter() + .filter(|c| { + c.descendants + .as_ref() + .is_some_and(|d| d.iter().any(|d| d.node_id.contains("t1"))) + }) + .count(); + + assert_eq!( + resolved_count, 3, + "all 3 children should have resolved T1 from the external SBOM" + ); + + Ok(()) +} diff --git a/modules/analysis/src/service/test/mod.rs b/modules/analysis/src/service/test/mod.rs index ac11c18f1..d81266fd7 100644 --- a/modules/analysis/src/service/test/mod.rs +++ b/modules/analysis/src/service/test/mod.rs @@ -1,3 +1,4 @@ +mod cache_eviction; mod query; mod recursive; mod warnings;