diff --git a/etc/test-data/cyclonedx/fan-out-external/container.json b/etc/test-data/cyclonedx/fan-out-external/container.json new file mode 100644 index 000000000..adf304bb5 --- /dev/null +++ b/etc/test-data/cyclonedx/fan-out-external/container.json @@ -0,0 +1,55 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:cccccccc-cccc-cccc-cccc-cccccccccccc", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "bom-ref": "root", + "name": "fan-out-container", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "c1", + "name": "C1", + "version": "1", + "purl": "pkg:rpm/redhat/C1@1.0.0?arch=src", + "type": "library" + }, + { + "bom-ref": "c2", + "name": "C2", + "version": "1", + "purl": "pkg:rpm/redhat/C2@1.0.0?arch=src", + "type": "library" + }, + { + "bom-ref": "c3", + "name": "C3", + "version": "1", + "purl": "pkg:rpm/redhat/C3@1.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "root", + "dependsOn": ["c1", "c2", "c3"] + }, + { + "ref": "c1", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + }, + { + "ref": "c2", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + }, + { + "ref": "c3", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + } + ] +} diff --git a/etc/test-data/cyclonedx/fan-out-external/target.json b/etc/test-data/cyclonedx/fan-out-external/target.json new file mode 100644 index 000000000..d9c5837be --- /dev/null +++ b/etc/test-data/cyclonedx/fan-out-external/target.json @@ -0,0 +1,29 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "bom-ref": "root", + "name": "fan-out-target", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "t1", + "name": "T1", + "version": "1", + "purl": "pkg:rpm/redhat/T1@1.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "root", + "dependsOn": ["t1"] + } + ] +} diff --git a/etc/test-data/cyclonedx/loop-external/a.json b/etc/test-data/cyclonedx/loop-external/a.json new file mode 100644 index 000000000..dd7d7a3b1 --- /dev/null +++ b/etc/test-data/cyclonedx/loop-external/a.json @@ -0,0 +1,28 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "name": "loop-external-a", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "x", + "name": "X", + "version": "1", + "purl": "pkg:rpm/redhat/X@0.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "x", + "dependsOn": ["urn:cdx:bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/1#y"] + } + ] +} diff --git a/etc/test-data/cyclonedx/loop-external/b.json b/etc/test-data/cyclonedx/loop-external/b.json new file mode 100644 index 000000000..8efa5d019 --- /dev/null +++ b/etc/test-data/cyclonedx/loop-external/b.json @@ -0,0 +1,28 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "name": "loop-external-b", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "y", + "name": "Y", + "version": "1", + "purl": "pkg:rpm/redhat/Y@0.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "y", + "dependsOn": ["urn:cdx:aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/1#x"] + } + ] +} diff --git a/modules/analysis/src/service/collector.rs b/modules/analysis/src/service/collector.rs index d1c467fac..04b858fe1 100644 --- a/modules/analysis/src/service/collector.rs +++ b/modules/analysis/src/service/collector.rs @@ -2,20 +2,22 @@ use super::*; use crate::model::graph::{ExternalNode, PackageNode}; use futures::stream::{self, StreamExt}; use parking_lot::Mutex; -use std::{collections::HashMap, sync::Arc}; +use std::{ + collections::{HashMap, hash_map::Entry}, + sync::Arc, +}; /// Tracker for visited nodes, across graphs. #[derive(Default, Clone)] pub struct DiscoveredTracker { - cache: Arc>>, + cache: Arc>>, } impl DiscoveredTracker { - pub fn visit(&self, graph: &NodeGraph, node: NodeIndex) -> bool { + /// Check if a node was already visited, marking it as visited if not. + pub fn visit(&self, sbom_id: Uuid, graph: &NodeGraph, node: NodeIndex) -> bool { let mut maps = self.cache.lock(); - let map = maps - .entry(graph as *const Graph<_, _>) - .or_insert_with(|| graph.visit_map()); + let map = maps.entry(sbom_id).or_insert_with(|| graph.visit_map()); map.visit(node) } @@ -28,11 +30,13 @@ impl DiscoveredTracker { pub struct Collector<'a, C: ConnectionTrait> { graph_cache: &'a Arc, graphs: &'a [(Uuid, Arc)], + sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex, direction: Direction, depth: u64, discovered: DiscoveredTracker, + loaded_graphs: Arc>>>, relationships: &'a HashSet, connection: &'a C, concurrency: usize, @@ -44,11 +48,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Collector { graph_cache: self.graph_cache, graphs: self.graphs, + sbom_id: self.sbom_id, graph: self.graph, node: self.node, direction: self.direction, depth: self.depth, discovered: self.discovered.clone(), + loaded_graphs: self.loaded_graphs.clone(), relationships: self.relationships, connection: self.connection, concurrency: self.concurrency, @@ -61,6 +67,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { pub fn new( graph_cache: &'a Arc, graphs: &'a [(Uuid, Arc)], + sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex, direction: Direction, @@ -73,11 +80,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Self { graph_cache, graphs, + sbom_id, graph, node, direction, depth, discovered: Default::default(), + loaded_graphs: Default::default(), relationships, connection, concurrency, @@ -88,8 +97,9 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { /// Continue with another graph and node as an entry point. /// /// Shares the visited set. - pub fn with(self, graph: &'a NodeGraph, node: NodeIndex) -> Self { + pub fn with(self, sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex) -> Self { Self { + sbom_id, graph, node, ..self @@ -103,11 +113,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Self { graph_cache: self.graph_cache, graphs: self.graphs, + sbom_id: self.sbom_id, graph: self.graph, node, direction: self.direction, depth: self.depth - 1, discovered: self.discovered.clone(), + loaded_graphs: self.loaded_graphs.clone(), relationships: self.relationships, connection: self.connection, concurrency: self.concurrency, @@ -115,6 +127,27 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { } } + /// Load an external SBOM graph, checking the local cache first. + async fn load_external_graph(&self, sbom_id: Uuid) -> Result>, Error> { + if let Some(graph) = self.loaded_graphs.lock().get(&sbom_id).cloned() { + return Ok(Some(graph)); + } + + let Some(graph) = self.loader.load(self.connection, sbom_id).await? else { + return Ok(None); + }; + + let mut map = self.loaded_graphs.lock(); + match map.entry(sbom_id) { + Entry::Occupied(e) => { + log::debug!("concurrent load of external SBOM {sbom_id}, reusing existing handle"); + self.loader.redundant_loads.add(1, &[]); + Ok(Some(e.get().clone())) + } + Entry::Vacant(e) => Ok(Some(e.insert(graph).clone())), + } + } + /// Collect related nodes in the provided direction. /// /// If the depth is zero, or the node was already processed, it will return [`None`], indicating @@ -130,7 +163,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { let node = self.graph.node_weight(self.node); - if !self.discovered.visit(self.graph, self.node) { + if !self.discovered.visit(self.sbom_id, self.graph, self.node) { log::trace!("node got visited already"); return Ok((None, vec!["This node was already visited. Possible relationship loop. Skipping further processing.".into()])); } @@ -169,9 +202,8 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { )); }; - // retrieve external sbom graph from graph_cache - let Some(external_graph) = self.loader.load(self.connection, external_sbom_id).await? - else { + // retrieve external sbom graph, checking local cache first + let Some(external_graph) = self.load_external_graph(external_sbom_id).await? else { return Ok(( None, vec![format!( @@ -196,9 +228,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // recurse into those descendent nodes Ok(( Some( - self.with(external_graph.as_ref(), external_node_index) - .collect_graph() - .await?, + self.with( + external_sbom_id, + external_graph.as_ref(), + external_node_index, + ) + .collect_graph() + .await?, ), vec![], )) @@ -244,9 +280,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // get the external sbom graph - let Some(external_graph) = - self.loader.load(self.connection, matched.sbom_id).await? - else { + let Some(external_graph) = self.load_external_graph(matched.sbom_id).await? else { log::warn!( "external sbom graph {} not found in graph cache or database", matched.sbom_id @@ -268,7 +302,11 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // recurse into those external sbom nodes and save collector - .with(external_graph.as_ref(), external_node_index) + .with( + matched.sbom_id, + external_graph.as_ref(), + external_node_index, + ) .collect_graph() .await }) @@ -344,13 +382,19 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { #[derive(Clone)] pub struct GraphLoader { service: AnalysisService, + pub(crate) redundant_loads: Counter, } impl GraphLoader { pub fn new(service: AnalysisService) -> Self { - Self { service } + let meter = global::meter("AnalysisService"); + Self { + service, + redundant_loads: meter.u64_counter("collector_redundant_loads").build(), + } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn load( &self, connection: &impl ConnectionTrait, diff --git a/modules/analysis/src/service/mod.rs b/modules/analysis/src/service/mod.rs index 433d36dc7..ce0acddcb 100644 --- a/modules/analysis/src/service/mod.rs +++ b/modules/analysis/src/service/mod.rs @@ -504,6 +504,7 @@ impl AnalysisService { let ancestors = Collector::new( &graph_cache, graphs, + node.sbom_id, graph, node_index, Direction::Incoming, @@ -518,6 +519,7 @@ impl AnalysisService { let descendants = Collector::new( &graph_cache, graphs, + node.sbom_id, graph, node_index, Direction::Outgoing, diff --git a/modules/analysis/src/service/test/cache_eviction.rs b/modules/analysis/src/service/test/cache_eviction.rs new file mode 100644 index 000000000..b4649ddf1 --- /dev/null +++ b/modules/analysis/src/service/test/cache_eviction.rs @@ -0,0 +1,133 @@ +use crate::{ + config::AnalysisConfig, + service::{ + AnalysisService, ComponentReference, QueryOptions, test::warnings::collect_warnings, + }, +}; +use std::sync::Arc; +use test_context::test_context; +use trustify_common::model::{BinaryByteSize, Paginated}; +use trustify_test_context::{IngestionResult, TrustifyContext}; + +/// Verify that cycle detection across external SBOM references works +/// regardless of cache pressure. +/// +/// Two CycloneDX SBOMs form a cross-SBOM loop: A's component X has an +/// external dependency on B's component Y, and B's Y has an external +/// dependency back on A's X. With a large cache both graphs stay resident +/// and `DiscoveredTracker` detects the cycle via pointer identity. With a +/// tiny cache the graphs get evicted and re-loaded at new addresses — +/// exposing whether the tracker still catches the revisit. +#[test_context(TrustifyContext)] +#[test_log::test(tokio::test)] +async fn test_cache_eviction_cross_sbom_cycle_detection( + ctx: &TrustifyContext, +) -> Result<(), anyhow::Error> { + ctx.ingest_documents([ + "cyclonedx/loop-external/a.json", + "cyclonedx/loop-external/b.json", + ]) + .await?; + + let options = QueryOptions { + descendants: u64::MAX, + ..Default::default() + }; + + // --- large cache: both SBOMs fit comfortably --- + + let service_large = AnalysisService::new(AnalysisConfig::default(), ctx.db.clone()); + + let result_large = service_large + .retrieve( + ComponentReference::Name("X"), + options.clone(), + Paginated::default(), + &ctx.db, + ) + .await?; + + let warnings_large = collect_warnings(&result_large.items); + + log::info!( + "large cache: total={:?}, warnings={}", + result_large.total, + warnings_large.len() + ); + + // --- tiny cache: every SBOM is evicted immediately --- + + let service_tiny = AnalysisService::new( + AnalysisConfig { + max_cache_size: BinaryByteSize::from(1u64), + ..Default::default() + }, + ctx.db.clone(), + ); + + let result_tiny = service_tiny + .retrieve( + ComponentReference::Name("X"), + options, + Paginated::default(), + &ctx.db, + ) + .await?; + + let warnings_tiny = collect_warnings(&result_tiny.items); + + log::info!( + "tiny cache: total={:?}, warnings={}", + result_tiny.total, + warnings_tiny.len() + ); + + // Both should detect the cross-SBOM cycle identically. + assert_eq!( + warnings_large.len(), + warnings_tiny.len(), + "cycle detection diverged under cache pressure: large cache produced {} warnings, tiny cache produced {}", + warnings_large.len(), + warnings_tiny.len(), + ); + + assert_eq!( + result_large.total, result_tiny.total, + "result count diverged under cache pressure", + ); + + Ok(()) +} + +/// Verify that loading the same SBOM twice under cache pressure returns +/// the same `Arc` handle, not two independent allocations. +#[test_context(TrustifyContext)] +#[test_log::test(tokio::test)] +async fn test_cache_eviction_same_handle(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + let result = ctx + .ingest_documents(["cyclonedx/loop-external/a.json"]) + .await?; + let [sbom_uuid] = result.into_uuid(); + + let service = AnalysisService::new( + AnalysisConfig { + max_cache_size: BinaryByteSize::from(1u64), + ..Default::default() + }, + ctx.db.clone(), + ); + + let first = service.load_graphs(&ctx.db, [sbom_uuid]).await?; + let second = service.load_graphs(&ctx.db, [sbom_uuid]).await?; + + let first_arc = &first.first().expect("first load should return a graph").1; + let second_arc = &second.first().expect("second load should return a graph").1; + + assert!( + Arc::ptr_eq(first_arc, second_arc), + "loading the same SBOM twice should return the same Arc handle, \ + but got two different allocations (cache eviction created a duplicate)", + ); + + Ok(()) +} diff --git a/modules/analysis/src/service/test/mod.rs b/modules/analysis/src/service/test/mod.rs index ac11c18f1..d81266fd7 100644 --- a/modules/analysis/src/service/test/mod.rs +++ b/modules/analysis/src/service/test/mod.rs @@ -1,3 +1,4 @@ +mod cache_eviction; mod query; mod recursive; mod warnings;