From bfadd333bfc5fe370f789ce4f2caa36cb6e39f8a Mon Sep 17 00:00:00 2001 From: Jens Reimann Date: Thu, 21 May 2026 14:15:10 +0200 Subject: [PATCH 1/2] fix: prevent deep recursion when SBOMs get evicted while processing In the case that SBOMs get evicted while loading, the DiscoveredTracker might miss those SBOMs (as seen) due to the fact that the pointer/handle is a new one, which it indeed didn't see before. Using the ID of the SBOM fixes this. Co-authored-by: Claude (cherry picked from commit dce4d88da047fbc26860868f5fefe26433e4428c) --- etc/test-data/cyclonedx/loop-external/a.json | 28 ++++ etc/test-data/cyclonedx/loop-external/b.json | 28 ++++ modules/analysis/src/service/collector.rs | 35 +++-- modules/analysis/src/service/mod.rs | 2 + .../src/service/test/cache_eviction.rs | 139 ++++++++++++++++++ modules/analysis/src/service/test/mod.rs | 1 + 6 files changed, 222 insertions(+), 11 deletions(-) create mode 100644 etc/test-data/cyclonedx/loop-external/a.json create mode 100644 etc/test-data/cyclonedx/loop-external/b.json create mode 100644 modules/analysis/src/service/test/cache_eviction.rs diff --git a/etc/test-data/cyclonedx/loop-external/a.json b/etc/test-data/cyclonedx/loop-external/a.json new file mode 100644 index 000000000..dd7d7a3b1 --- /dev/null +++ b/etc/test-data/cyclonedx/loop-external/a.json @@ -0,0 +1,28 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "name": "loop-external-a", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "x", + "name": "X", + "version": "1", + "purl": "pkg:rpm/redhat/X@0.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "x", + "dependsOn": ["urn:cdx:bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/1#y"] + } + ] +} diff --git a/etc/test-data/cyclonedx/loop-external/b.json b/etc/test-data/cyclonedx/loop-external/b.json new file mode 100644 index 000000000..8efa5d019 --- /dev/null +++ b/etc/test-data/cyclonedx/loop-external/b.json @@ -0,0 +1,28 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "name": "loop-external-b", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "y", + "name": "Y", + "version": "1", + "purl": "pkg:rpm/redhat/Y@0.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "y", + "dependsOn": ["urn:cdx:aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/1#x"] + } + ] +} diff --git a/modules/analysis/src/service/collector.rs b/modules/analysis/src/service/collector.rs index d1c467fac..c2763b713 100644 --- a/modules/analysis/src/service/collector.rs +++ b/modules/analysis/src/service/collector.rs @@ -7,15 +7,14 @@ use std::{collections::HashMap, sync::Arc}; /// Tracker for visited nodes, across graphs. #[derive(Default, Clone)] pub struct DiscoveredTracker { - cache: Arc>>, + cache: Arc>>, } impl DiscoveredTracker { - pub fn visit(&self, graph: &NodeGraph, node: NodeIndex) -> bool { + /// Check if a node was already visited, marking it as visited if not. + pub fn visit(&self, sbom_id: Uuid, graph: &NodeGraph, node: NodeIndex) -> bool { let mut maps = self.cache.lock(); - let map = maps - .entry(graph as *const Graph<_, _>) - .or_insert_with(|| graph.visit_map()); + let map = maps.entry(sbom_id).or_insert_with(|| graph.visit_map()); map.visit(node) } @@ -28,6 +27,7 @@ impl DiscoveredTracker { pub struct Collector<'a, C: ConnectionTrait> { graph_cache: &'a Arc, graphs: &'a [(Uuid, Arc)], + sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex, direction: Direction, @@ -44,6 +44,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Collector { graph_cache: self.graph_cache, graphs: self.graphs, + sbom_id: self.sbom_id, graph: self.graph, node: self.node, direction: self.direction, @@ -61,6 +62,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { pub fn new( graph_cache: &'a Arc, graphs: &'a [(Uuid, Arc)], + sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex, direction: Direction, @@ -73,6 +75,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Self { graph_cache, graphs, + sbom_id, graph, node, direction, @@ -88,8 +91,9 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { /// Continue with another graph and node as an entry point. /// /// Shares the visited set. - pub fn with(self, graph: &'a NodeGraph, node: NodeIndex) -> Self { + pub fn with(self, sbom_id: Uuid, graph: &'a NodeGraph, node: NodeIndex) -> Self { Self { + sbom_id, graph, node, ..self @@ -103,6 +107,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { Self { graph_cache: self.graph_cache, graphs: self.graphs, + sbom_id: self.sbom_id, graph: self.graph, node, direction: self.direction, @@ -130,7 +135,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { let node = self.graph.node_weight(self.node); - if !self.discovered.visit(self.graph, self.node) { + if !self.discovered.visit(self.sbom_id, self.graph, self.node) { log::trace!("node got visited already"); return Ok((None, vec!["This node was already visited. Possible relationship loop. Skipping further processing.".into()])); } @@ -196,9 +201,13 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // recurse into those descendent nodes Ok(( Some( - self.with(external_graph.as_ref(), external_node_index) - .collect_graph() - .await?, + self.with( + external_sbom_id, + external_graph.as_ref(), + external_node_index, + ) + .collect_graph() + .await?, ), vec![], )) @@ -268,7 +277,11 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // recurse into those external sbom nodes and save collector - .with(external_graph.as_ref(), external_node_index) + .with( + matched.sbom_id, + external_graph.as_ref(), + external_node_index, + ) .collect_graph() .await }) diff --git a/modules/analysis/src/service/mod.rs b/modules/analysis/src/service/mod.rs index 433d36dc7..ce0acddcb 100644 --- a/modules/analysis/src/service/mod.rs +++ b/modules/analysis/src/service/mod.rs @@ -504,6 +504,7 @@ impl AnalysisService { let ancestors = Collector::new( &graph_cache, graphs, + node.sbom_id, graph, node_index, Direction::Incoming, @@ -518,6 +519,7 @@ impl AnalysisService { let descendants = Collector::new( &graph_cache, graphs, + node.sbom_id, graph, node_index, Direction::Outgoing, diff --git a/modules/analysis/src/service/test/cache_eviction.rs b/modules/analysis/src/service/test/cache_eviction.rs new file mode 100644 index 000000000..71267f01c --- /dev/null +++ b/modules/analysis/src/service/test/cache_eviction.rs @@ -0,0 +1,139 @@ +use crate::{ + config::AnalysisConfig, + service::{ + AnalysisService, ComponentReference, QueryOptions, test::warnings::collect_warnings, + }, +}; +use std::sync::Arc; +use test_context::test_context; +use trustify_common::model::{BinaryByteSize, Paginated}; +use trustify_test_context::{IngestionResult, TrustifyContext}; + +/// Verify that cycle detection across external SBOM references works +/// regardless of cache pressure. +/// +/// Two CycloneDX SBOMs form a cross-SBOM loop: A's component X has an +/// external dependency on B's component Y, and B's Y has an external +/// dependency back on A's X. With a large cache both graphs stay resident +/// and `DiscoveredTracker` detects the cycle via pointer identity. With a +/// tiny cache the graphs get evicted and re-loaded at new addresses — +/// exposing whether the tracker still catches the revisit. +#[test_context(TrustifyContext)] +#[test_log::test(tokio::test)] +async fn test_cache_eviction_cross_sbom_cycle_detection( + ctx: &TrustifyContext, +) -> Result<(), anyhow::Error> { + ctx.ingest_documents([ + "cyclonedx/loop-external/a.json", + "cyclonedx/loop-external/b.json", + ]) + .await?; + + let options = QueryOptions { + descendants: u64::MAX, + ..Default::default() + }; + + // --- large cache: both SBOMs fit comfortably --- + + let service_large = AnalysisService::new(AnalysisConfig::default(), ctx.db.clone()); + + let result_large = service_large + .retrieve( + ComponentReference::Name("X"), + options.clone(), + Paginated { + total: true, + ..Paginated::default() + }, + &ctx.db, + ) + .await?; + + let warnings_large = collect_warnings(&result_large.items); + + log::info!( + "large cache: total={:?}, warnings={}", + result_large.total, + warnings_large.len() + ); + + // --- tiny cache: every SBOM is evicted immediately --- + + let service_tiny = AnalysisService::new( + AnalysisConfig { + max_cache_size: BinaryByteSize::from(1u64), + ..Default::default() + }, + ctx.db.clone(), + ); + + let result_tiny = service_tiny + .retrieve( + ComponentReference::Name("X"), + options, + Paginated { + total: true, + ..Paginated::default() + }, + &ctx.db, + ) + .await?; + + let warnings_tiny = collect_warnings(&result_tiny.items); + + log::info!( + "tiny cache: total={:?}, warnings={}", + result_tiny.total, + warnings_tiny.len() + ); + + // Both should detect the cross-SBOM cycle identically. + assert_eq!( + warnings_large.len(), + warnings_tiny.len(), + "cycle detection diverged under cache pressure: large cache produced {} warnings, tiny cache produced {}", + warnings_large.len(), + warnings_tiny.len(), + ); + + assert_eq!( + result_large.total, result_tiny.total, + "result count diverged under cache pressure", + ); + + Ok(()) +} + +/// Verify that loading the same SBOM twice under cache pressure returns +/// the same `Arc` handle, not two independent allocations. +#[test_context(TrustifyContext)] +#[test_log::test(tokio::test)] +async fn test_cache_eviction_same_handle(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + let result = ctx + .ingest_documents(["cyclonedx/loop-external/a.json"]) + .await?; + let [sbom_uuid] = result.into_uuid(); + + let service = AnalysisService::new( + AnalysisConfig { + max_cache_size: BinaryByteSize::from(1u64), + ..Default::default() + }, + ctx.db.clone(), + ); + + let first = service.load_graphs(&ctx.db, [sbom_uuid]).await?; + let second = service.load_graphs(&ctx.db, [sbom_uuid]).await?; + + let first_arc = &first.first().expect("first load should return a graph").1; + let second_arc = &second.first().expect("second load should return a graph").1; + + assert!( + Arc::ptr_eq(first_arc, second_arc), + "loading the same SBOM twice should return the same Arc handle, \ + but got two different allocations (cache eviction created a duplicate)", + ); + + Ok(()) +} diff --git a/modules/analysis/src/service/test/mod.rs b/modules/analysis/src/service/test/mod.rs index ac11c18f1..d81266fd7 100644 --- a/modules/analysis/src/service/test/mod.rs +++ b/modules/analysis/src/service/test/mod.rs @@ -1,3 +1,4 @@ +mod cache_eviction; mod query; mod recursive; mod warnings; From fc882b2a7748a50835914516357c31dfaba2deb7 Mon Sep 17 00:00:00 2001 From: Jens Reimann Date: Thu, 21 May 2026 16:14:44 +0200 Subject: [PATCH 2/2] fix(analysis): add collector-local cache to prevent redundant SBOM loads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple external references within the same collection tree point to the same SBOM, the global Moka cache may have evicted it between lookups — causing redundant database loads. The Collector now maintains a local cache of loaded graphs, shared across recursive calls, so the same SBOM is loaded at most once per collection. A race between concurrent branches is detected via the Entry API and tracked with the `collector_redundant_loads` OTel counter. Co-authored-by: Claude (cherry picked from commit 5ac0ce2bcd17ecfe5e1190be5ad8f3f260503444) --- .../cyclonedx/fan-out-external/container.json | 55 ++++++ .../cyclonedx/fan-out-external/target.json | 29 +++ modules/analysis/src/service/collector.rs | 47 ++++- .../src/service/test/cache_eviction.rs | 170 +++++++++++------- 4 files changed, 227 insertions(+), 74 deletions(-) create mode 100644 etc/test-data/cyclonedx/fan-out-external/container.json create mode 100644 etc/test-data/cyclonedx/fan-out-external/target.json diff --git a/etc/test-data/cyclonedx/fan-out-external/container.json b/etc/test-data/cyclonedx/fan-out-external/container.json new file mode 100644 index 000000000..adf304bb5 --- /dev/null +++ b/etc/test-data/cyclonedx/fan-out-external/container.json @@ -0,0 +1,55 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:cccccccc-cccc-cccc-cccc-cccccccccccc", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "bom-ref": "root", + "name": "fan-out-container", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "c1", + "name": "C1", + "version": "1", + "purl": "pkg:rpm/redhat/C1@1.0.0?arch=src", + "type": "library" + }, + { + "bom-ref": "c2", + "name": "C2", + "version": "1", + "purl": "pkg:rpm/redhat/C2@1.0.0?arch=src", + "type": "library" + }, + { + "bom-ref": "c3", + "name": "C3", + "version": "1", + "purl": "pkg:rpm/redhat/C3@1.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "root", + "dependsOn": ["c1", "c2", "c3"] + }, + { + "ref": "c1", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + }, + { + "ref": "c2", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + }, + { + "ref": "c3", + "dependsOn": ["urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt/1#t1"] + } + ] +} diff --git a/etc/test-data/cyclonedx/fan-out-external/target.json b/etc/test-data/cyclonedx/fan-out-external/target.json new file mode 100644 index 000000000..d9c5837be --- /dev/null +++ b/etc/test-data/cyclonedx/fan-out-external/target.json @@ -0,0 +1,29 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:cdx:tttttttt-tttt-tttt-tttt-tttttttttttt", + "version": 1, + "metadata": { + "timestamp": "1970-01-01T13:30:00Z", + "component": { + "bom-ref": "root", + "name": "fan-out-target", + "type": "application" + } + }, + "components": [ + { + "bom-ref": "t1", + "name": "T1", + "version": "1", + "purl": "pkg:rpm/redhat/T1@1.0.0?arch=src", + "type": "library" + } + ], + "dependencies": [ + { + "ref": "root", + "dependsOn": ["t1"] + } + ] +} diff --git a/modules/analysis/src/service/collector.rs b/modules/analysis/src/service/collector.rs index c2763b713..04b858fe1 100644 --- a/modules/analysis/src/service/collector.rs +++ b/modules/analysis/src/service/collector.rs @@ -2,7 +2,10 @@ use super::*; use crate::model::graph::{ExternalNode, PackageNode}; use futures::stream::{self, StreamExt}; use parking_lot::Mutex; -use std::{collections::HashMap, sync::Arc}; +use std::{ + collections::{HashMap, hash_map::Entry}, + sync::Arc, +}; /// Tracker for visited nodes, across graphs. #[derive(Default, Clone)] @@ -33,6 +36,7 @@ pub struct Collector<'a, C: ConnectionTrait> { direction: Direction, depth: u64, discovered: DiscoveredTracker, + loaded_graphs: Arc>>>, relationships: &'a HashSet, connection: &'a C, concurrency: usize, @@ -50,6 +54,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { direction: self.direction, depth: self.depth, discovered: self.discovered.clone(), + loaded_graphs: self.loaded_graphs.clone(), relationships: self.relationships, connection: self.connection, concurrency: self.concurrency, @@ -81,6 +86,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { direction, depth, discovered: Default::default(), + loaded_graphs: Default::default(), relationships, connection, concurrency, @@ -113,6 +119,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { direction: self.direction, depth: self.depth - 1, discovered: self.discovered.clone(), + loaded_graphs: self.loaded_graphs.clone(), relationships: self.relationships, connection: self.connection, concurrency: self.concurrency, @@ -120,6 +127,27 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { } } + /// Load an external SBOM graph, checking the local cache first. + async fn load_external_graph(&self, sbom_id: Uuid) -> Result>, Error> { + if let Some(graph) = self.loaded_graphs.lock().get(&sbom_id).cloned() { + return Ok(Some(graph)); + } + + let Some(graph) = self.loader.load(self.connection, sbom_id).await? else { + return Ok(None); + }; + + let mut map = self.loaded_graphs.lock(); + match map.entry(sbom_id) { + Entry::Occupied(e) => { + log::debug!("concurrent load of external SBOM {sbom_id}, reusing existing handle"); + self.loader.redundant_loads.add(1, &[]); + Ok(Some(e.get().clone())) + } + Entry::Vacant(e) => Ok(Some(e.insert(graph).clone())), + } + } + /// Collect related nodes in the provided direction. /// /// If the depth is zero, or the node was already processed, it will return [`None`], indicating @@ -174,9 +202,8 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { )); }; - // retrieve external sbom graph from graph_cache - let Some(external_graph) = self.loader.load(self.connection, external_sbom_id).await? - else { + // retrieve external sbom graph, checking local cache first + let Some(external_graph) = self.load_external_graph(external_sbom_id).await? else { return Ok(( None, vec![format!( @@ -253,9 +280,7 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { // get the external sbom graph - let Some(external_graph) = - self.loader.load(self.connection, matched.sbom_id).await? - else { + let Some(external_graph) = self.load_external_graph(matched.sbom_id).await? else { log::warn!( "external sbom graph {} not found in graph cache or database", matched.sbom_id @@ -357,13 +382,19 @@ impl<'a, C: ConnectionTrait> Collector<'a, C> { #[derive(Clone)] pub struct GraphLoader { service: AnalysisService, + pub(crate) redundant_loads: Counter, } impl GraphLoader { pub fn new(service: AnalysisService) -> Self { - Self { service } + let meter = global::meter("AnalysisService"); + Self { + service, + redundant_loads: meter.u64_counter("collector_redundant_loads").build(), + } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn load( &self, connection: &impl ConnectionTrait, diff --git a/modules/analysis/src/service/test/cache_eviction.rs b/modules/analysis/src/service/test/cache_eviction.rs index 71267f01c..100cdb6c2 100644 --- a/modules/analysis/src/service/test/cache_eviction.rs +++ b/modules/analysis/src/service/test/cache_eviction.rs @@ -4,34 +4,29 @@ use crate::{ AnalysisService, ComponentReference, QueryOptions, test::warnings::collect_warnings, }, }; -use std::sync::Arc; +use std::collections::BTreeMap; use test_context::test_context; -use trustify_common::model::{BinaryByteSize, Paginated}; -use trustify_test_context::{IngestionResult, TrustifyContext}; +use trustify_common::model::{BinaryByteSize, Paginated, PaginatedResults}; +use trustify_test_context::TrustifyContext; -/// Verify that cycle detection across external SBOM references works -/// regardless of cache pressure. +/// Given a set of ingested documents and a component query, +/// when the same query is run against a large cache and a tiny (1-byte) cache, +/// then the result counts and warning counts must be identical. /// -/// Two CycloneDX SBOMs form a cross-SBOM loop: A's component X has an -/// external dependency on B's component Y, and B's Y has an external -/// dependency back on A's X. With a large cache both graphs stay resident -/// and `DiscoveredTracker` detects the cycle via pointer identity. With a -/// tiny cache the graphs get evicted and re-loaded at new addresses — -/// exposing whether the tracker still catches the revisit. -#[test_context(TrustifyContext)] -#[test_log::test(tokio::test)] -async fn test_cache_eviction_cross_sbom_cycle_detection( +/// The 1-byte cache guarantees that every SBOM is evicted immediately after +/// loading, forcing the collector to re-load graphs from the database on +/// every access that isn't covered by its own local cache. +async fn assert_cache_pressure_invariant( ctx: &TrustifyContext, -) -> Result<(), anyhow::Error> { - ctx.ingest_documents([ - "cyclonedx/loop-external/a.json", - "cyclonedx/loop-external/b.json", - ]) - .await?; - - let options = QueryOptions { - descendants: u64::MAX, - ..Default::default() + documents: &[&str], + component: &str, + options: QueryOptions, +) -> PaginatedResults { + ctx.ingest_documents(documents.iter().copied()).await.ok(); + + let paginated = Paginated { + total: true, + ..Paginated::default() }; // --- large cache: both SBOMs fit comfortably --- @@ -40,15 +35,13 @@ async fn test_cache_eviction_cross_sbom_cycle_detection( let result_large = service_large .retrieve( - ComponentReference::Name("X"), + ComponentReference::Name(component), options.clone(), - Paginated { - total: true, - ..Paginated::default() - }, + paginated, &ctx.db, ) - .await?; + .await + .expect("large-cache retrieve failed"); let warnings_large = collect_warnings(&result_large.items); @@ -70,15 +63,13 @@ async fn test_cache_eviction_cross_sbom_cycle_detection( let result_tiny = service_tiny .retrieve( - ComponentReference::Name("X"), + ComponentReference::Name(component), options, - Paginated { - total: true, - ..Paginated::default() - }, + paginated, &ctx.db, ) - .await?; + .await + .expect("tiny-cache retrieve failed"); let warnings_tiny = collect_warnings(&result_tiny.items); @@ -88,51 +79,98 @@ async fn test_cache_eviction_cross_sbom_cycle_detection( warnings_tiny.len() ); - // Both should detect the cross-SBOM cycle identically. assert_eq!( - warnings_large.len(), - warnings_tiny.len(), - "cycle detection diverged under cache pressure: large cache produced {} warnings, tiny cache produced {}", - warnings_large.len(), - warnings_tiny.len(), + result_large.total, result_tiny.total, + "result count diverged under cache pressure", ); + let sorted_values = |warnings: &BTreeMap<_, &[String]>| -> Vec { + let mut v: Vec<_> = warnings.values().flat_map(|w| w.iter().cloned()).collect(); + v.sort(); + v + }; + assert_eq!( - result_large.total, result_tiny.total, - "result count diverged under cache pressure", + sorted_values(&warnings_large), + sorted_values(&warnings_tiny), + "warnings diverged under cache pressure", ); - Ok(()) + result_large } -/// Verify that loading the same SBOM twice under cache pressure returns -/// the same `Arc` handle, not two independent allocations. +/// Two CycloneDX SBOMs form a cross-SBOM loop: A's component X has an +/// external dependency on B's component Y, and B's Y has an external +/// dependency back on A's X. With a large cache both graphs stay resident +/// and `DiscoveredTracker` detects the cycle. With a tiny cache the graphs +/// get evicted and reloaded — the UUID-based tracker must still detect +/// the revisit. #[test_context(TrustifyContext)] #[test_log::test(tokio::test)] -async fn test_cache_eviction_same_handle(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { - let result = ctx - .ingest_documents(["cyclonedx/loop-external/a.json"]) - .await?; - let [sbom_uuid] = result.into_uuid(); - - let service = AnalysisService::new( - AnalysisConfig { - max_cache_size: BinaryByteSize::from(1u64), +async fn cross_sbom_cycle(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + assert_cache_pressure_invariant( + ctx, + &[ + "cyclonedx/loop-external/a.json", + "cyclonedx/loop-external/b.json", + ], + "X", + QueryOptions { + descendants: u64::MAX, ..Default::default() }, - ctx.db.clone(), - ); + ) + .await; - let first = service.load_graphs(&ctx.db, [sbom_uuid]).await?; - let second = service.load_graphs(&ctx.db, [sbom_uuid]).await?; + Ok(()) +} - let first_arc = &first.first().expect("first load should return a graph").1; - let second_arc = &second.first().expect("second load should return a graph").1; +/// Three components in one SBOM (C1, C2, C3) each have an external +/// dependency on the same component T1 in another SBOM. Verifies that +/// all three external references resolve correctly under cache pressure +/// and produce the same results as with a large cache. +#[test_context(TrustifyContext)] +#[test_log::test(tokio::test)] +async fn fan_out_same_target(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + let result = assert_cache_pressure_invariant( + ctx, + &[ + "cyclonedx/fan-out-external/container.json", + "cyclonedx/fan-out-external/target.json", + ], + "fan-out-container", + QueryOptions { + descendants: u64::MAX, + ..Default::default() + }, + ) + .await; + + let container = result + .items + .iter() + .find(|n| n.node_id == "root") + .expect("should have the root component node"); + + let descendants = container + .descendants + .as_ref() + .expect("root should have descendants"); + + assert_eq!(descendants.len(), 3); + + let resolved_count = descendants + .iter() + .filter(|c| { + c.descendants + .as_ref() + .is_some_and(|d| d.iter().any(|d| d.node_id.contains("t1"))) + }) + .count(); - assert!( - Arc::ptr_eq(first_arc, second_arc), - "loading the same SBOM twice should return the same Arc handle, \ - but got two different allocations (cache eviction created a duplicate)", + assert_eq!( + resolved_count, 3, + "all 3 children should have resolved T1 from the external SBOM" ); Ok(())