From e5b3d5002e262cce08e9dd26af5bf30dfc7ad9fc Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 25 Mar 2026 10:03:04 +0100 Subject: [PATCH 01/23] perf(purl): replace sequential PURL lookups with bulk queries in recommend_purls Replace per-PURL sequential self.purls() calls with two bulk database queries: one for base_purl using exact column matches with OR conditions, and one for versioned_purl using IN clause. This eliminates N+1 ILIKE queries on JSONB fields and unnecessary COUNT(*) pagination queries. Version filtering logic (redhat pattern match, semver comparison) is preserved and applied in Rust over the bulk-fetched results. Implements TC-3886 Assisted-by: Claude Code (cherry picked from commit 01bcad85cbad8c8efcc0d8e8a91b5224cb83d187) --- modules/fundamental/src/purl/service/mod.rs | 248 +++++++++++++++----- 1 file changed, 184 insertions(+), 64 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 7750aad2d..054f3c819 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -439,15 +439,17 @@ impl PurlService { #[allow(clippy::unwrap_used)] let pattern = Regex::new("redhat-[0-9]+$").unwrap(); - for purl in purls { - let query = match purl.to_string().split_once('@') { - Some((p, _)) => format!("purl~{p}"), - None => format!("purl~{purl}"), - }; - let summaries = self - .purls(q(&query), Default::default(), connection) - .await?; + // Phase 1: Parse all input PURLs and build lookup keys + struct InputPurl { + purl_string: String, + ty: String, + namespace: Option, + name: String, + input_version: semver::Version, + } + let mut input_purls: Vec = Vec::new(); + for purl in purls { let Some(ref input_version_str) = purl.version else { continue; }; @@ -460,67 +462,185 @@ impl PurlService { continue; }; - let highest_patch = summaries - .items - .into_iter() - .fold( - None, - |acc: Option<(PurlSummary, semver::Version)>, summary: PurlSummary| { - summary - .head - .purl - .version - .as_ref() - .filter(|version| pattern.is_match(version)) - .and_then(|version| { - lenient_semver::parse(version) - .inspect_err(|_| { - log::debug!( - "purl {} version {:?} failed to parse", - summary.head.purl, - summary.head.purl.version - ) - }) - .ok() - }) - .filter(|version| { - version.major == input_version.major - && version.minor == input_version.minor - && version.patch == input_version.patch - }) - .and_then(|version| match &acc { - Some((_, v)) if version.pre > v.pre => Some((summary, version)), - None => Some((summary, version)), - _ => None, - }) - .or(acc) - }, - ) - .map(|(summary, _)| summary); + input_purls.push(InputPurl { + purl_string: purl.to_string(), + ty: purl.ty.clone(), + namespace: purl.namespace.clone(), + name: purl.name.clone(), + input_version, + }); + } - let mut recommended_purls = Vec::new(); - if let Some(highest) = highest_patch - && let Some(purl_details) = self - .versioned_purl_by_uuid(&highest.head.purl.version_uuid(), connection) - .await? - { - recommended_purls.push(RecommendEntry { - package: highest.head.purl.to_string(), - vulnerabilities: purl_details - .advisories - .iter() - .flat_map(|advisory| { - advisory.status.iter().map(|status| VulnerabilityStatus { - id: status.vulnerability.identifier.clone(), - status: Some(status.into()), - justification: None, - }) + if input_purls.is_empty() { + return Ok(recommendations); + } + + // Phase 2: Bulk query — find all base_purls matching any input PURL's (type, namespace, name) + let mut base_condition = Condition::any(); + let mut seen_keys: std::collections::HashSet = std::collections::HashSet::new(); + for ip in &input_purls { + let key = format!( + "{}|{}|{}", + ip.ty, + ip.namespace.as_deref().unwrap_or(""), + ip.name + ); + if !seen_keys.insert(key) { + continue; + } + let mut cond = Condition::all() + .add(base_purl::Column::Type.eq(&ip.ty)) + .add(base_purl::Column::Name.eq(&ip.name)); + if let Some(ns) = &ip.namespace { + cond = cond.add(base_purl::Column::Namespace.eq(ns)); + } else { + cond = cond.add(base_purl::Column::Namespace.is_null()); + } + base_condition = base_condition.add(cond); + } + + let base_purls: Vec = base_purl::Entity::find() + .filter(base_condition) + .all(connection) + .await?; + + if base_purls.is_empty() { + for ip in &input_purls { + recommendations.insert(ip.purl_string.clone(), Vec::new()); + } + return Ok(recommendations); + } + + // Build base_purl lookup: key -> base_purl model + let base_purl_map: HashMap = base_purls + .iter() + .map(|bp| { + let key = format!( + "{}|{}|{}", + bp.r#type, + bp.namespace.as_deref().unwrap_or(""), + bp.name + ); + (key, bp) + }) + .collect(); + + // Phase 3: Bulk query — get all versioned_purls for matching base_purls + let base_purl_ids: Vec = base_purls.iter().map(|bp| bp.id).collect(); + let all_versioned: Vec = versioned_purl::Entity::find() + .filter(versioned_purl::Column::BasePurlId.is_in(base_purl_ids)) + .all(connection) + .await?; + + // Group versioned_purls by base_purl_id + let mut versioned_by_base: HashMap> = HashMap::new(); + for vp in &all_versioned { + versioned_by_base + .entry(vp.base_purl_id) + .or_default() + .push(vp); + } + + // Phase 4: For each input PURL, find the highest redhat patch version + struct Winner<'a> { + purl_string: String, + versioned_purl: &'a versioned_purl::Model, + #[allow(dead_code)] + base: &'a base_purl::Model, + } + let mut winners: Vec = Vec::new(); + + for ip in &input_purls { + let key = format!( + "{}|{}|{}", + ip.ty, + ip.namespace.as_deref().unwrap_or(""), + ip.name + ); + let Some(base) = base_purl_map.get(&key) else { + recommendations.insert(ip.purl_string.clone(), Vec::new()); + continue; + }; + + let highest_patch = versioned_by_base + .get(&base.id) + .into_iter() + .flatten() + .filter(|vp| pattern.is_match(&vp.version)) + .filter_map(|vp| { + lenient_semver::parse(&vp.version) + .inspect_err(|_| { + log::debug!("purl version {:?} failed to parse", vp.version); }) - .collect(), + .ok() + .map(|v| (*vp, v)) + }) + .filter(|(_, version)| { + version.major == ip.input_version.major + && version.minor == ip.input_version.minor + && version.patch == ip.input_version.patch + }) + .max_by(|(_, a), (_, b)| a.pre.cmp(&b.pre)) + .map(|(vp, _)| vp); + + if let Some(winner_vp) = highest_patch { + winners.push(Winner { + purl_string: ip.purl_string.clone(), + versioned_purl: winner_vp, + base, }); + } else { + recommendations.insert(ip.purl_string.clone(), Vec::new()); } + } + + if winners.is_empty() { + return Ok(recommendations); + } - recommendations.insert(purl.to_string(), recommended_purls); + // Phase 5: Fetch vulnerability details for each winner + // Use versioned_purl_by_uuid to get full details including qualified_purl with qualifiers + for winner in &winners { + if let Some(purl_details) = self + .versioned_purl_by_uuid(&winner.versioned_purl.id, connection) + .await? + { + let package_str = purl_details + .purls + .first() + .map(|p| p.purl.to_string()) + .unwrap_or_else(|| { + Purl { + ty: purl_details.head.purl.ty.clone(), + namespace: purl_details.head.purl.namespace.clone(), + name: purl_details.head.purl.name.clone(), + version: Some(purl_details.head.purl.version.clone().unwrap_or_default()), + qualifiers: Default::default(), + } + .to_string() + }); + + let vulnerabilities: Vec = purl_details + .advisories + .iter() + .flat_map(|advisory| &advisory.status) + .unique_by(|status| &status.vulnerability.identifier) + .map(|status| VulnerabilityStatus { + id: status.vulnerability.identifier.clone(), + status: Some(status.into()), + justification: None, + remediations: status.remediations.clone(), + }) + .collect(); + + recommendations.insert( + winner.purl_string.clone(), + vec![RecommendEntry { + package: package_str, + vulnerabilities, + }], + ); + } } Ok(recommendations) From e3a205e62f13154546c7cad730dc2c2fed32612e Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 25 Mar 2026 10:22:23 +0100 Subject: [PATCH 02/23] refactor(purl): address review feedback on recommend_purls - Replace fragile String-with-delimiter composite key with a typed PurlKey struct (Eq + Hash) for type-safe HashMap lookups - Store PurlKey in InputPurl to avoid recomputing it at each phase - Fix fallback package_str to preserve Option instead of unwrap_or_default which would emit an empty version segment - Remove unused Winner::base field and #[allow(dead_code)] - Trim noisy phase comments Implements TC-3886 Assisted-by: Claude Code (cherry picked from commit 1bb68ed331cfb2e837bb5d7a001c646af223e204) --- modules/fundamental/src/purl/service/mod.rs | 78 ++++++++------------- 1 file changed, 30 insertions(+), 48 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 054f3c819..037ca47aa 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use crate::{ Error, @@ -13,11 +13,8 @@ use crate::{ }; use regex::Regex; use sea_orm::{ - ColumnTrait, ConnectionTrait, DbBackend, EntityTrait, FromQueryResult, QueryFilter, QueryOrder, - QuerySelect, QueryTrait, RelationTrait, Statement, prelude::Uuid, -}; -use sea_query::{ - Alias, ColumnType, Condition, Expr, JoinType, Order, PgFunc, PostgresQueryBuilder, + ColumnTrait, Condition, ConnectionTrait, EntityTrait, FromQueryResult, QueryFilter, QueryOrder, + QuerySelect, QueryTrait, RelationTrait, prelude::Uuid, }; use tracing::instrument; use trustify_common::{ @@ -439,12 +436,16 @@ impl PurlService { #[allow(clippy::unwrap_used)] let pattern = Regex::new("redhat-[0-9]+$").unwrap(); - // Phase 1: Parse all input PURLs and build lookup keys - struct InputPurl { - purl_string: String, + #[derive(Clone, PartialEq, Eq, Hash)] + struct PurlKey { ty: String, namespace: Option, name: String, + } + + struct InputPurl { + purl_string: String, + key: PurlKey, input_version: semver::Version, } @@ -464,9 +465,11 @@ impl PurlService { input_purls.push(InputPurl { purl_string: purl.to_string(), - ty: purl.ty.clone(), - namespace: purl.namespace.clone(), - name: purl.name.clone(), + key: PurlKey { + ty: purl.ty.clone(), + namespace: purl.namespace.clone(), + name: purl.name.clone(), + }, input_version, }); } @@ -475,23 +478,17 @@ impl PurlService { return Ok(recommendations); } - // Phase 2: Bulk query — find all base_purls matching any input PURL's (type, namespace, name) + // Bulk query: find all base_purls matching any input PURL's (type, namespace, name) let mut base_condition = Condition::any(); - let mut seen_keys: std::collections::HashSet = std::collections::HashSet::new(); + let mut seen_keys: HashSet = HashSet::new(); for ip in &input_purls { - let key = format!( - "{}|{}|{}", - ip.ty, - ip.namespace.as_deref().unwrap_or(""), - ip.name - ); - if !seen_keys.insert(key) { + if !seen_keys.insert(ip.key.clone()) { continue; } let mut cond = Condition::all() - .add(base_purl::Column::Type.eq(&ip.ty)) - .add(base_purl::Column::Name.eq(&ip.name)); - if let Some(ns) = &ip.namespace { + .add(base_purl::Column::Type.eq(&ip.key.ty)) + .add(base_purl::Column::Name.eq(&ip.key.name)); + if let Some(ns) = &ip.key.namespace { cond = cond.add(base_purl::Column::Namespace.eq(ns)); } else { cond = cond.add(base_purl::Column::Namespace.is_null()); @@ -511,28 +508,25 @@ impl PurlService { return Ok(recommendations); } - // Build base_purl lookup: key -> base_purl model - let base_purl_map: HashMap = base_purls + let base_purl_map: HashMap = base_purls .iter() .map(|bp| { - let key = format!( - "{}|{}|{}", - bp.r#type, - bp.namespace.as_deref().unwrap_or(""), - bp.name - ); + let key = PurlKey { + ty: bp.r#type.clone(), + namespace: bp.namespace.clone(), + name: bp.name.clone(), + }; (key, bp) }) .collect(); - // Phase 3: Bulk query — get all versioned_purls for matching base_purls + // Bulk query: get all versioned_purls for matching base_purls let base_purl_ids: Vec = base_purls.iter().map(|bp| bp.id).collect(); let all_versioned: Vec = versioned_purl::Entity::find() .filter(versioned_purl::Column::BasePurlId.is_in(base_purl_ids)) .all(connection) .await?; - // Group versioned_purls by base_purl_id let mut versioned_by_base: HashMap> = HashMap::new(); for vp in &all_versioned { versioned_by_base @@ -541,23 +535,14 @@ impl PurlService { .push(vp); } - // Phase 4: For each input PURL, find the highest redhat patch version struct Winner<'a> { purl_string: String, versioned_purl: &'a versioned_purl::Model, - #[allow(dead_code)] - base: &'a base_purl::Model, } let mut winners: Vec = Vec::new(); for ip in &input_purls { - let key = format!( - "{}|{}|{}", - ip.ty, - ip.namespace.as_deref().unwrap_or(""), - ip.name - ); - let Some(base) = base_purl_map.get(&key) else { + let Some(base) = base_purl_map.get(&ip.key) else { recommendations.insert(ip.purl_string.clone(), Vec::new()); continue; }; @@ -587,7 +572,6 @@ impl PurlService { winners.push(Winner { purl_string: ip.purl_string.clone(), versioned_purl: winner_vp, - base, }); } else { recommendations.insert(ip.purl_string.clone(), Vec::new()); @@ -598,8 +582,6 @@ impl PurlService { return Ok(recommendations); } - // Phase 5: Fetch vulnerability details for each winner - // Use versioned_purl_by_uuid to get full details including qualified_purl with qualifiers for winner in &winners { if let Some(purl_details) = self .versioned_purl_by_uuid(&winner.versioned_purl.id, connection) @@ -614,7 +596,7 @@ impl PurlService { ty: purl_details.head.purl.ty.clone(), namespace: purl_details.head.purl.namespace.clone(), name: purl_details.head.purl.name.clone(), - version: Some(purl_details.head.purl.version.clone().unwrap_or_default()), + version: purl_details.head.purl.version.clone(), qualifiers: Default::default(), } .to_string() From 83aa9877bfeeccbff7ea7e09f8818b45a8fedfe8 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 25 Mar 2026 13:40:52 +0100 Subject: [PATCH 03/23] test(purl): add coverage for recommend_purls edge cases Add tests for uncovered branches: - unknown PURL not in database (empty base_purls path) - PURL without namespace (null namespace condition) - invalid version string (unparseable semver, early return) - mixed input combining valid, unknown, and no-version PURLs Implements TC-3886 Assisted-by: Claude Code (cherry picked from commit 72ab045d69009676cf8ee3bcfc9b905486ac1422) --- .../fundamental/src/purl/endpoints/test.rs | 219 ++++++++++++++++++ 1 file changed, 219 insertions(+) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index 69604bed2..7235a6235 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -379,3 +379,222 @@ async fn get_recommendations_no_version(ctx: &TrustifyContext) -> Result<(), any Ok(()) } + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_dedup(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, + &ctx.db, + ) + .await?; + + ctx.ingest_documents([ + "osv/RUSTSEC-2021-0079.json", + "osv/RUSTSEC-2021-0079-DUPLICATE.json", + ]) + .await?; + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": ["pkg:cargo/hyper@0.14.1"]})) + .to_request(), + ) + .await; + + log::info!("{recommendations:#?}"); + + let entry = + &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; + assert_eq!(entry["vulnerabilities"].as_array().unwrap().len(), 1); + assert_eq!( + entry["vulnerabilities"].as_array().unwrap()[0]["id"] + .as_str() + .unwrap(), + "CVE-2021-32714" + ); + + Ok(()) +} + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_other_status(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set}; + use trustify_entity::{purl_status, status}; + + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, + &ctx.db, + ) + .await?; + + ctx.ingest_documents(["osv/RUSTSEC-2021-0079.json"]).await?; + + let custom_status_id = Uuid::new_v4(); + let custom_status = status::ActiveModel { + id: Set(custom_status_id), + slug: Set("custom_status".to_string()), + name: Set("Custom Status".to_string()), + description: Set(Some("A custom status for testing".to_string())), + }; + status::Entity::insert(custom_status).exec(&ctx.db).await?; + + let purl_statuses = purl_status::Entity::find() + .filter(purl_status::Column::VulnerabilityId.eq("CVE-2021-32714")) + .all(&ctx.db) + .await?; + + assert!(!purl_statuses.is_empty()); + + for ps in purl_statuses { + let mut active: purl_status::ActiveModel = ps.into(); + active.status_id = Set(custom_status_id); + active.update(&ctx.db).await?; + } + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": ["pkg:cargo/hyper@0.14.1"]})) + .to_request(), + ) + .await; + + log::info!("{recommendations:#?}"); + + let entry = + &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; + let vulns = entry["vulnerabilities"].as_array().unwrap(); + let vuln = vulns + .iter() + .find(|v| v["id"].as_str().unwrap() == "CVE-2021-32714") + .unwrap(); + + assert_eq!(vuln["status"], "custom_status"); + + Ok(()) +} + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_unknown_purl(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": ["pkg:maven/com.example/nonexistent@1.0.0"]})) + .to_request(), + ) + .await; + + let recs = recommendations["recommendations"].as_object().unwrap(); + assert_eq!(recs.len(), 1); + assert_eq!( + recs["pkg:maven/com.example/nonexistent@1.0.0"] + .as_array() + .unwrap() + .len(), + 0 + ); + + Ok(()) +} + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/serde@1.0.0-redhat-00001")?, + &ctx.db, + ) + .await?; + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": ["pkg:cargo/serde@1.0.0"]})) + .to_request(), + ) + .await; + + let recs = recommendations["recommendations"].as_object().unwrap(); + assert_eq!(recs.len(), 1); + assert_eq!(recs["pkg:cargo/serde@1.0.0"].as_array().unwrap().len(), 1); + + Ok(()) +} + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_invalid_version(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": ["pkg:maven/jakarta.el/jakarta.el-api@not-a-version"]})) + .to_request(), + ) + .await; + + let recs = recommendations["recommendations"].as_object().unwrap(); + assert_eq!(recs.len(), 0); + + Ok(()) +} + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": [ + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", + "pkg:maven/com.example/nonexistent@1.0.0", + "pkg:maven/jakarta.el/jakarta.el-api" + ]})) + .to_request(), + ) + .await; + + let recs = recommendations["recommendations"].as_object().unwrap(); + // Known PURL with version + unknown PURL = 2 entries (no-version PURL is skipped) + assert_eq!(recs.len(), 2); + assert_eq!( + recs["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"] + .as_array() + .unwrap() + .len(), + 1 + ); + assert_eq!( + recs["pkg:maven/com.example/nonexistent@1.0.0"] + .as_array() + .unwrap() + .len(), + 0 + ); + + Ok(()) +} From 7b21105e5d9bef7fbbb94e364017db167b2cd9c7 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 25 Mar 2026 14:08:05 +0100 Subject: [PATCH 04/23] refactor(purl): extract helper methods from recommend_purls Split the 200-line recommend_purls into focused private methods following project conventions (vulnerability, sbom services): - PurlKey: module-level struct with from_purl, from_base_purl, as_condition constructors - InputPurl::try_from_purl: parse and validate input PURLs - fetch_base_purls: bulk base_purl query with deduplication - fetch_versioned_purls_by_base: bulk versioned_purl fetch grouped by base_purl_id - find_highest_redhat_patch: version filtering and comparison - build_recommend_entry: vulnerability detail assembly recommend_purls is now ~50 lines of orchestration. Implements TC-3886 Assisted-by: Claude Code (cherry picked from commit 3c40c375174aecebf3bc5c2cca503ea925c4be10) --- modules/fundamental/src/purl/service/mod.rs | 347 +++++++++++--------- 1 file changed, 186 insertions(+), 161 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 037ca47aa..338330b4b 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -32,6 +32,69 @@ use trustify_entity::{ }; use trustify_module_ingestor::common::Deprecation; +#[derive(Clone, PartialEq, Eq, Hash)] +struct PurlKey { + ty: String, + namespace: Option, + name: String, +} + +impl PurlKey { + fn from_purl(purl: &Purl) -> Self { + Self { + ty: purl.ty.clone(), + namespace: purl.namespace.clone(), + name: purl.name.clone(), + } + } + + fn from_base_purl(bp: &base_purl::Model) -> Self { + Self { + ty: bp.r#type.clone(), + namespace: bp.namespace.clone(), + name: bp.name.clone(), + } + } + + fn as_condition(&self) -> Condition { + let mut cond = Condition::all() + .add(base_purl::Column::Type.eq(&self.ty)) + .add(base_purl::Column::Name.eq(&self.name)); + if let Some(ns) = &self.namespace { + cond = cond.add(base_purl::Column::Namespace.eq(ns)); + } else { + cond = cond.add(base_purl::Column::Namespace.is_null()); + } + cond + } +} + +struct InputPurl { + purl_string: String, + key: PurlKey, + input_version: semver::Version, +} + +impl InputPurl { + fn try_from_purl(purl: &Purl) -> Option { + let input_version_str = purl.version.as_ref()?; + let input_version = lenient_semver::parse(input_version_str) + .inspect_err(|_| { + log::debug!( + "input purl {} version {:?} failed to parse", + purl, + input_version_str + ); + }) + .ok()?; + Some(Self { + purl_string: purl.to_string(), + key: PurlKey::from_purl(purl), + input_version, + }) + } +} + #[derive(Default)] pub struct PurlService {} @@ -433,74 +496,13 @@ impl PurlService { ) -> Result>, Error> { let mut recommendations = HashMap::new(); - #[allow(clippy::unwrap_used)] - let pattern = Regex::new("redhat-[0-9]+$").unwrap(); - - #[derive(Clone, PartialEq, Eq, Hash)] - struct PurlKey { - ty: String, - namespace: Option, - name: String, - } - - struct InputPurl { - purl_string: String, - key: PurlKey, - input_version: semver::Version, - } - - let mut input_purls: Vec = Vec::new(); - for purl in purls { - let Some(ref input_version_str) = purl.version else { - continue; - }; - let Ok(input_version) = lenient_semver::parse(input_version_str) else { - log::debug!( - "input purl {} version {:?} failed to parse", - purl, - input_version_str - ); - continue; - }; - - input_purls.push(InputPurl { - purl_string: purl.to_string(), - key: PurlKey { - ty: purl.ty.clone(), - namespace: purl.namespace.clone(), - name: purl.name.clone(), - }, - input_version, - }); - } - + let input_purls: Vec = + purls.iter().filter_map(InputPurl::try_from_purl).collect(); if input_purls.is_empty() { return Ok(recommendations); } - // Bulk query: find all base_purls matching any input PURL's (type, namespace, name) - let mut base_condition = Condition::any(); - let mut seen_keys: HashSet = HashSet::new(); - for ip in &input_purls { - if !seen_keys.insert(ip.key.clone()) { - continue; - } - let mut cond = Condition::all() - .add(base_purl::Column::Type.eq(&ip.key.ty)) - .add(base_purl::Column::Name.eq(&ip.key.name)); - if let Some(ns) = &ip.key.namespace { - cond = cond.add(base_purl::Column::Namespace.eq(ns)); - } else { - cond = cond.add(base_purl::Column::Namespace.is_null()); - } - base_condition = base_condition.add(cond); - } - - let base_purls: Vec = base_purl::Entity::find() - .filter(base_condition) - .all(connection) - .await?; - + let base_purls = Self::fetch_base_purls(&input_purls, connection).await?; if base_purls.is_empty() { for ip in &input_purls { recommendations.insert(ip.purl_string.clone(), Vec::new()); @@ -510,36 +512,14 @@ impl PurlService { let base_purl_map: HashMap = base_purls .iter() - .map(|bp| { - let key = PurlKey { - ty: bp.r#type.clone(), - namespace: bp.namespace.clone(), - name: bp.name.clone(), - }; - (key, bp) - }) + .map(|bp| (PurlKey::from_base_purl(bp), bp)) .collect(); - // Bulk query: get all versioned_purls for matching base_purls - let base_purl_ids: Vec = base_purls.iter().map(|bp| bp.id).collect(); - let all_versioned: Vec = versioned_purl::Entity::find() - .filter(versioned_purl::Column::BasePurlId.is_in(base_purl_ids)) - .all(connection) - .await?; - - let mut versioned_by_base: HashMap> = HashMap::new(); - for vp in &all_versioned { - versioned_by_base - .entry(vp.base_purl_id) - .or_default() - .push(vp); - } + let versioned_by_base = + Self::fetch_versioned_purls_by_base(&base_purls, connection).await?; - struct Winner<'a> { - purl_string: String, - versioned_purl: &'a versioned_purl::Model, - } - let mut winners: Vec = Vec::new(); + #[allow(clippy::unwrap_used)] + let pattern = Regex::new("redhat-[0-9]+$").unwrap(); for ip in &input_purls { let Some(base) = base_purl_map.get(&ip.key) else { @@ -547,85 +527,130 @@ impl PurlService { continue; }; - let highest_patch = versioned_by_base - .get(&base.id) - .into_iter() - .flatten() - .filter(|vp| pattern.is_match(&vp.version)) - .filter_map(|vp| { - lenient_semver::parse(&vp.version) - .inspect_err(|_| { - log::debug!("purl version {:?} failed to parse", vp.version); - }) - .ok() - .map(|v| (*vp, v)) - }) - .filter(|(_, version)| { - version.major == ip.input_version.major - && version.minor == ip.input_version.minor - && version.patch == ip.input_version.patch - }) - .max_by(|(_, a), (_, b)| a.pre.cmp(&b.pre)) - .map(|(vp, _)| vp); - - if let Some(winner_vp) = highest_patch { - winners.push(Winner { - purl_string: ip.purl_string.clone(), - versioned_purl: winner_vp, - }); + let highest = Self::find_highest_redhat_patch( + &pattern, + &ip.input_version, + versioned_by_base.get(&base.id), + ); + + if let Some(winner_vp) = highest { + let entry = self.build_recommend_entry(winner_vp, connection).await?; + recommendations.insert(ip.purl_string.clone(), vec![entry]); } else { recommendations.insert(ip.purl_string.clone(), Vec::new()); } } - if winners.is_empty() { - return Ok(recommendations); - } + Ok(recommendations) + } - for winner in &winners { - if let Some(purl_details) = self - .versioned_purl_by_uuid(&winner.versioned_purl.id, connection) - .await? - { - let package_str = purl_details - .purls - .first() - .map(|p| p.purl.to_string()) - .unwrap_or_else(|| { - Purl { - ty: purl_details.head.purl.ty.clone(), - namespace: purl_details.head.purl.namespace.clone(), - name: purl_details.head.purl.name.clone(), - version: purl_details.head.purl.version.clone(), - qualifiers: Default::default(), - } - .to_string() - }); - - let vulnerabilities: Vec = purl_details - .advisories - .iter() - .flat_map(|advisory| &advisory.status) - .unique_by(|status| &status.vulnerability.identifier) - .map(|status| VulnerabilityStatus { - id: status.vulnerability.identifier.clone(), - status: Some(status.into()), - justification: None, - remediations: status.remediations.clone(), - }) - .collect(); - - recommendations.insert( - winner.purl_string.clone(), - vec![RecommendEntry { - package: package_str, - vulnerabilities, - }], - ); + async fn fetch_base_purls( + input_purls: &[InputPurl], + connection: &C, + ) -> Result, Error> { + let mut condition = Condition::any(); + let mut seen_keys: HashSet = HashSet::new(); + for ip in input_purls { + if seen_keys.insert(ip.key.clone()) { + condition = condition.add(ip.key.as_condition()); } } + Ok(base_purl::Entity::find() + .filter(condition) + .all(connection) + .await?) + } - Ok(recommendations) + async fn fetch_versioned_purls_by_base( + base_purls: &[base_purl::Model], + connection: &C, + ) -> Result>, Error> { + let base_purl_ids: Vec = base_purls.iter().map(|bp| bp.id).collect(); + let all_versioned: Vec = versioned_purl::Entity::find() + .filter(versioned_purl::Column::BasePurlId.is_in(base_purl_ids)) + .all(connection) + .await?; + + let mut by_base: HashMap> = HashMap::new(); + for vp in all_versioned { + by_base.entry(vp.base_purl_id).or_default().push(vp); + } + Ok(by_base) + } + + fn find_highest_redhat_patch<'a>( + pattern: &Regex, + input_version: &semver::Version, + versioned_purls: Option<&'a Vec>, + ) -> Option<&'a versioned_purl::Model> { + versioned_purls? + .iter() + .filter(|vp| pattern.is_match(&vp.version)) + .filter_map(|vp| { + lenient_semver::parse(&vp.version) + .inspect_err(|_| { + log::debug!("purl version {:?} failed to parse", vp.version); + }) + .ok() + .map(|v| (vp, v)) + }) + .filter(|(_, version)| { + version.major == input_version.major + && version.minor == input_version.minor + && version.patch == input_version.patch + }) + .max_by(|(_, a), (_, b)| a.pre.cmp(&b.pre)) + .map(|(vp, _)| vp) + } + + async fn build_recommend_entry( + &self, + versioned_purl: &versioned_purl::Model, + connection: &C, + ) -> Result { + let purl_details = self + .versioned_purl_by_uuid(&versioned_purl.id, connection) + .await?; + + let Some(purl_details) = purl_details else { + return Ok(RecommendEntry { + package: String::new(), + vulnerabilities: Vec::new(), + }); + }; + + let package = purl_details + .purls + .first() + .map(|p| p.purl.to_string()) + .unwrap_or_else(|| { + Purl { + ty: purl_details.head.purl.ty.clone(), + namespace: purl_details.head.purl.namespace.clone(), + name: purl_details.head.purl.name.clone(), + version: purl_details.head.purl.version.clone(), + qualifiers: Default::default(), + } + .to_string() + }); + + let vulnerabilities = purl_details + .advisories + .iter() + .flat_map(|advisory| &advisory.status) + .unique_by(|status| &status.vulnerability.identifier) + .map(|status| VulnerabilityStatus { + id: status.vulnerability.identifier.clone(), + status: Some(status.into()), + justification: None, + remediations: status.remediations.clone(), + }) + .collect(); + + Ok(RecommendEntry { + package, + vulnerabilities, + }) } } From 56d2fe64027b2182216413c191844377000699d8 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Thu, 26 Mar 2026 14:45:59 +0100 Subject: [PATCH 05/23] refactor(purl): simplify InputPurl to hold Purl directly Store a Purl in InputPurl instead of separate purl_string and PurlKey fields, reducing the number of concepts to track when reading the code. PurlKey is now derived on-the-fly where needed for map lookups. Addresses review feedback from @Strum355. Co-Authored-By: Claude Opus 4.6 (cherry picked from commit eee5c1019adc3973c9008d37ec11a1ddc00d59bc) --- modules/fundamental/src/purl/service/mod.rs | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 338330b4b..67b9e0f9f 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -70,8 +70,7 @@ impl PurlKey { } struct InputPurl { - purl_string: String, - key: PurlKey, + purl: Purl, input_version: semver::Version, } @@ -88,8 +87,7 @@ impl InputPurl { }) .ok()?; Some(Self { - purl_string: purl.to_string(), - key: PurlKey::from_purl(purl), + purl: purl.clone(), input_version, }) } @@ -505,7 +503,7 @@ impl PurlService { let base_purls = Self::fetch_base_purls(&input_purls, connection).await?; if base_purls.is_empty() { for ip in &input_purls { - recommendations.insert(ip.purl_string.clone(), Vec::new()); + recommendations.insert(ip.purl.to_string(), Vec::new()); } return Ok(recommendations); } @@ -522,8 +520,9 @@ impl PurlService { let pattern = Regex::new("redhat-[0-9]+$").unwrap(); for ip in &input_purls { - let Some(base) = base_purl_map.get(&ip.key) else { - recommendations.insert(ip.purl_string.clone(), Vec::new()); + let key = PurlKey::from_purl(&ip.purl); + let Some(base) = base_purl_map.get(&key) else { + recommendations.insert(ip.purl.to_string(), Vec::new()); continue; }; @@ -535,9 +534,9 @@ impl PurlService { if let Some(winner_vp) = highest { let entry = self.build_recommend_entry(winner_vp, connection).await?; - recommendations.insert(ip.purl_string.clone(), vec![entry]); + recommendations.insert(ip.purl.to_string(), vec![entry]); } else { - recommendations.insert(ip.purl_string.clone(), Vec::new()); + recommendations.insert(ip.purl.to_string(), Vec::new()); } } @@ -551,8 +550,9 @@ impl PurlService { let mut condition = Condition::any(); let mut seen_keys: HashSet = HashSet::new(); for ip in input_purls { - if seen_keys.insert(ip.key.clone()) { - condition = condition.add(ip.key.as_condition()); + let key = PurlKey::from_purl(&ip.purl); + if seen_keys.insert(key.clone()) { + condition = condition.add(key.as_condition()); } } Ok(base_purl::Entity::find() From c6cbb948b925bf2a2434028c4fee5dd0b6a7c1cf Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Tue, 31 Mar 2026 16:23:14 +0200 Subject: [PATCH 06/23] perf(purl): batch purl_status and related entity loading in recommend_purls Replace per-winner VersionedPurlDetails::from_entity calls with a single batched purl_status query using IN clause + version_matches() filter, and SeaORM LoaderTrait for related entities (vulnerability, advisory, status, remediation). This eliminates ~548 individual purl_status queries per 100-PURL request, reducing them to 1-3 batch queries. Cumulative with the bulk PURL lookup (TC-3886), this yields a 5.1x total improvement (1.195s to 0.232s for 100 PURLs) and reduces peak RSS from 106 MB to 71 MB. Implements TC-3887 Assisted-by: Claude Code (cherry picked from commit 4a34e18c03d30f511069e554cf77332be24f3e0f) --- modules/fundamental/src/purl/service/mod.rs | 230 +++++++++++++++----- 1 file changed, 175 insertions(+), 55 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 67b9e0f9f..b352e15d3 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -13,9 +13,10 @@ use crate::{ }; use regex::Regex; use sea_orm::{ - ColumnTrait, Condition, ConnectionTrait, EntityTrait, FromQueryResult, QueryFilter, QueryOrder, - QuerySelect, QueryTrait, RelationTrait, prelude::Uuid, + ColumnTrait, Condition, ConnectionTrait, EntityTrait, FromQueryResult, LoaderTrait, QueryFilter, + QueryOrder, QuerySelect, QueryTrait, RelationTrait, prelude::Uuid, }; +use sea_query::{Asterisk, ColumnType, Expr, Func, JoinType, Order, SimpleExpr, UnionType}; use tracing::instrument; use trustify_common::{ db::{ @@ -26,9 +27,11 @@ use trustify_common::{ purl::{Purl, PurlErr}, }; use trustify_entity::{ - base_purl, license, + advisory, base_purl, license, organization, purl_status, qualified_purl::{self, CanonicalPurl}, - sbom_package, sbom_package_license, sbom_package_purl_ref, versioned_purl, + remediation, remediation_purl_status, sbom_license_expanded, sbom_node, + sbom_node_purl_ref, sbom_package_license, status, version_range, versioned_purl, + vulnerability, }; use trustify_module_ingestor::common::Deprecation; @@ -519,6 +522,13 @@ impl PurlService { #[allow(clippy::unwrap_used)] let pattern = Regex::new("redhat-[0-9]+$").unwrap(); + struct Winner<'a> { + purl_string: String, + versioned_purl: &'a versioned_purl::Model, + base: &'a base_purl::Model, + } + let mut winners: Vec = Vec::new(); + for ip in &input_purls { let key = PurlKey::from_purl(&ip.purl); let Some(base) = base_purl_map.get(&key) else { @@ -533,13 +543,172 @@ impl PurlService { ); if let Some(winner_vp) = highest { - let entry = self.build_recommend_entry(winner_vp, connection).await?; - recommendations.insert(ip.purl.to_string(), vec![entry]); + winners.push(Winner { + purl_string: ip.purl.to_string(), + versioned_purl: winner_vp, + base, + }); } else { recommendations.insert(ip.purl.to_string(), Vec::new()); } } + if winners.is_empty() { + return Ok(recommendations); + } + + // Batch fetch purl_status for ALL winners in a single query + let winner_base_ids: Vec = winners.iter().map(|w| w.base.id).unique().collect(); + + let all_statuses: Vec = purl_status::Entity::find() + .columns([ + version_range::Column::Id, + version_range::Column::LowVersion, + version_range::Column::LowInclusive, + version_range::Column::HighVersion, + version_range::Column::HighInclusive, + ]) + .left_join(base_purl::Entity) + .join( + JoinType::LeftJoin, + base_purl::Relation::VersionedPurls.def(), + ) + .left_join(version_range::Entity) + .filter(purl_status::Column::BasePurlId.is_in(winner_base_ids)) + .filter(SimpleExpr::FunctionCall( + Func::cust(trustify_common::db::VersionMatches) + .arg(Expr::col(versioned_purl::Column::Version)) + .arg(Expr::col((version_range::Entity, Asterisk))), + )) + .all(connection) + .await?; + + // Batch load all related entities + let vulns = all_statuses + .load_one(vulnerability::Entity, connection) + .await?; + let advisories_loaded = all_statuses + .load_one(advisory::Entity, connection) + .await?; + let advisory_models: Vec = advisories_loaded + .iter() + .filter_map(|opt| opt.as_ref().cloned()) + .collect(); + let _organizations = advisory_models + .load_one(organization::Entity, connection) + .await?; + let status_models = all_statuses + .load_one(status::Entity, connection) + .await?; + let status_slug_map: HashMap = all_statuses + .iter() + .zip(status_models.iter()) + .map(|(ps, st)| { + ( + ps.status_id, + st.as_ref() + .map(|s| s.slug.clone()) + .unwrap_or_else(|| "unknown".to_string()), + ) + }) + .collect(); + let remediations = all_statuses + .load_many_to_many( + remediation::Entity, + remediation_purl_status::Entity, + connection, + ) + .await?; + + // Group status info by base_purl_id + struct StatusInfo { + vuln_id: String, + status_slug: String, + remediations: Vec, + } + + let mut statuses_by_base: HashMap> = HashMap::new(); + for (i, ps) in all_statuses.iter().enumerate() { + if let (Some(v), Some(_a)) = (&vulns[i], &advisories_loaded[i]) { + let slug = status_slug_map + .get(&ps.status_id) + .cloned() + .unwrap_or_else(|| "unknown".to_string()); + statuses_by_base + .entry(ps.base_purl_id) + .or_default() + .push(StatusInfo { + vuln_id: v.id.clone(), + status_slug: slug, + remediations: remediations[i].clone(), + }); + } + } + + // Batch fetch qualified PURLs for all winners to get full PURL strings with qualifiers + let winner_vp_ids: Vec = winners.iter().map(|w| w.versioned_purl.id).collect(); + let qualified_purls: Vec = qualified_purl::Entity::find() + .filter(qualified_purl::Column::VersionedPurlId.is_in(winner_vp_ids)) + .all(connection) + .await?; + let mut qualified_by_vp: HashMap = HashMap::new(); + for qp in &qualified_purls { + qualified_by_vp.entry(qp.versioned_purl_id).or_insert(qp); + } + + // Assemble recommendations from batched data + for winner in &winners { + let package_str = qualified_by_vp + .get(&winner.versioned_purl.id) + .map(|qp| Purl::from(qp.purl.clone()).to_string()) + .unwrap_or_else(|| { + Purl { + ty: winner.base.r#type.clone(), + namespace: winner.base.namespace.clone(), + name: winner.base.name.clone(), + version: Some(winner.versioned_purl.version.clone()), + qualifiers: Default::default(), + } + .to_string() + }); + + let mut seen_vuln_ids = HashSet::new(); + let vulnerabilities: Vec = statuses_by_base + .get(&winner.base.id) + .into_iter() + .flatten() + .filter(|info| seen_vuln_ids.insert(info.vuln_id.clone())) + .map(|info| { + use crate::purl::model::VexStatus; + let vex_status = match info.status_slug.as_str() { + "affected" => VexStatus::Affected, + "fixed" => VexStatus::Fixed, + "not_affected" => VexStatus::NotAffected, + "under_investigation" => VexStatus::UnderInvestigation, + "recommended" => VexStatus::Recommended, + other => VexStatus::Other(other.to_string()), + }; + VulnerabilityStatus { + id: info.vuln_id.clone(), + status: Some(vex_status), + justification: None, + remediations: + crate::purl::model::summary::remediation::RemediationSummary::from_entities( + &info.remediations, + ), + } + }) + .collect(); + + recommendations.insert( + winner.purl_string.clone(), + vec![RecommendEntry { + package: package_str, + vulnerabilities, + }], + ); + } + Ok(recommendations) } @@ -603,55 +772,6 @@ impl PurlService { .map(|(vp, _)| vp) } - async fn build_recommend_entry( - &self, - versioned_purl: &versioned_purl::Model, - connection: &C, - ) -> Result { - let purl_details = self - .versioned_purl_by_uuid(&versioned_purl.id, connection) - .await?; - - let Some(purl_details) = purl_details else { - return Ok(RecommendEntry { - package: String::new(), - vulnerabilities: Vec::new(), - }); - }; - - let package = purl_details - .purls - .first() - .map(|p| p.purl.to_string()) - .unwrap_or_else(|| { - Purl { - ty: purl_details.head.purl.ty.clone(), - namespace: purl_details.head.purl.namespace.clone(), - name: purl_details.head.purl.name.clone(), - version: purl_details.head.purl.version.clone(), - qualifiers: Default::default(), - } - .to_string() - }); - - let vulnerabilities = purl_details - .advisories - .iter() - .flat_map(|advisory| &advisory.status) - .unique_by(|status| &status.vulnerability.identifier) - .map(|status| VulnerabilityStatus { - id: status.vulnerability.identifier.clone(), - status: Some(status.into()), - justification: None, - remediations: status.remediations.clone(), - }) - .collect(); - - Ok(RecommendEntry { - package, - vulnerabilities, - }) - } } #[cfg(test)] From bf8e103fefbb5137412a97b70019244c324dcfa4 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Tue, 31 Mar 2026 17:26:38 +0200 Subject: [PATCH 07/23] refactor(purl): extract helper methods from recommend_purls for clarity Split the large recommend_purls method into focused helper functions: - fetch_vulnerability_statuses: batched purl_status query with LoaderTrait - fetch_qualified_purl_map: batch qualified PURL lookup - assemble_recommend_entry: builds a single RecommendEntry from batched data - Move StatusInfo and Winner structs to module level Also fixes version_matches filter to only check winning versioned_purls and enhances test assertions per review feedback. Co-Authored-By: Claude Opus 4.6 (cherry picked from commit 6df8381cab183e55419b170047e7772b9a54f532) --- .../fundamental/src/purl/endpoints/test.rs | 11 +- modules/fundamental/src/purl/service/mod.rs | 186 ++++++++++-------- 2 files changed, 112 insertions(+), 85 deletions(-) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index 7235a6235..cb1f8c086 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -533,7 +533,16 @@ async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), a let recs = recommendations["recommendations"].as_object().unwrap(); assert_eq!(recs.len(), 1); - assert_eq!(recs["pkg:cargo/serde@1.0.0"].as_array().unwrap().len(), 1); + + let rec_list = recs["pkg:cargo/serde@1.0.0"] + .as_array() + .expect("recommendations for input purl must be an array"); + assert_eq!(rec_list.len(), 1); + + let recommended_purl = rec_list[0]["package"] + .as_str() + .expect("recommendation must contain a 'package' field"); + assert_eq!(recommended_purl, "pkg:cargo/serde@1.0.0-redhat-00001"); Ok(()) } diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index b352e15d3..e7c4df44b 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -4,17 +4,20 @@ use crate::{ Error, common::license_filtering::{LICENSE, build_license_filtering_with_clause}, purl::model::{ - RecommendEntry, VulnerabilityStatus, + RecommendEntry, VexStatus, VulnerabilityStatus, details::{ base_purl::BasePurlDetails, purl::PurlDetails, versioned_purl::VersionedPurlDetails, }, - summary::{base_purl::BasePurlSummary, purl::PurlSummary, r#type::TypeSummary}, + summary::{ + base_purl::BasePurlSummary, purl::PurlSummary, remediation::RemediationSummary, + r#type::TypeSummary, + }, }, }; use regex::Regex; use sea_orm::{ - ColumnTrait, Condition, ConnectionTrait, EntityTrait, FromQueryResult, LoaderTrait, QueryFilter, - QueryOrder, QuerySelect, QueryTrait, RelationTrait, prelude::Uuid, + ColumnTrait, Condition, ConnectionTrait, EntityTrait, FromQueryResult, LoaderTrait, + QueryFilter, QueryOrder, QuerySelect, QueryTrait, RelationTrait, prelude::Uuid, }; use sea_query::{Asterisk, ColumnType, Expr, Func, JoinType, Order, SimpleExpr, UnionType}; use tracing::instrument; @@ -42,6 +45,18 @@ struct PurlKey { name: String, } +struct StatusInfo { + vuln_id: String, + status_slug: String, + remediations: Vec, +} + +struct Winner<'a> { + purl_string: String, + versioned_purl: &'a versioned_purl::Model, + base: &'a base_purl::Model, +} + impl PurlKey { fn from_purl(purl: &Purl) -> Self { Self { @@ -522,11 +537,6 @@ impl PurlService { #[allow(clippy::unwrap_used)] let pattern = Regex::new("redhat-[0-9]+$").unwrap(); - struct Winner<'a> { - purl_string: String, - versioned_purl: &'a versioned_purl::Model, - base: &'a base_purl::Model, - } let mut winners: Vec = Vec::new(); for ip in &input_purls { @@ -557,9 +567,29 @@ impl PurlService { return Ok(recommendations); } - // Batch fetch purl_status for ALL winners in a single query + // Batch fetch vulnerability statuses and qualified PURLs for all winners let winner_base_ids: Vec = winners.iter().map(|w| w.base.id).unique().collect(); + let winner_vp_ids: Vec = winners.iter().map(|w| w.versioned_purl.id).collect(); + + let statuses_by_base = + Self::fetch_vulnerability_statuses(&winner_base_ids, &winner_vp_ids, connection) + .await?; + let qualified_purls = Self::fetch_qualified_purl_map(&winner_vp_ids, connection).await?; + // Assemble recommendations from batched data + for winner in &winners { + let entry = Self::assemble_recommend_entry(winner, &statuses_by_base, &qualified_purls); + recommendations.insert(winner.purl_string.clone(), vec![entry]); + } + + Ok(recommendations) + } + + async fn fetch_vulnerability_statuses( + winner_base_ids: &[Uuid], + winner_vp_ids: &[Uuid], + connection: &C, + ) -> Result>, Error> { let all_statuses: Vec = purl_status::Entity::find() .columns([ version_range::Column::Id, @@ -574,7 +604,8 @@ impl PurlService { base_purl::Relation::VersionedPurls.def(), ) .left_join(version_range::Entity) - .filter(purl_status::Column::BasePurlId.is_in(winner_base_ids)) + .filter(purl_status::Column::BasePurlId.is_in(winner_base_ids.iter().copied())) + .filter(versioned_purl::Column::Id.is_in(winner_vp_ids.iter().copied())) .filter(SimpleExpr::FunctionCall( Func::cust(trustify_common::db::VersionMatches) .arg(Expr::col(versioned_purl::Column::Version)) @@ -583,13 +614,10 @@ impl PurlService { .all(connection) .await?; - // Batch load all related entities let vulns = all_statuses .load_one(vulnerability::Entity, connection) .await?; - let advisories_loaded = all_statuses - .load_one(advisory::Entity, connection) - .await?; + let advisories_loaded = all_statuses.load_one(advisory::Entity, connection).await?; let advisory_models: Vec = advisories_loaded .iter() .filter_map(|opt| opt.as_ref().cloned()) @@ -597,9 +625,7 @@ impl PurlService { let _organizations = advisory_models .load_one(organization::Entity, connection) .await?; - let status_models = all_statuses - .load_one(status::Entity, connection) - .await?; + let status_models = all_statuses.load_one(status::Entity, connection).await?; let status_slug_map: HashMap = all_statuses .iter() .zip(status_models.iter()) @@ -620,13 +646,6 @@ impl PurlService { ) .await?; - // Group status info by base_purl_id - struct StatusInfo { - vuln_id: String, - status_slug: String, - remediations: Vec, - } - let mut statuses_by_base: HashMap> = HashMap::new(); for (i, ps) in all_statuses.iter().enumerate() { if let (Some(v), Some(_a)) = (&vulns[i], &advisories_loaded[i]) { @@ -645,71 +664,71 @@ impl PurlService { } } - // Batch fetch qualified PURLs for all winners to get full PURL strings with qualifiers - let winner_vp_ids: Vec = winners.iter().map(|w| w.versioned_purl.id).collect(); + Ok(statuses_by_base) + } + + async fn fetch_qualified_purl_map( + winner_vp_ids: &[Uuid], + connection: &C, + ) -> Result, Error> { let qualified_purls: Vec = qualified_purl::Entity::find() - .filter(qualified_purl::Column::VersionedPurlId.is_in(winner_vp_ids)) + .filter(qualified_purl::Column::VersionedPurlId.is_in(winner_vp_ids.iter().copied())) .all(connection) .await?; - let mut qualified_by_vp: HashMap = HashMap::new(); - for qp in &qualified_purls { - qualified_by_vp.entry(qp.versioned_purl_id).or_insert(qp); + let mut by_vp: HashMap = HashMap::new(); + for qp in qualified_purls { + by_vp.entry(qp.versioned_purl_id).or_insert(qp); } + Ok(by_vp) + } - // Assemble recommendations from batched data - for winner in &winners { - let package_str = qualified_by_vp - .get(&winner.versioned_purl.id) - .map(|qp| Purl::from(qp.purl.clone()).to_string()) - .unwrap_or_else(|| { - Purl { - ty: winner.base.r#type.clone(), - namespace: winner.base.namespace.clone(), - name: winner.base.name.clone(), - version: Some(winner.versioned_purl.version.clone()), - qualifiers: Default::default(), - } - .to_string() - }); - - let mut seen_vuln_ids = HashSet::new(); - let vulnerabilities: Vec = statuses_by_base - .get(&winner.base.id) - .into_iter() - .flatten() - .filter(|info| seen_vuln_ids.insert(info.vuln_id.clone())) - .map(|info| { - use crate::purl::model::VexStatus; - let vex_status = match info.status_slug.as_str() { - "affected" => VexStatus::Affected, - "fixed" => VexStatus::Fixed, - "not_affected" => VexStatus::NotAffected, - "under_investigation" => VexStatus::UnderInvestigation, - "recommended" => VexStatus::Recommended, - other => VexStatus::Other(other.to_string()), - }; - VulnerabilityStatus { - id: info.vuln_id.clone(), - status: Some(vex_status), - justification: None, - remediations: - crate::purl::model::summary::remediation::RemediationSummary::from_entities( - &info.remediations, - ), - } - }) - .collect(); + fn assemble_recommend_entry( + winner: &Winner<'_>, + statuses_by_base: &HashMap>, + qualified_by_vp: &HashMap, + ) -> RecommendEntry { + let package_str = qualified_by_vp + .get(&winner.versioned_purl.id) + .map(|qp| Purl::from(qp.purl.clone()).to_string()) + .unwrap_or_else(|| { + Purl { + ty: winner.base.r#type.clone(), + namespace: winner.base.namespace.clone(), + name: winner.base.name.clone(), + version: Some(winner.versioned_purl.version.clone()), + qualifiers: Default::default(), + } + .to_string() + }); + + let mut seen_vuln_ids = HashSet::new(); + let vulnerabilities: Vec = statuses_by_base + .get(&winner.base.id) + .into_iter() + .flatten() + .filter(|info| seen_vuln_ids.insert(info.vuln_id.clone())) + .map(|info| { + let vex_status = match info.status_slug.as_str() { + "affected" => VexStatus::Affected, + "fixed" => VexStatus::Fixed, + "not_affected" => VexStatus::NotAffected, + "under_investigation" => VexStatus::UnderInvestigation, + "recommended" => VexStatus::Recommended, + other => VexStatus::Other(other.to_string()), + }; + VulnerabilityStatus { + id: info.vuln_id.clone(), + status: Some(vex_status), + justification: None, + remediations: RemediationSummary::from_entities(&info.remediations), + } + }) + .collect(); - recommendations.insert( - winner.purl_string.clone(), - vec![RecommendEntry { - package: package_str, - vulnerabilities, - }], - ); + RecommendEntry { + package: package_str, + vulnerabilities, } - - Ok(recommendations) } async fn fetch_base_purls( @@ -771,7 +790,6 @@ impl PurlService { .max_by(|(_, a), (_, b)| a.pre.cmp(&b.pre)) .map(|(vp, _)| vp) } - } #[cfg(test)] From fb5ee562645b0d4344b673e03454ebeda20d940c Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Tue, 31 Mar 2026 21:45:06 +0200 Subject: [PATCH 08/23] test(purl): add coverage for fallback package_str and fixed status - get_recommendations_fallback_package_str: exercises the unwrap_or_else path when no qualified_purl exists (versioned_purl without qualifiers) - get_recommendations_fixed_status: covers the VexStatus::Fixed match arm by overriding status slug to "fixed" Implements TC-3887 Co-Authored-By: Claude Opus 4.6 (cherry picked from commit 9d11dff4748cf02d08e435a3365195c982dffe43) --- .../fundamental/src/purl/endpoints/test.rs | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index cb1f8c086..b51e10f8b 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -607,3 +607,114 @@ async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow:: Ok(()) } + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_fallback_package_str( + ctx: &TrustifyContext, +) -> Result<(), anyhow::Error> { + // Ingest a versioned_purl WITHOUT a qualified_purl to exercise the fallback + // package string construction path + let base = ctx + .graph + .ingest_package(&Purl::from_str("pkg:cargo/tokio")?, &ctx.db) + .await?; + base.ingest_package_version( + &Purl::from_str("pkg:cargo/tokio@1.0.0-redhat-00001")?, + &ctx.db, + ) + .await?; + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": ["pkg:cargo/tokio@1.0.0"]})) + .to_request(), + ) + .await; + + let recs = recommendations["recommendations"].as_object().unwrap(); + assert_eq!(recs.len(), 1); + + let rec_list = recs["pkg:cargo/tokio@1.0.0"].as_array().unwrap(); + assert_eq!(rec_list.len(), 1); + + // Without a qualified_purl, the fallback constructs the package string + // from base_purl fields + versioned_purl version + let package = rec_list[0]["package"].as_str().unwrap(); + assert_eq!(package, "pkg:cargo/tokio@1.0.0-redhat-00001"); + + Ok(()) +} + +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_fixed_status(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set}; + use trustify_entity::{purl_status, status}; + + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, + &ctx.db, + ) + .await?; + + ctx.ingest_documents(["osv/RUSTSEC-2021-0079.json"]).await?; + + // Override the status to "fixed" to exercise that VexStatus match arm + let fixed_status = status::Entity::find() + .filter(status::Column::Slug.eq("fixed")) + .one(&ctx.db) + .await?; + + let status_id = if let Some(s) = fixed_status { + s.id + } else { + let id = Uuid::new_v4(); + let new_status = status::ActiveModel { + id: Set(id), + slug: Set("fixed".to_string()), + name: Set("Fixed".to_string()), + description: Set(Some("Vulnerability has been fixed".to_string())), + }; + status::Entity::insert(new_status).exec(&ctx.db).await?; + id + }; + + let purl_statuses = purl_status::Entity::find() + .filter(purl_status::Column::VulnerabilityId.eq("CVE-2021-32714")) + .all(&ctx.db) + .await?; + + for ps in purl_statuses { + let mut active: purl_status::ActiveModel = ps.into(); + active.status_id = Set(status_id); + active.update(&ctx.db).await?; + } + + let app = caller(ctx).await?; + let recommendations: Value = app + .call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({"purls": ["pkg:cargo/hyper@0.14.1"]})) + .to_request(), + ) + .await; + + let entry = + &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; + let vuln = entry["vulnerabilities"] + .as_array() + .unwrap() + .iter() + .find(|v| v["id"].as_str().unwrap() == "CVE-2021-32714") + .unwrap(); + + assert_eq!(vuln["status"], "Fixed"); + + Ok(()) +} From 527f164ff43c93d640fef3ffaebcab77617a740a Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 1 Apr 2026 11:57:41 +0200 Subject: [PATCH 09/23] test(purl): use equality assertions in recommend endpoint tests Replace length-only assertions with full value equality assertions in get_recommendations_unknown_purl, get_recommendations_no_namespace, get_recommendations_invalid_version, and get_recommendations_mixed tests for better debugging when tests fail. Implements TC-3970 Assisted-by: Claude Code (cherry picked from commit 815c6b7cd9db0308fd7583d01b884e8f72fa70d4) --- .../fundamental/src/purl/endpoints/test.rs | 55 +++++++------------ 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index b51e10f8b..a9f34b5bd 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -498,14 +498,9 @@ async fn get_recommendations_unknown_purl(ctx: &TrustifyContext) -> Result<(), a ) .await; - let recs = recommendations["recommendations"].as_object().unwrap(); - assert_eq!(recs.len(), 1); assert_eq!( - recs["pkg:maven/com.example/nonexistent@1.0.0"] - .as_array() - .unwrap() - .len(), - 0 + recommendations["recommendations"], + json!({"pkg:maven/com.example/nonexistent@1.0.0": []}) ); Ok(()) @@ -531,18 +526,15 @@ async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), a ) .await; - let recs = recommendations["recommendations"].as_object().unwrap(); - assert_eq!(recs.len(), 1); - - let rec_list = recs["pkg:cargo/serde@1.0.0"] - .as_array() - .expect("recommendations for input purl must be an array"); - assert_eq!(rec_list.len(), 1); - - let recommended_purl = rec_list[0]["package"] - .as_str() - .expect("recommendation must contain a 'package' field"); - assert_eq!(recommended_purl, "pkg:cargo/serde@1.0.0-redhat-00001"); + assert_eq!( + recommendations["recommendations"], + json!({ + "pkg:cargo/serde@1.0.0": [{ + "package": "pkg:cargo/serde@1.0.0-redhat-00001", + "vulnerabilities": [] + }] + }) + ); Ok(()) } @@ -562,8 +554,7 @@ async fn get_recommendations_invalid_version(ctx: &TrustifyContext) -> Result<() ) .await; - let recs = recommendations["recommendations"].as_object().unwrap(); - assert_eq!(recs.len(), 0); + assert_eq!(recommendations["recommendations"], json!({})); Ok(()) } @@ -587,22 +578,18 @@ async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow:: ) .await; - let recs = recommendations["recommendations"].as_object().unwrap(); - // Known PURL with version + unknown PURL = 2 entries (no-version PURL is skipped) - assert_eq!(recs.len(), 2); + let entry = &recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"]; assert_eq!( - recs["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"] - .as_array() - .unwrap() - .len(), - 1 + entry[0]["package"], + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002?repository_url=https://maven.repository.redhat.com/ga/&type=jar" ); assert_eq!( - recs["pkg:maven/com.example/nonexistent@1.0.0"] - .as_array() - .unwrap() - .len(), - 0 + entry[0]["vulnerabilities"], + json!([{"id": "CVE-2022-45787", "status": "NotAffected", "remediations": []}]) + ); + assert_eq!( + recommendations["recommendations"]["pkg:maven/com.example/nonexistent@1.0.0"], + json!([]) ); Ok(()) From 8732d32fafa393767442a1138faf6e7e37a61395 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 1 Apr 2026 12:23:39 +0200 Subject: [PATCH 10/23] docs(purl): add doc comments to recommend_purls structs and helpers Add one-line documentation comments to PurlKey, InputPurl, StatusInfo, Winner structs and all helper methods in the batched recommend_purls pipeline for improved reviewability. Implements TC-3971 Assisted-by: Claude Code (cherry picked from commit eff1b2fdaaf07c569cc6f9de377b2f7f8e4a6a13) --- modules/fundamental/src/purl/service/mod.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index e7c4df44b..b1eeeb82f 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -38,6 +38,7 @@ use trustify_entity::{ }; use trustify_module_ingestor::common::Deprecation; +/// Composite key identifying a base PURL by type, namespace, and name (without version). #[derive(Clone, PartialEq, Eq, Hash)] struct PurlKey { ty: String, @@ -45,12 +46,14 @@ struct PurlKey { name: String, } +/// Vulnerability status record linking a vulnerability ID to its VEX status and remediations. struct StatusInfo { vuln_id: String, status_slug: String, remediations: Vec, } +/// The highest Red Hat patch version selected for a given input PURL, used to build the recommendation. struct Winner<'a> { purl_string: String, versioned_purl: &'a versioned_purl::Model, @@ -87,6 +90,7 @@ impl PurlKey { } } +/// A user-supplied PURL paired with its parsed semver version for version comparison. struct InputPurl { purl: Purl, input_version: semver::Version, @@ -585,6 +589,7 @@ impl PurlService { Ok(recommendations) } + /// Batch-loads vulnerability statuses for the winning versioned PURLs, grouped by base PURL ID. async fn fetch_vulnerability_statuses( winner_base_ids: &[Uuid], winner_vp_ids: &[Uuid], @@ -667,6 +672,7 @@ impl PurlService { Ok(statuses_by_base) } + /// Loads qualified PURLs for winning versioned PURLs to resolve full PURL strings with qualifiers. async fn fetch_qualified_purl_map( winner_vp_ids: &[Uuid], connection: &C, @@ -682,6 +688,7 @@ impl PurlService { Ok(by_vp) } + /// Builds a single recommendation entry from a winner, its vulnerability statuses, and qualified PURL. fn assemble_recommend_entry( winner: &Winner<'_>, statuses_by_base: &HashMap>, @@ -731,6 +738,7 @@ impl PurlService { } } + /// Batch-fetches base PURL entities for the deduplicated set of input PURLs. async fn fetch_base_purls( input_purls: &[InputPurl], connection: &C, @@ -749,6 +757,7 @@ impl PurlService { .await?) } + /// Loads all versioned PURLs for the given base PURLs, grouped by base PURL ID. async fn fetch_versioned_purls_by_base( base_purls: &[base_purl::Model], connection: &C, @@ -766,6 +775,7 @@ impl PurlService { Ok(by_base) } + /// Selects the versioned PURL with the highest Red Hat pre-release suffix matching the input version. fn find_highest_redhat_patch<'a>( pattern: &Regex, input_version: &semver::Version, From df3f3c98356aca78d98fbf89fc58c564d235c684 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 1 Apr 2026 12:42:59 +0200 Subject: [PATCH 11/23] refactor(purl): use type inference for local variable bindings in recommend_purls Replace explicit type annotations (Vec, HashMap) with inferred types (Vec<_>, HashMap<_, _>) throughout recommend_purls and its helper methods where the types are obvious from the iterator/collect context. Implements TC-3972 Assisted-by: Claude Code (cherry picked from commit 6491cf6620fe75afcaecfe97e82f181cd2ebe1e4) --- modules/fundamental/src/purl/service/mod.rs | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index b1eeeb82f..603614d45 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -516,7 +516,7 @@ impl PurlService { ) -> Result>, Error> { let mut recommendations = HashMap::new(); - let input_purls: Vec = + let input_purls: Vec<_> = purls.iter().filter_map(InputPurl::try_from_purl).collect(); if input_purls.is_empty() { return Ok(recommendations); @@ -530,7 +530,7 @@ impl PurlService { return Ok(recommendations); } - let base_purl_map: HashMap = base_purls + let base_purl_map: HashMap<_, _> = base_purls .iter() .map(|bp| (PurlKey::from_base_purl(bp), bp)) .collect(); @@ -572,8 +572,8 @@ impl PurlService { } // Batch fetch vulnerability statuses and qualified PURLs for all winners - let winner_base_ids: Vec = winners.iter().map(|w| w.base.id).unique().collect(); - let winner_vp_ids: Vec = winners.iter().map(|w| w.versioned_purl.id).collect(); + let winner_base_ids: Vec<_> = winners.iter().map(|w| w.base.id).unique().collect(); + let winner_vp_ids: Vec<_> = winners.iter().map(|w| w.versioned_purl.id).collect(); let statuses_by_base = Self::fetch_vulnerability_statuses(&winner_base_ids, &winner_vp_ids, connection) @@ -595,7 +595,7 @@ impl PurlService { winner_vp_ids: &[Uuid], connection: &C, ) -> Result>, Error> { - let all_statuses: Vec = purl_status::Entity::find() + let all_statuses: Vec<_> = purl_status::Entity::find() .columns([ version_range::Column::Id, version_range::Column::LowVersion, @@ -623,7 +623,7 @@ impl PurlService { .load_one(vulnerability::Entity, connection) .await?; let advisories_loaded = all_statuses.load_one(advisory::Entity, connection).await?; - let advisory_models: Vec = advisories_loaded + let advisory_models: Vec<_> = advisories_loaded .iter() .filter_map(|opt| opt.as_ref().cloned()) .collect(); @@ -631,7 +631,7 @@ impl PurlService { .load_one(organization::Entity, connection) .await?; let status_models = all_statuses.load_one(status::Entity, connection).await?; - let status_slug_map: HashMap = all_statuses + let status_slug_map: HashMap<_, _> = all_statuses .iter() .zip(status_models.iter()) .map(|(ps, st)| { @@ -677,11 +677,11 @@ impl PurlService { winner_vp_ids: &[Uuid], connection: &C, ) -> Result, Error> { - let qualified_purls: Vec = qualified_purl::Entity::find() + let qualified_purls: Vec<_> = qualified_purl::Entity::find() .filter(qualified_purl::Column::VersionedPurlId.is_in(winner_vp_ids.iter().copied())) .all(connection) .await?; - let mut by_vp: HashMap = HashMap::new(); + let mut by_vp: HashMap<_, _> = HashMap::new(); for qp in qualified_purls { by_vp.entry(qp.versioned_purl_id).or_insert(qp); } @@ -709,7 +709,7 @@ impl PurlService { }); let mut seen_vuln_ids = HashSet::new(); - let vulnerabilities: Vec = statuses_by_base + let vulnerabilities: Vec<_> = statuses_by_base .get(&winner.base.id) .into_iter() .flatten() @@ -762,13 +762,13 @@ impl PurlService { base_purls: &[base_purl::Model], connection: &C, ) -> Result>, Error> { - let base_purl_ids: Vec = base_purls.iter().map(|bp| bp.id).collect(); - let all_versioned: Vec = versioned_purl::Entity::find() + let base_purl_ids: Vec<_> = base_purls.iter().map(|bp| bp.id).collect(); + let all_versioned: Vec<_> = versioned_purl::Entity::find() .filter(versioned_purl::Column::BasePurlId.is_in(base_purl_ids)) .all(connection) .await?; - let mut by_base: HashMap> = HashMap::new(); + let mut by_base: HashMap<_, Vec<_>> = HashMap::new(); for vp in all_versioned { by_base.entry(vp.base_purl_id).or_default().push(vp); } From e0959f95b9d51b66d3645c6ce06fb5619af57150 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 1 Apr 2026 13:40:06 +0200 Subject: [PATCH 12/23] perf(purl): pre-allocate recommendations HashMap with input size Use HashMap::with_capacity(purls.len()) instead of HashMap::new() to avoid rehashing as the map grows, since output size equals input size. Implements TC-3973 Assisted-by: Claude Code (cherry picked from commit 8f74b785dbaad0dd32a87d1ae27b0b7cf71b4b45) --- modules/fundamental/src/purl/service/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 603614d45..1f02e4421 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -514,7 +514,7 @@ impl PurlService { purls: &[Purl], connection: &C, ) -> Result>, Error> { - let mut recommendations = HashMap::new(); + let mut recommendations = HashMap::with_capacity(purls.len()); let input_purls: Vec<_> = purls.iter().filter_map(InputPurl::try_from_purl).collect(); From 37e56390e778531818aecca35444461c0c94ef21 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 1 Apr 2026 13:47:41 +0200 Subject: [PATCH 13/23] style(purl): apply rustfmt formatting Implements TC-3973 Assisted-by: Claude Code (cherry picked from commit c721e59149adc620506274e1cc554611dc1f803e) --- modules/fundamental/src/purl/service/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 1f02e4421..e952070e3 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -516,8 +516,7 @@ impl PurlService { ) -> Result>, Error> { let mut recommendations = HashMap::with_capacity(purls.len()); - let input_purls: Vec<_> = - purls.iter().filter_map(InputPurl::try_from_purl).collect(); + let input_purls: Vec<_> = purls.iter().filter_map(InputPurl::try_from_purl).collect(); if input_purls.is_empty() { return Ok(recommendations); } From b8a682c9345bf605baaa6eb07c64200135f54a39 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 1 Apr 2026 23:08:23 +0200 Subject: [PATCH 14/23] refactor(purl): address remaining review feedback on recommend_purls Address @ctron's review comments from PR #2310: - Remove unused `advisory_models` and `_organizations` variables and the `organization` import that were computed but never referenced - Consume `winners` Vec by value instead of borrowing, avoiding an unnecessary `.clone()` on `purl_string` - Use `.zip()` for parallel iteration instead of `enumerate()` + index access, matching the established codebase pattern and avoiding panics on size mismatches - Add `#[instrument]` tracing to all four DB helper methods (`fetch_base_purls`, `fetch_versioned_purls_by_base`, `fetch_vulnerability_statuses`, `fetch_qualified_purl_map`) - Chunk all IN-clause and OR-chain queries with a 10,000-item batch size to stay within Postgres's 65,535 bind parameter limit - When the same vulnerability appears from multiple advisories with different statuses, keep the status from the most recent advisory (by `modified` or `published` date) instead of an arbitrary first match, so that newer "fixed" assessments take precedence over older "affected" ones - Document the qualified PURL dedup rationale: only one qualified PURL is kept per versioned PURL because vulnerability statuses are tracked at the base PURL level - Extract a `recommend()` test helper to reduce boilerplate across all 10 recommendation endpoint tests Implements TC-3887 Co-Authored-By: Claude Opus 4.6 (cherry picked from commit 197d15df921c96521e25ba10835f8c714c815d4c) --- .../fundamental/src/purl/endpoints/test.rs | 120 +++------ modules/fundamental/src/purl/service/mod.rs | 250 +++++++++++------- 2 files changed, 185 insertions(+), 185 deletions(-) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index a9f34b5bd..696c66da1 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -15,6 +15,16 @@ use trustify_test_context::{TrustifyContext, call::CallService, subset::Contains use urlencoding::encode; use uuid::Uuid; +async fn recommend(app: &impl CallService, purls: &[&str]) -> Value { + app.call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({ "purls": purls })) + .to_request(), + ) + .await +} + async fn setup(db: &Database, graph: &Graph) -> Result<(), anyhow::Error> { let log4j = graph .ingest_package(&Purl::from_str("pkg:maven/org.apache/log4j")?, db) @@ -305,14 +315,14 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> .await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:maven/jakarta.el/jakarta.el-api@3.0.3", "pkg:maven/jakarta.el/jakarta.el-api@3.0.3"]})) - .to_request(), - ) - .await; + let recommendations = recommend( + &app, + &[ + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", + ], + ) + .await; log::info!("{recommendations:#?}"); @@ -358,14 +368,7 @@ async fn get_recommendations_no_version(ctx: &TrustifyContext) -> Result<(), any .await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:maven/jakarta.el/jakarta.el-api"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:maven/jakarta.el/jakarta.el-api"]).await; log::info!("{recommendations:#?}"); @@ -397,14 +400,7 @@ async fn get_recommendations_dedup(ctx: &TrustifyContext) -> Result<(), anyhow:: .await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:cargo/hyper@0.14.1"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; log::info!("{recommendations:#?}"); @@ -459,14 +455,7 @@ async fn get_recommendations_other_status(ctx: &TrustifyContext) -> Result<(), a } let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:cargo/hyper@0.14.1"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; log::info!("{recommendations:#?}"); @@ -489,14 +478,7 @@ async fn get_recommendations_unknown_purl(ctx: &TrustifyContext) -> Result<(), a ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:maven/com.example/nonexistent@1.0.0"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:maven/com.example/nonexistent@1.0.0"]).await; assert_eq!( recommendations["recommendations"], @@ -517,14 +499,7 @@ async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), a .await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:cargo/serde@1.0.0"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:cargo/serde@1.0.0"]).await; assert_eq!( recommendations["recommendations"], @@ -545,14 +520,8 @@ async fn get_recommendations_invalid_version(ctx: &TrustifyContext) -> Result<() ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:maven/jakarta.el/jakarta.el-api@not-a-version"]})) - .to_request(), - ) - .await; + let recommendations = + recommend(&app, &["pkg:maven/jakarta.el/jakarta.el-api@not-a-version"]).await; assert_eq!(recommendations["recommendations"], json!({})); @@ -565,18 +534,15 @@ async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow:: ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": [ - "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", - "pkg:maven/com.example/nonexistent@1.0.0", - "pkg:maven/jakarta.el/jakarta.el-api" - ]})) - .to_request(), - ) - .await; + let recommendations = recommend( + &app, + &[ + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", + "pkg:maven/com.example/nonexistent@1.0.0", + "pkg:maven/jakarta.el/jakarta.el-api", + ], + ) + .await; let entry = &recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"]; assert_eq!( @@ -613,14 +579,7 @@ async fn get_recommendations_fallback_package_str( .await?; let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:cargo/tokio@1.0.0"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:cargo/tokio@1.0.0"]).await; let recs = recommendations["recommendations"].as_object().unwrap(); assert_eq!(recs.len(), 1); @@ -683,14 +642,7 @@ async fn get_recommendations_fixed_status(ctx: &TrustifyContext) -> Result<(), a } let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:cargo/hyper@0.14.1"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; let entry = &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index e952070e3..a1257ff02 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -30,7 +30,7 @@ use trustify_common::{ purl::{Purl, PurlErr}, }; use trustify_entity::{ - advisory, base_purl, license, organization, purl_status, + advisory, base_purl, license, purl_status, qualified_purl::{self, CanonicalPurl}, remediation, remediation_purl_status, sbom_license_expanded, sbom_node, sbom_node_purl_ref, sbom_package_license, status, version_range, versioned_purl, @@ -38,6 +38,10 @@ use trustify_entity::{ }; use trustify_module_ingestor::common::Deprecation; +/// Maximum number of items per IN-clause chunk to stay well under Postgres's 65,535 bind +/// parameter limit. Conservative value that works for all query shapes in this module. +const QUERY_BATCH_SIZE: usize = 10_000; + /// Composite key identifying a base PURL by type, namespace, and name (without version). #[derive(Clone, PartialEq, Eq, Hash)] struct PurlKey { @@ -51,6 +55,9 @@ struct StatusInfo { vuln_id: String, status_slug: String, remediations: Vec, + /// The most recent date from the advisory that reported this status, used to pick the + /// latest assessment when the same vulnerability appears in multiple advisories. + advisory_date: Option, } /// The highest Red Hat patch version selected for a given input PURL, used to build the recommendation. @@ -580,91 +587,96 @@ impl PurlService { let qualified_purls = Self::fetch_qualified_purl_map(&winner_vp_ids, connection).await?; // Assemble recommendations from batched data - for winner in &winners { - let entry = Self::assemble_recommend_entry(winner, &statuses_by_base, &qualified_purls); - recommendations.insert(winner.purl_string.clone(), vec![entry]); + for winner in winners { + let entry = + Self::assemble_recommend_entry(&winner, &statuses_by_base, &qualified_purls); + recommendations.insert(winner.purl_string, vec![entry]); } Ok(recommendations) } /// Batch-loads vulnerability statuses for the winning versioned PURLs, grouped by base PURL ID. + /// Chunks by base PURL IDs to stay within Postgres bind parameter limits. + #[instrument(skip(winner_base_ids, winner_vp_ids, connection), err)] async fn fetch_vulnerability_statuses( winner_base_ids: &[Uuid], winner_vp_ids: &[Uuid], connection: &C, ) -> Result>, Error> { - let all_statuses: Vec<_> = purl_status::Entity::find() - .columns([ - version_range::Column::Id, - version_range::Column::LowVersion, - version_range::Column::LowInclusive, - version_range::Column::HighVersion, - version_range::Column::HighInclusive, - ]) - .left_join(base_purl::Entity) - .join( - JoinType::LeftJoin, - base_purl::Relation::VersionedPurls.def(), - ) - .left_join(version_range::Entity) - .filter(purl_status::Column::BasePurlId.is_in(winner_base_ids.iter().copied())) - .filter(versioned_purl::Column::Id.is_in(winner_vp_ids.iter().copied())) - .filter(SimpleExpr::FunctionCall( - Func::cust(trustify_common::db::VersionMatches) - .arg(Expr::col(versioned_purl::Column::Version)) - .arg(Expr::col((version_range::Entity, Asterisk))), - )) - .all(connection) - .await?; + let mut statuses_by_base: HashMap> = HashMap::new(); - let vulns = all_statuses - .load_one(vulnerability::Entity, connection) - .await?; - let advisories_loaded = all_statuses.load_one(advisory::Entity, connection).await?; - let advisory_models: Vec<_> = advisories_loaded - .iter() - .filter_map(|opt| opt.as_ref().cloned()) - .collect(); - let _organizations = advisory_models - .load_one(organization::Entity, connection) - .await?; - let status_models = all_statuses.load_one(status::Entity, connection).await?; - let status_slug_map: HashMap<_, _> = all_statuses - .iter() - .zip(status_models.iter()) - .map(|(ps, st)| { - ( - ps.status_id, - st.as_ref() - .map(|s| s.slug.clone()) - .unwrap_or_else(|| "unknown".to_string()), + for base_chunk in winner_base_ids.chunks(QUERY_BATCH_SIZE) { + let all_statuses: Vec<_> = purl_status::Entity::find() + .columns([ + version_range::Column::Id, + version_range::Column::LowVersion, + version_range::Column::LowInclusive, + version_range::Column::HighVersion, + version_range::Column::HighInclusive, + ]) + .left_join(base_purl::Entity) + .join( + JoinType::LeftJoin, + base_purl::Relation::VersionedPurls.def(), ) - }) - .collect(); - let remediations = all_statuses - .load_many_to_many( - remediation::Entity, - remediation_purl_status::Entity, - connection, - ) - .await?; + .left_join(version_range::Entity) + .filter(purl_status::Column::BasePurlId.is_in(base_chunk.iter().copied())) + .filter(versioned_purl::Column::Id.is_in(winner_vp_ids.iter().copied())) + .filter(SimpleExpr::FunctionCall( + Func::cust(trustify_common::db::VersionMatches) + .arg(Expr::col(versioned_purl::Column::Version)) + .arg(Expr::col((version_range::Entity, Asterisk))), + )) + .all(connection) + .await?; - let mut statuses_by_base: HashMap> = HashMap::new(); - for (i, ps) in all_statuses.iter().enumerate() { - if let (Some(v), Some(_a)) = (&vulns[i], &advisories_loaded[i]) { - let slug = status_slug_map - .get(&ps.status_id) - .cloned() - .unwrap_or_else(|| "unknown".to_string()); - statuses_by_base - .entry(ps.base_purl_id) - .or_default() - .push(StatusInfo { - vuln_id: v.id.clone(), - status_slug: slug, - remediations: remediations[i].clone(), - }); + let vulns = all_statuses + .load_one(vulnerability::Entity, connection) + .await?; + let advisories_loaded = all_statuses.load_one(advisory::Entity, connection).await?; + let status_models = all_statuses.load_one(status::Entity, connection).await?; + let status_slug_map: HashMap<_, _> = all_statuses + .iter() + .zip(status_models.iter()) + .map(|(ps, st)| { + ( + ps.status_id, + st.as_ref() + .map(|s| s.slug.clone()) + .unwrap_or_else(|| "unknown".to_string()), + ) + }) + .collect(); + let remediations = all_statuses + .load_many_to_many( + remediation::Entity, + remediation_purl_status::Entity, + connection, + ) + .await?; + + for (((vuln, advisory), ps), rems) in vulns + .iter() + .zip(advisories_loaded.iter()) + .zip(all_statuses.iter()) + .zip(remediations.iter()) + { + if let (Some(v), Some(advisory)) = (vuln, advisory) { + let slug = status_slug_map + .get(&ps.status_id) + .cloned() + .unwrap_or_else(|| "unknown".to_string()); + statuses_by_base + .entry(ps.base_purl_id) + .or_default() + .push(StatusInfo { + vuln_id: v.id.clone(), + status_slug: slug, + remediations: rems.clone(), + advisory_date: advisory.modified.or(advisory.published), + }); + } } } @@ -672,17 +684,27 @@ impl PurlService { } /// Loads qualified PURLs for winning versioned PURLs to resolve full PURL strings with qualifiers. + /// + /// A versioned PURL may have multiple qualified variants (e.g. different `arch` or `type` + /// qualifiers). We keep only the first one found per versioned PURL because vulnerability + /// statuses are tracked at the base PURL level, so all qualified variants share the same + /// vulnerability data. The qualified PURL is used here only to reconstruct a full PURL + /// string (with qualifiers) for the recommendation response. + /// Chunks the IN clause to stay within Postgres bind parameter limits. + #[instrument(skip(winner_vp_ids, connection), err)] async fn fetch_qualified_purl_map( winner_vp_ids: &[Uuid], connection: &C, ) -> Result, Error> { - let qualified_purls: Vec<_> = qualified_purl::Entity::find() - .filter(qualified_purl::Column::VersionedPurlId.is_in(winner_vp_ids.iter().copied())) - .all(connection) - .await?; let mut by_vp: HashMap<_, _> = HashMap::new(); - for qp in qualified_purls { - by_vp.entry(qp.versioned_purl_id).or_insert(qp); + for chunk in winner_vp_ids.chunks(QUERY_BATCH_SIZE) { + let qualified_purls = qualified_purl::Entity::find() + .filter(qualified_purl::Column::VersionedPurlId.is_in(chunk.iter().copied())) + .all(connection) + .await?; + for qp in qualified_purls { + by_vp.entry(qp.versioned_purl_id).or_insert(qp); + } } Ok(by_vp) } @@ -707,12 +729,23 @@ impl PurlService { .to_string() }); - let mut seen_vuln_ids = HashSet::new(); - let vulnerabilities: Vec<_> = statuses_by_base - .get(&winner.base.id) - .into_iter() - .flatten() - .filter(|info| seen_vuln_ids.insert(info.vuln_id.clone())) + // When the same vulnerability appears in multiple advisories with different statuses, + // keep the one from the most recent advisory so that newer assessments (e.g. "fixed") + // take precedence over older ones (e.g. "affected"). + let mut best_by_vuln: HashMap<&str, &StatusInfo> = HashMap::new(); + for info in statuses_by_base.get(&winner.base.id).into_iter().flatten() { + best_by_vuln + .entry(&info.vuln_id) + .and_modify(|existing| { + if info.advisory_date > existing.advisory_date { + *existing = info; + } + }) + .or_insert(info); + } + + let vulnerabilities: Vec<_> = best_by_vuln + .into_values() .map(|info| { let vex_status = match info.status_slug.as_str() { "affected" => VexStatus::Affected, @@ -738,38 +771,53 @@ impl PurlService { } /// Batch-fetches base PURL entities for the deduplicated set of input PURLs. + /// Chunks the OR conditions to stay within Postgres bind parameter limits. + #[instrument(skip(input_purls, connection), err)] async fn fetch_base_purls( input_purls: &[InputPurl], connection: &C, ) -> Result, Error> { - let mut condition = Condition::any(); - let mut seen_keys: HashSet = HashSet::new(); - for ip in input_purls { - let key = PurlKey::from_purl(&ip.purl); - if seen_keys.insert(key.clone()) { - condition = condition.add(key.as_condition()); - } + let mut seen_keys = HashSet::new(); + let unique_keys: Vec<_> = input_purls + .iter() + .filter_map(|ip| { + let key = PurlKey::from_purl(&ip.purl); + seen_keys.insert(key.clone()).then_some(key) + }) + .collect(); + + let mut results = Vec::new(); + for chunk in unique_keys.chunks(QUERY_BATCH_SIZE) { + let condition = chunk + .iter() + .fold(Condition::any(), |cond, key| cond.add(key.as_condition())); + let batch = base_purl::Entity::find() + .filter(condition) + .all(connection) + .await?; + results.extend(batch); } - Ok(base_purl::Entity::find() - .filter(condition) - .all(connection) - .await?) + Ok(results) } /// Loads all versioned PURLs for the given base PURLs, grouped by base PURL ID. + /// Chunks the IN clause to stay within Postgres bind parameter limits. + #[instrument(skip(base_purls, connection), err)] async fn fetch_versioned_purls_by_base( base_purls: &[base_purl::Model], connection: &C, ) -> Result>, Error> { let base_purl_ids: Vec<_> = base_purls.iter().map(|bp| bp.id).collect(); - let all_versioned: Vec<_> = versioned_purl::Entity::find() - .filter(versioned_purl::Column::BasePurlId.is_in(base_purl_ids)) - .all(connection) - .await?; let mut by_base: HashMap<_, Vec<_>> = HashMap::new(); - for vp in all_versioned { - by_base.entry(vp.base_purl_id).or_default().push(vp); + for chunk in base_purl_ids.chunks(QUERY_BATCH_SIZE) { + let batch = versioned_purl::Entity::find() + .filter(versioned_purl::Column::BasePurlId.is_in(chunk.iter().copied())) + .all(connection) + .await?; + for vp in batch { + by_base.entry(vp.base_purl_id).or_default().push(vp); + } } Ok(by_base) } From f70662d8fc2d9408d0a8a3103e30e1afa7633f40 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Mon, 6 Apr 2026 13:55:27 +0200 Subject: [PATCH 15/23] refactor(purl): parameterize unknown_purl and invalid_version tests Merge get_recommendations_unknown_purl and get_recommendations_invalid_version into a single parameterized get_recommendations_no_match test using rstest, reducing duplication while preserving coverage. Co-Authored-By: Claude Opus 4.6 (cherry picked from commit a4840185b20615c48ca4f9b4390e30f7ee3f9888) --- .../fundamental/src/purl/endpoints/test.rs | 39 +++++++++---------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index 696c66da1..edf8fb967 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -3,6 +3,7 @@ use crate::purl::model::summary::base_purl::BasePurlSummary; use crate::purl::model::summary::purl::PurlSummary; use crate::test::caller; use actix_web::test::TestRequest; +use rstest::rstest; use serde_json::{Value, json}; use std::str::FromStr; use test_context::test_context; @@ -473,17 +474,27 @@ async fn get_recommendations_other_status(ctx: &TrustifyContext) -> Result<(), a } #[test_context(TrustifyContext)] -#[test(actix_web::test)] -async fn get_recommendations_unknown_purl(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { +#[rstest] +#[case::unknown_purl( + "pkg:maven/com.example/nonexistent@1.0.0", + json!({"pkg:maven/com.example/nonexistent@1.0.0": []}) +)] +#[case::invalid_version( + "pkg:maven/jakarta.el/jakarta.el-api@not-a-version", + json!({}) +)] +#[test_log::test(actix_web::test)] +async fn get_recommendations_no_match( + ctx: &TrustifyContext, + #[case] purl: &str, + #[case] expected: Value, +) -> Result<(), anyhow::Error> { ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; let app = caller(ctx).await?; - let recommendations = recommend(&app, &["pkg:maven/com.example/nonexistent@1.0.0"]).await; + let recommendations = recommend(&app, &[purl]).await; - assert_eq!( - recommendations["recommendations"], - json!({"pkg:maven/com.example/nonexistent@1.0.0": []}) - ); + assert_eq!(recommendations["recommendations"], expected); Ok(()) } @@ -514,20 +525,6 @@ async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), a Ok(()) } -#[test_context(TrustifyContext)] -#[test(actix_web::test)] -async fn get_recommendations_invalid_version(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { - ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; - - let app = caller(ctx).await?; - let recommendations = - recommend(&app, &["pkg:maven/jakarta.el/jakarta.el-api@not-a-version"]).await; - - assert_eq!(recommendations["recommendations"], json!({})); - - Ok(()) -} - #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { From df4e3859d6f34820aaca3eb1c4ba35969bf0cf5b Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Mon, 6 Apr 2026 14:31:58 +0200 Subject: [PATCH 16/23] refactor(purl): return versioned PURL instead of qualified in recommendations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always return the versioned PURL (without qualifiers) as the recommended package string. Qualifiers like arch, repository_url, and type are context-dependent — the system cannot know which match the caller's environment, so including an arbitrary one is misleading. Remove fetch_qualified_purl_map and the extra DB query it performed, simplifying the recommend flow. Co-Authored-By: Claude Opus 4.6 (cherry picked from commit 54d31908976689bea7527eb9bc6f7778c046778f) --- .../fundamental/src/purl/endpoints/test.rs | 11 ++-- modules/fundamental/src/purl/service/mod.rs | 56 +++++-------------- 2 files changed, 18 insertions(+), 49 deletions(-) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index edf8fb967..f669cdba4 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -346,7 +346,7 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> ); assert_eq!( recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"][0]["package"], - "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002?repository_url=https://maven.repository.redhat.com/ga/&type=jar", + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002", ); let mut cves = recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"] @@ -544,7 +544,7 @@ async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow:: let entry = &recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"]; assert_eq!( entry[0]["package"], - "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002?repository_url=https://maven.repository.redhat.com/ga/&type=jar" + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002" ); assert_eq!( entry[0]["vulnerabilities"], @@ -563,8 +563,8 @@ async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow:: async fn get_recommendations_fallback_package_str( ctx: &TrustifyContext, ) -> Result<(), anyhow::Error> { - // Ingest a versioned_purl WITHOUT a qualified_purl to exercise the fallback - // package string construction path + // Ingest a versioned_purl WITHOUT a qualified_purl to verify the versioned + // PURL is returned as the recommended package string let base = ctx .graph .ingest_package(&Purl::from_str("pkg:cargo/tokio")?, &ctx.db) @@ -584,8 +584,7 @@ async fn get_recommendations_fallback_package_str( let rec_list = recs["pkg:cargo/tokio@1.0.0"].as_array().unwrap(); assert_eq!(rec_list.len(), 1); - // Without a qualified_purl, the fallback constructs the package string - // from base_purl fields + versioned_purl version + // Recommendations always return the versioned PURL (without qualifiers) let package = rec_list[0]["package"].as_str().unwrap(); assert_eq!(package, "pkg:cargo/tokio@1.0.0-redhat-00001"); diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index a1257ff02..613f7ee6c 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -584,12 +584,10 @@ impl PurlService { let statuses_by_base = Self::fetch_vulnerability_statuses(&winner_base_ids, &winner_vp_ids, connection) .await?; - let qualified_purls = Self::fetch_qualified_purl_map(&winner_vp_ids, connection).await?; // Assemble recommendations from batched data for winner in winners { - let entry = - Self::assemble_recommend_entry(&winner, &statuses_by_base, &qualified_purls); + let entry = Self::assemble_recommend_entry(&winner, &statuses_by_base); recommendations.insert(winner.purl_string, vec![entry]); } @@ -683,51 +681,23 @@ impl PurlService { Ok(statuses_by_base) } - /// Loads qualified PURLs for winning versioned PURLs to resolve full PURL strings with qualifiers. + /// Builds a single recommendation entry from a winner and its vulnerability statuses. /// - /// A versioned PURL may have multiple qualified variants (e.g. different `arch` or `type` - /// qualifiers). We keep only the first one found per versioned PURL because vulnerability - /// statuses are tracked at the base PURL level, so all qualified variants share the same - /// vulnerability data. The qualified PURL is used here only to reconstruct a full PURL - /// string (with qualifiers) for the recommendation response. - /// Chunks the IN clause to stay within Postgres bind parameter limits. - #[instrument(skip(winner_vp_ids, connection), err)] - async fn fetch_qualified_purl_map( - winner_vp_ids: &[Uuid], - connection: &C, - ) -> Result, Error> { - let mut by_vp: HashMap<_, _> = HashMap::new(); - for chunk in winner_vp_ids.chunks(QUERY_BATCH_SIZE) { - let qualified_purls = qualified_purl::Entity::find() - .filter(qualified_purl::Column::VersionedPurlId.is_in(chunk.iter().copied())) - .all(connection) - .await?; - for qp in qualified_purls { - by_vp.entry(qp.versioned_purl_id).or_insert(qp); - } - } - Ok(by_vp) - } - - /// Builds a single recommendation entry from a winner, its vulnerability statuses, and qualified PURL. + /// Returns the versioned PURL (without qualifiers) as the recommended package string. + /// Qualifiers are context-dependent (arch, repository_url, type) and the system cannot + /// know which qualifiers match the caller's environment. fn assemble_recommend_entry( winner: &Winner<'_>, statuses_by_base: &HashMap>, - qualified_by_vp: &HashMap, ) -> RecommendEntry { - let package_str = qualified_by_vp - .get(&winner.versioned_purl.id) - .map(|qp| Purl::from(qp.purl.clone()).to_string()) - .unwrap_or_else(|| { - Purl { - ty: winner.base.r#type.clone(), - namespace: winner.base.namespace.clone(), - name: winner.base.name.clone(), - version: Some(winner.versioned_purl.version.clone()), - qualifiers: Default::default(), - } - .to_string() - }); + let package_str = Purl { + ty: winner.base.r#type.clone(), + namespace: winner.base.namespace.clone(), + name: winner.base.name.clone(), + version: Some(winner.versioned_purl.version.clone()), + qualifiers: Default::default(), + } + .to_string(); // When the same vulnerability appears in multiple advisories with different statuses, // keep the one from the most recent advisory so that newer assessments (e.g. "fixed") From ebd801902fa88bbc16893dd67bc85aeb9e5baaf1 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Mon, 6 Apr 2026 14:44:24 +0200 Subject: [PATCH 17/23] refactor: use chunked_with for IN-clause chunking in purl service Replace manual QUERY_BATCH_SIZE constant and .chunks() calls with the existing trustify_common::db::chunk::chunked_with utility, which dynamically calculates chunk sizes based on PostgreSQL's bind parameter limit. Co-Authored-By: Claude Opus 4.6 (cherry picked from commit ee80f8560b73aef08f1d8dc7f37b8b4246ecd131) --- modules/fundamental/src/purl/service/mod.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 613f7ee6c..d4e374377 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -23,6 +23,7 @@ use sea_query::{Asterisk, ColumnType, Expr, Func, JoinType, Order, SimpleExpr, U use tracing::instrument; use trustify_common::{ db::{ + chunk::chunked_with, limiter::LimiterTrait, query::{Columns, Filtering, IntoColumns, Query, q}, }, @@ -38,10 +39,6 @@ use trustify_entity::{ }; use trustify_module_ingestor::common::Deprecation; -/// Maximum number of items per IN-clause chunk to stay well under Postgres's 65,535 bind -/// parameter limit. Conservative value that works for all query shapes in this module. -const QUERY_BATCH_SIZE: usize = 10_000; - /// Composite key identifying a base PURL by type, namespace, and name (without version). #[derive(Clone, PartialEq, Eq, Hash)] struct PurlKey { @@ -604,7 +601,9 @@ impl PurlService { ) -> Result>, Error> { let mut statuses_by_base: HashMap> = HashMap::new(); - for base_chunk in winner_base_ids.chunks(QUERY_BATCH_SIZE) { + let base_chunks = chunked_with(1, winner_base_ids.iter().copied()); + for base_chunk in &base_chunks { + let base_chunk: Vec<_> = base_chunk.collect(); let all_statuses: Vec<_> = purl_status::Entity::find() .columns([ version_range::Column::Id, @@ -619,7 +618,7 @@ impl PurlService { base_purl::Relation::VersionedPurls.def(), ) .left_join(version_range::Entity) - .filter(purl_status::Column::BasePurlId.is_in(base_chunk.iter().copied())) + .filter(purl_status::Column::BasePurlId.is_in(base_chunk)) .filter(versioned_purl::Column::Id.is_in(winner_vp_ids.iter().copied())) .filter(SimpleExpr::FunctionCall( Func::cust(trustify_common::db::VersionMatches) @@ -757,7 +756,9 @@ impl PurlService { .collect(); let mut results = Vec::new(); - for chunk in unique_keys.chunks(QUERY_BATCH_SIZE) { + let key_chunks = chunked_with(3, unique_keys.into_iter()); + for chunk in &key_chunks { + let chunk: Vec<_> = chunk.collect(); let condition = chunk .iter() .fold(Condition::any(), |cond, key| cond.add(key.as_condition())); @@ -780,9 +781,11 @@ impl PurlService { let base_purl_ids: Vec<_> = base_purls.iter().map(|bp| bp.id).collect(); let mut by_base: HashMap<_, Vec<_>> = HashMap::new(); - for chunk in base_purl_ids.chunks(QUERY_BATCH_SIZE) { + let id_chunks = chunked_with(1, base_purl_ids.into_iter()); + for chunk in &id_chunks { + let chunk: Vec<_> = chunk.collect(); let batch = versioned_purl::Entity::find() - .filter(versioned_purl::Column::BasePurlId.is_in(chunk.iter().copied())) + .filter(versioned_purl::Column::BasePurlId.is_in(chunk)) .all(connection) .await?; for vp in batch { From ef8b51307fb1067432e3d48dc5f509f99244bd4e Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Mon, 6 Apr 2026 16:51:07 +0200 Subject: [PATCH 18/23] docs(tests): add doc comments and given-when-then structure to recommendation tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply test documentation standards (§5.9-5.10): every test gets a doc comment explaining what it verifies, and non-trivial tests get given-when-then inline comments to make their structure navigable. Co-Authored-By: Claude Opus 4.6 (cherry picked from commit 2ca4c386d7166fb57abe189ec4430b475f7c0b90) --- .../fundamental/src/purl/endpoints/test.rs | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index f669cdba4..cb0cd3b91 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -305,9 +305,11 @@ async fn test_purl_license_details(ctx: &TrustifyContext) -> Result<(), anyhow:: Ok(()) } +/// Verifies that duplicate input PURLs are deduplicated and recommendations include the correct upgraded package and vulnerabilities. #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given advisories for multiple CVEs are ingested ctx.ingest_documents([ "cve/CVE-2022-45787.json", "cve/CVE-2023-28867.json", @@ -315,6 +317,7 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> ]) .await?; + // When requesting recommendations for a duplicated PURL let app = caller(ctx).await?; let recommendations = recommend( &app, @@ -327,6 +330,7 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> log::info!("{recommendations:#?}"); + // Then a single recommendation entry is returned with both CVEs assert_eq!( recommendations["recommendations"] .as_object() @@ -362,17 +366,21 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> Ok(()) } +/// Verifies that PURLs without a version produce no recommendations. #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_no_version(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given advisories are ingested ctx.ingest_documents(["cve/CVE-2022-45787.json", "cve/CVE-2023-28867.json"]) .await?; + // When requesting recommendations for a PURL without a version let app = caller(ctx).await?; let recommendations = recommend(&app, &["pkg:maven/jakarta.el/jakarta.el-api"]).await; log::info!("{recommendations:#?}"); + // Then no recommendations are returned assert_eq!( recommendations["recommendations"] .as_object() @@ -384,9 +392,11 @@ async fn get_recommendations_no_version(ctx: &TrustifyContext) -> Result<(), any Ok(()) } +/// Verifies that duplicate advisories for the same CVE produce a single vulnerability entry. #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_dedup(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given a Red Hat package and two duplicate advisories for the same CVE ctx.graph .ingest_qualified_package( &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, @@ -400,11 +410,13 @@ async fn get_recommendations_dedup(ctx: &TrustifyContext) -> Result<(), anyhow:: ]) .await?; + // When requesting recommendations let app = caller(ctx).await?; let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; log::info!("{recommendations:#?}"); + // Then the recommendation contains a single deduplicated vulnerability let entry = &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; assert_eq!(entry["vulnerabilities"].as_array().unwrap().len(), 1); @@ -418,12 +430,14 @@ async fn get_recommendations_dedup(ctx: &TrustifyContext) -> Result<(), anyhow:: Ok(()) } +/// Verifies that a custom vulnerability status is reflected in the recommendation response. #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_other_status(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set}; use trustify_entity::{purl_status, status}; + // Given a package with a vulnerability whose status is overridden to a custom value ctx.graph .ingest_qualified_package( &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, @@ -455,11 +469,13 @@ async fn get_recommendations_other_status(ctx: &TrustifyContext) -> Result<(), a active.update(&ctx.db).await?; } + // When requesting recommendations let app = caller(ctx).await?; let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; log::info!("{recommendations:#?}"); + // Then the vulnerability status reflects the custom status let entry = &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; let vulns = entry["vulnerabilities"].as_array().unwrap(); @@ -473,6 +489,7 @@ async fn get_recommendations_other_status(ctx: &TrustifyContext) -> Result<(), a Ok(()) } +/// Verifies that PURLs with no matching base package or an unparseable version produce the expected empty response. #[test_context(TrustifyContext)] #[rstest] #[case::unknown_purl( @@ -489,19 +506,24 @@ async fn get_recommendations_no_match( #[case] purl: &str, #[case] expected: Value, ) -> Result<(), anyhow::Error> { + // Given an advisory is ingested ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; + // When requesting recommendations for a non-matching PURL let app = caller(ctx).await?; let recommendations = recommend(&app, &[purl]).await; + // Then the response matches the expected empty result assert_eq!(recommendations["recommendations"], expected); Ok(()) } +/// Verifies that recommendations work for PURLs without a namespace (e.g., cargo packages). #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given a Red Hat package with no namespace ctx.graph .ingest_qualified_package( &Purl::from_str("pkg:cargo/serde@1.0.0-redhat-00001")?, @@ -509,9 +531,11 @@ async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), a ) .await?; + // When requesting recommendations let app = caller(ctx).await?; let recommendations = recommend(&app, &["pkg:cargo/serde@1.0.0"]).await; + // Then the recommendation returns the Red Hat package assert_eq!( recommendations["recommendations"], json!({ @@ -525,11 +549,14 @@ async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), a Ok(()) } +/// Verifies correct handling of mixed input: a known PURL, an unknown PURL, and a versionless PURL. #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given an advisory is ingested ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; + // When requesting recommendations for a mix of known, unknown, and versionless PURLs let app = caller(ctx).await?; let recommendations = recommend( &app, @@ -541,6 +568,7 @@ async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow:: ) .await; + // Then known PURLs get recommendations and unknown PURLs get empty arrays let entry = &recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"]; assert_eq!( entry[0]["package"], @@ -558,13 +586,13 @@ async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow:: Ok(()) } +/// Verifies that the versioned PURL (without qualifiers) is returned as the package string when no qualified PURL exists. #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_fallback_package_str( ctx: &TrustifyContext, ) -> Result<(), anyhow::Error> { - // Ingest a versioned_purl WITHOUT a qualified_purl to verify the versioned - // PURL is returned as the recommended package string + // Given a versioned PURL without a qualified PURL let base = ctx .graph .ingest_package(&Purl::from_str("pkg:cargo/tokio")?, &ctx.db) @@ -575,28 +603,31 @@ async fn get_recommendations_fallback_package_str( ) .await?; + // When requesting recommendations let app = caller(ctx).await?; let recommendations = recommend(&app, &["pkg:cargo/tokio@1.0.0"]).await; + // Then the versioned PURL is returned as the package string let recs = recommendations["recommendations"].as_object().unwrap(); assert_eq!(recs.len(), 1); let rec_list = recs["pkg:cargo/tokio@1.0.0"].as_array().unwrap(); assert_eq!(rec_list.len(), 1); - // Recommendations always return the versioned PURL (without qualifiers) let package = rec_list[0]["package"].as_str().unwrap(); assert_eq!(package, "pkg:cargo/tokio@1.0.0-redhat-00001"); Ok(()) } +/// Verifies that a "fixed" vulnerability status maps to the Fixed VexStatus and uses the status name in the response. #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_fixed_status(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set}; use trustify_entity::{purl_status, status}; + // Given a package with a vulnerability whose status is set to "fixed" ctx.graph .ingest_qualified_package( &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, @@ -606,7 +637,6 @@ async fn get_recommendations_fixed_status(ctx: &TrustifyContext) -> Result<(), a ctx.ingest_documents(["osv/RUSTSEC-2021-0079.json"]).await?; - // Override the status to "fixed" to exercise that VexStatus match arm let fixed_status = status::Entity::find() .filter(status::Column::Slug.eq("fixed")) .one(&ctx.db) @@ -637,9 +667,11 @@ async fn get_recommendations_fixed_status(ctx: &TrustifyContext) -> Result<(), a active.update(&ctx.db).await?; } + // When requesting recommendations let app = caller(ctx).await?; let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; + // Then the vulnerability status is reported as "Fixed" let entry = &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; let vuln = entry["vulnerabilities"] From 2f6c4f991d9ad249a5eaaf9de460ae79f8412a8d Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 8 Apr 2026 10:30:18 +0200 Subject: [PATCH 19/23] refactor(purl): remove remaining unnecessary type annotations Remove explicit type annotations where the Rust compiler can infer the type: Vec on winners (push provides type), Vec<_> on SeaORM .all() result, HashMap key type on statuses_by_base, and vulnerabilities Vec<_> (inlined into RecommendEntry struct literal where the field type provides inference). Implements TC-4013 Assisted-by: Claude Code (cherry picked from commit 1b5a0d1365a6d63ecc34dfd46cad91372c7f78bc) --- modules/fundamental/src/purl/service/mod.rs | 46 ++++++++++----------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index d4e374377..f0e9718a3 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -544,7 +544,7 @@ impl PurlService { #[allow(clippy::unwrap_used)] let pattern = Regex::new("redhat-[0-9]+$").unwrap(); - let mut winners: Vec = Vec::new(); + let mut winners = Vec::new(); for ip in &input_purls { let key = PurlKey::from_purl(&ip.purl); @@ -599,12 +599,12 @@ impl PurlService { winner_vp_ids: &[Uuid], connection: &C, ) -> Result>, Error> { - let mut statuses_by_base: HashMap> = HashMap::new(); + let mut statuses_by_base: HashMap<_, Vec> = HashMap::new(); let base_chunks = chunked_with(1, winner_base_ids.iter().copied()); for base_chunk in &base_chunks { let base_chunk: Vec<_> = base_chunk.collect(); - let all_statuses: Vec<_> = purl_status::Entity::find() + let all_statuses = purl_status::Entity::find() .columns([ version_range::Column::Id, version_range::Column::LowVersion, @@ -713,29 +713,27 @@ impl PurlService { .or_insert(info); } - let vulnerabilities: Vec<_> = best_by_vuln - .into_values() - .map(|info| { - let vex_status = match info.status_slug.as_str() { - "affected" => VexStatus::Affected, - "fixed" => VexStatus::Fixed, - "not_affected" => VexStatus::NotAffected, - "under_investigation" => VexStatus::UnderInvestigation, - "recommended" => VexStatus::Recommended, - other => VexStatus::Other(other.to_string()), - }; - VulnerabilityStatus { - id: info.vuln_id.clone(), - status: Some(vex_status), - justification: None, - remediations: RemediationSummary::from_entities(&info.remediations), - } - }) - .collect(); - RecommendEntry { package: package_str, - vulnerabilities, + vulnerabilities: best_by_vuln + .into_values() + .map(|info| { + let vex_status = match info.status_slug.as_str() { + "affected" => VexStatus::Affected, + "fixed" => VexStatus::Fixed, + "not_affected" => VexStatus::NotAffected, + "under_investigation" => VexStatus::UnderInvestigation, + "recommended" => VexStatus::Recommended, + other => VexStatus::Other(other.to_string()), + }; + VulnerabilityStatus { + id: info.vuln_id.clone(), + status: Some(vex_status), + justification: None, + remediations: RemediationSummary::from_entities(&info.remediations), + } + }) + .collect(), } } From 10193b4b3d071ed43ab42a015f28364f3ebb41f5 Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 8 Apr 2026 12:45:36 +0200 Subject: [PATCH 20/23] refactor(purl): use idiomatic .into_iter() and eliminate unnecessary clones Replace .iter() with .into_iter() where source collections are no longer needed, eliminating .clone() calls on vuln IDs, remediations, and status slugs. Reorder base_purl_map construction to consume base_purls after fetch_versioned_purls_by_base. Simplify status_slug_map to build directly from status_models. Implements TC-4014 Assisted-by: Claude Code (cherry picked from commit cf3e65417057e415a5287e57f4295394b04564fa) --- modules/fundamental/src/purl/service/mod.rs | 42 ++++++++++----------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index f0e9718a3..92232cabb 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -533,20 +533,23 @@ impl PurlService { return Ok(recommendations); } - let base_purl_map: HashMap<_, _> = base_purls - .iter() - .map(|bp| (PurlKey::from_base_purl(bp), bp)) - .collect(); - let versioned_by_base = Self::fetch_versioned_purls_by_base(&base_purls, connection).await?; + let base_purl_map: HashMap<_, _> = base_purls + .into_iter() + .map(|bp| { + let key = PurlKey::from_base_purl(&bp); + (key, bp) + }) + .collect(); + #[allow(clippy::unwrap_used)] let pattern = Regex::new("redhat-[0-9]+$").unwrap(); let mut winners = Vec::new(); - for ip in &input_purls { + for ip in input_purls { let key = PurlKey::from_purl(&ip.purl); let Some(base) = base_purl_map.get(&key) else { recommendations.insert(ip.purl.to_string(), Vec::new()); @@ -633,17 +636,10 @@ impl PurlService { .await?; let advisories_loaded = all_statuses.load_one(advisory::Entity, connection).await?; let status_models = all_statuses.load_one(status::Entity, connection).await?; - let status_slug_map: HashMap<_, _> = all_statuses - .iter() - .zip(status_models.iter()) - .map(|(ps, st)| { - ( - ps.status_id, - st.as_ref() - .map(|s| s.slug.clone()) - .unwrap_or_else(|| "unknown".to_string()), - ) - }) + let status_slug_map: HashMap<_, _> = status_models + .into_iter() + .flatten() + .map(|s| (s.id, s.slug)) .collect(); let remediations = all_statuses .load_many_to_many( @@ -654,10 +650,10 @@ impl PurlService { .await?; for (((vuln, advisory), ps), rems) in vulns - .iter() - .zip(advisories_loaded.iter()) - .zip(all_statuses.iter()) - .zip(remediations.iter()) + .into_iter() + .zip(advisories_loaded) + .zip(all_statuses) + .zip(remediations) { if let (Some(v), Some(advisory)) = (vuln, advisory) { let slug = status_slug_map @@ -668,9 +664,9 @@ impl PurlService { .entry(ps.base_purl_id) .or_default() .push(StatusInfo { - vuln_id: v.id.clone(), + vuln_id: v.id, status_slug: slug, - remediations: rems.clone(), + remediations: rems, advisory_date: advisory.modified.or(advisory.published), }); } From b9653c49664cea13a83e10d415c5ef141b2f3cab Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 8 Apr 2026 13:17:48 +0200 Subject: [PATCH 21/23] refactor(purl): add tracing instrument spans to recommend sub-operations Wrap fetch_base_purls, fetch_versioned_purls_by_base, and fetch_vulnerability_statuses calls with info_span!() for profiling visibility into recommend_purls pipeline stages. Implements TC-4016 Assisted-by: Claude Code (cherry picked from commit 56667b692b55095ccf86a091e55ee58b75a21e69) --- modules/fundamental/src/purl/service/mod.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 92232cabb..24bce4f84 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -20,7 +20,7 @@ use sea_orm::{ QueryFilter, QueryOrder, QuerySelect, QueryTrait, RelationTrait, prelude::Uuid, }; use sea_query::{Asterisk, ColumnType, Expr, Func, JoinType, Order, SimpleExpr, UnionType}; -use tracing::instrument; +use tracing::{Instrument, info_span, instrument}; use trustify_common::{ db::{ chunk::chunked_with, @@ -525,7 +525,9 @@ impl PurlService { return Ok(recommendations); } - let base_purls = Self::fetch_base_purls(&input_purls, connection).await?; + let base_purls = Self::fetch_base_purls(&input_purls, connection) + .instrument(info_span!("loading base purls")) + .await?; if base_purls.is_empty() { for ip in &input_purls { recommendations.insert(ip.purl.to_string(), Vec::new()); @@ -533,8 +535,9 @@ impl PurlService { return Ok(recommendations); } - let versioned_by_base = - Self::fetch_versioned_purls_by_base(&base_purls, connection).await?; + let versioned_by_base = Self::fetch_versioned_purls_by_base(&base_purls, connection) + .instrument(info_span!("loading versioned purls")) + .await?; let base_purl_map: HashMap<_, _> = base_purls .into_iter() @@ -583,6 +586,7 @@ impl PurlService { let statuses_by_base = Self::fetch_vulnerability_statuses(&winner_base_ids, &winner_vp_ids, connection) + .instrument(info_span!("loading vulnerability statuses")) .await?; // Assemble recommendations from batched data From 87ac5cb3b5c846d4d7dbf129f113d0997bb1b9ee Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 8 Apr 2026 13:33:46 +0200 Subject: [PATCH 22/23] refactor: use borrowed references in PurlKey to avoid string allocations Change PurlKey fields from owned String to borrowed &str references, eliminating unnecessary string cloning for map lookups. The struct is now Copy since all fields are references. Implements TC-4017 --trailer="Assisted-by: Claude Code" Co-Authored-By: Claude Opus 4.6 (cherry picked from commit 2ec53680b5b5e321837ded98a98a3cbbbace392b) --- modules/fundamental/src/purl/service/mod.rs | 54 ++++++++++----------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 24bce4f84..321945c74 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -33,18 +33,17 @@ use trustify_common::{ use trustify_entity::{ advisory, base_purl, license, purl_status, qualified_purl::{self, CanonicalPurl}, - remediation, remediation_purl_status, sbom_license_expanded, sbom_node, - sbom_node_purl_ref, sbom_package_license, status, version_range, versioned_purl, - vulnerability, + remediation, remediation_purl_status, sbom_license_expanded, sbom_node, sbom_node_purl_ref, + sbom_package_license, status, version_range, versioned_purl, vulnerability, }; use trustify_module_ingestor::common::Deprecation; /// Composite key identifying a base PURL by type, namespace, and name (without version). -#[derive(Clone, PartialEq, Eq, Hash)] -struct PurlKey { - ty: String, - namespace: Option, - name: String, +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +struct PurlKey<'a> { + ty: &'a str, + namespace: Option<&'a str>, + name: &'a str, } /// Vulnerability status record linking a vulnerability ID to its VEX status and remediations. @@ -64,28 +63,28 @@ struct Winner<'a> { base: &'a base_purl::Model, } -impl PurlKey { - fn from_purl(purl: &Purl) -> Self { +impl<'a> PurlKey<'a> { + fn from_purl(purl: &'a Purl) -> Self { Self { - ty: purl.ty.clone(), - namespace: purl.namespace.clone(), - name: purl.name.clone(), + ty: &purl.ty, + namespace: purl.namespace.as_deref(), + name: &purl.name, } } - fn from_base_purl(bp: &base_purl::Model) -> Self { + fn from_base_purl(bp: &'a base_purl::Model) -> Self { Self { - ty: bp.r#type.clone(), - namespace: bp.namespace.clone(), - name: bp.name.clone(), + ty: &bp.r#type, + namespace: bp.namespace.as_deref(), + name: &bp.name, } } fn as_condition(&self) -> Condition { let mut cond = Condition::all() - .add(base_purl::Column::Type.eq(&self.ty)) - .add(base_purl::Column::Name.eq(&self.name)); - if let Some(ns) = &self.namespace { + .add(base_purl::Column::Type.eq(self.ty)) + .add(base_purl::Column::Name.eq(self.name)); + if let Some(ns) = self.namespace { cond = cond.add(base_purl::Column::Namespace.eq(ns)); } else { cond = cond.add(base_purl::Column::Namespace.is_null()); @@ -539,12 +538,9 @@ impl PurlService { .instrument(info_span!("loading versioned purls")) .await?; - let base_purl_map: HashMap<_, _> = base_purls - .into_iter() - .map(|bp| { - let key = PurlKey::from_base_purl(&bp); - (key, bp) - }) + let base_purl_map: HashMap, &base_purl::Model> = base_purls + .iter() + .map(|bp| (PurlKey::from_base_purl(bp), bp)) .collect(); #[allow(clippy::unwrap_used)] @@ -552,9 +548,9 @@ impl PurlService { let mut winners = Vec::new(); - for ip in input_purls { + for ip in &input_purls { let key = PurlKey::from_purl(&ip.purl); - let Some(base) = base_purl_map.get(&key) else { + let Some(&base) = base_purl_map.get(&key) else { recommendations.insert(ip.purl.to_string(), Vec::new()); continue; }; @@ -749,7 +745,7 @@ impl PurlService { .iter() .filter_map(|ip| { let key = PurlKey::from_purl(&ip.purl); - seen_keys.insert(key.clone()).then_some(key) + seen_keys.insert(key).then_some(key) }) .collect(); From eb88de8e580a27fabfe42f83c6c436192fac39db Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Thu, 9 Apr 2026 12:06:13 +0200 Subject: [PATCH 23/23] refactor: fix instrumentation patterns per review feedback - Remove redundant .instrument() wrappers on self-calls that already have #[instrument] attributes - Use skip_all and err(level=INFO) per log_tracing.md conventions - Accept impl IntoIterator instead of &[Uuid] to avoid intermediate allocations - Add instrumentation spans to SeaORM load_one/load_many_to_many calls - Use HashMap<_, _> type inference for base_purl_map Co-Authored-By: Claude Opus 4.6 (cherry picked from commit 8533cde083b6e00916a84763ee6d318d80bedecf) --- modules/fundamental/src/purl/service/mod.rs | 168 +++++++++----------- 1 file changed, 72 insertions(+), 96 deletions(-) diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 321945c74..a2f6b19bd 100644 --- a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use crate::{ Error, - common::license_filtering::{LICENSE, build_license_filtering_with_clause}, + common::license_filtering::LICENSE, purl::model::{ RecommendEntry, VexStatus, VulnerabilityStatus, details::{ @@ -14,6 +14,7 @@ use crate::{ }, }, }; +use itertools::Itertools; use regex::Regex; use sea_orm::{ ColumnTrait, Condition, ConnectionTrait, EntityTrait, FromQueryResult, LoaderTrait, @@ -126,6 +127,7 @@ impl PurlService { Self {} } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn purl_types( &self, connection: &C, @@ -151,6 +153,7 @@ impl PurlService { TypeSummary::from_names(&ecosystems, connection).await } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purls_by_type( &self, r#type: &str, @@ -171,6 +174,7 @@ impl PurlService { }) } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purl( &self, r#type: &str, @@ -197,6 +201,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn versioned_purl( &self, r#type: &str, @@ -228,6 +233,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purl_by_uuid( &self, base_purl_uuid: &Uuid, @@ -245,6 +251,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purl_by_purl( &self, purl: &Purl, @@ -269,6 +276,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn versioned_purl_by_uuid( &self, purl_version_uuid: &Uuid, @@ -286,6 +294,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn versioned_purl_by_purl( &self, purl: &Purl, @@ -320,6 +329,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn purl_by_purl( &self, purl: &Purl, @@ -377,7 +387,7 @@ impl PurlService { }) } - #[instrument(skip(self, connection), err)] + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn purls( &self, query: Query, @@ -405,23 +415,31 @@ impl PurlService { .get_constraint_for_field(LICENSE) .map(|constraint| q(&format!("{constraint}"))) { - #[derive(Debug, FromQueryResult)] - struct QualifiedPurlIdResult { - id: Uuid, - } - - // Build the CTEs for license filtering - let with_clause = build_license_filtering_with_clause(); + let base = || { + sbom_node_purl_ref::Entity::find() + .select_only() + .distinct() + .column(sbom_node_purl_ref::Column::QualifiedPurlId) + .join( + JoinType::InnerJoin, + sbom_node_purl_ref::Relation::Node.def(), + ) + .join( + JoinType::InnerJoin, + sbom_node::Relation::PackageLicense.def(), + ) + }; - let mut statement = sbom_package_purl_ref::Entity::find() - .distinct() - .select_only() - .column_as(sbom_package_purl_ref::Column::QualifiedPurlId, "id") + // Apply as subquery filter using UNION to allow index lookups instead of a full table scan + let mut spdx_select = base() + .join( + JoinType::InnerJoin, + sbom_package_license::Relation::SbomLicenseExpanded.def(), + ) .join( - JoinType::Join, - sbom_package_purl_ref::Relation::Package.def(), + JoinType::InnerJoin, + sbom_license_expanded::Relation::ExpandedLicense.def(), ) - .join(JoinType::Join, sbom_package::Relation::PackageLicense.def()) .filtering_with( license_query.clone(), Columns::default() @@ -430,54 +448,11 @@ impl PurlService { LICENSE => Some(format!("expanded_text{operator}{value}")), _ => None, }), - )? - .into_query(); - let x = statement - .join( - JoinType::Join, - Alias::new("expanded"), - Condition::all() - .add( - Expr::col(( - sbom_package_license::Entity, - sbom_package_license::Column::SbomId, - )) - .equals((Alias::new("expanded"), Alias::new("sbom_id"))), - ) - .add( - Expr::col(( - sbom_package_license::Entity, - sbom_package_license::Column::LicenseId, - )) - .equals((Alias::new("expanded"), Alias::new("license_id"))), - ), - ) - .to_owned(); - let main_query = x.with(with_clause); - let (sql, values) = main_query.build(PostgresQueryBuilder); - let qualified_purl_ids_filtered_by_license: Vec = - QualifiedPurlIdResult::find_by_statement(Statement::from_sql_and_values( - DbBackend::Postgres, - sql, - values, - )) - .all(connection) - .await? - .into_iter() - .map(|r| r.id) - .collect(); + )?; - let cyclonedx_subquery = sbom_package_purl_ref::Entity::find() - .distinct() - .select_only() - .column(sbom_package_purl_ref::Column::QualifiedPurlId) - .join( - JoinType::Join, - sbom_package_purl_ref::Relation::Package.def(), - ) - .join(JoinType::Join, sbom_package::Relation::PackageLicense.def()) + let cyclonedx_select = base() .join( - JoinType::Join, + JoinType::InnerJoin, sbom_package_license::Relation::License.def(), ) .filtering_with( @@ -488,21 +463,16 @@ impl PurlService { LICENSE => Some(format!("text{operator}{value}")), _ => None, }), - )? - .into_query(); - - // Combine SPDX and CycloneDX results - let combined_condition = Condition::any() - .add( - Expr::col((qualified_purl::Entity, qualified_purl::Column::Id)) - .eq(PgFunc::any(qualified_purl_ids_filtered_by_license)), - ) - .add(qualified_purl::Column::Id.in_subquery(cyclonedx_subquery)); - select = select.filter(combined_condition); + )?; + + QueryTrait::query(&mut spdx_select) + .union(UnionType::Distinct, cyclonedx_select.into_query()); + + select = + select.filter(qualified_purl::Column::Id.in_subquery(spdx_select.into_query())); } let limiter = select.limiting(connection, paginated.offset, paginated.limit); - let total = limiter.total().await?; Ok(PaginatedResults { @@ -524,9 +494,7 @@ impl PurlService { return Ok(recommendations); } - let base_purls = Self::fetch_base_purls(&input_purls, connection) - .instrument(info_span!("loading base purls")) - .await?; + let base_purls = Self::fetch_base_purls(&input_purls, connection).await?; if base_purls.is_empty() { for ip in &input_purls { recommendations.insert(ip.purl.to_string(), Vec::new()); @@ -534,11 +502,10 @@ impl PurlService { return Ok(recommendations); } - let versioned_by_base = Self::fetch_versioned_purls_by_base(&base_purls, connection) - .instrument(info_span!("loading versioned purls")) - .await?; + let versioned_by_base = + Self::fetch_versioned_purls_by_base(&base_purls, connection).await?; - let base_purl_map: HashMap, &base_purl::Model> = base_purls + let base_purl_map: HashMap<_, _> = base_purls .iter() .map(|bp| (PurlKey::from_base_purl(bp), bp)) .collect(); @@ -577,13 +544,12 @@ impl PurlService { } // Batch fetch vulnerability statuses and qualified PURLs for all winners - let winner_base_ids: Vec<_> = winners.iter().map(|w| w.base.id).unique().collect(); - let winner_vp_ids: Vec<_> = winners.iter().map(|w| w.versioned_purl.id).collect(); - - let statuses_by_base = - Self::fetch_vulnerability_statuses(&winner_base_ids, &winner_vp_ids, connection) - .instrument(info_span!("loading vulnerability statuses")) - .await?; + let statuses_by_base = Self::fetch_vulnerability_statuses( + winners.iter().map(|w| w.base.id).unique(), + winners.iter().map(|w| w.versioned_purl.id), + connection, + ) + .await?; // Assemble recommendations from batched data for winner in winners { @@ -596,15 +562,16 @@ impl PurlService { /// Batch-loads vulnerability statuses for the winning versioned PURLs, grouped by base PURL ID. /// Chunks by base PURL IDs to stay within Postgres bind parameter limits. - #[instrument(skip(winner_base_ids, winner_vp_ids, connection), err)] + #[instrument(skip_all, err(level = tracing::Level::INFO))] async fn fetch_vulnerability_statuses( - winner_base_ids: &[Uuid], - winner_vp_ids: &[Uuid], + winner_base_ids: impl IntoIterator, + winner_vp_ids: impl IntoIterator, connection: &C, ) -> Result>, Error> { let mut statuses_by_base: HashMap<_, Vec> = HashMap::new(); + let winner_vp_ids: Vec<_> = winner_vp_ids.into_iter().collect(); - let base_chunks = chunked_with(1, winner_base_ids.iter().copied()); + let base_chunks = chunked_with(1, winner_base_ids.into_iter()); for base_chunk in &base_chunks { let base_chunk: Vec<_> = base_chunk.collect(); let all_statuses = purl_status::Entity::find() @@ -629,13 +596,21 @@ impl PurlService { .arg(Expr::col((version_range::Entity, Asterisk))), )) .all(connection) + .instrument(info_span!("querying purl statuses")) .await?; let vulns = all_statuses .load_one(vulnerability::Entity, connection) + .instrument(info_span!("loading vulnerabilities")) + .await?; + let advisories_loaded = all_statuses + .load_one(advisory::Entity, connection) + .instrument(info_span!("loading advisories")) + .await?; + let status_models = all_statuses + .load_one(status::Entity, connection) + .instrument(info_span!("loading statuses")) .await?; - let advisories_loaded = all_statuses.load_one(advisory::Entity, connection).await?; - let status_models = all_statuses.load_one(status::Entity, connection).await?; let status_slug_map: HashMap<_, _> = status_models .into_iter() .flatten() @@ -647,6 +622,7 @@ impl PurlService { remediation_purl_status::Entity, connection, ) + .instrument(info_span!("loading remediations")) .await?; for (((vuln, advisory), ps), rems) in vulns @@ -735,7 +711,7 @@ impl PurlService { /// Batch-fetches base PURL entities for the deduplicated set of input PURLs. /// Chunks the OR conditions to stay within Postgres bind parameter limits. - #[instrument(skip(input_purls, connection), err)] + #[instrument(skip_all, err(level = tracing::Level::INFO))] async fn fetch_base_purls( input_purls: &[InputPurl], connection: &C, @@ -767,7 +743,7 @@ impl PurlService { /// Loads all versioned PURLs for the given base PURLs, grouped by base PURL ID. /// Chunks the IN clause to stay within Postgres bind parameter limits. - #[instrument(skip(base_purls, connection), err)] + #[instrument(skip_all, err(level = tracing::Level::INFO))] async fn fetch_versioned_purls_by_base( base_purls: &[base_purl::Model], connection: &C,