diff --git a/modules/fundamental/src/purl/endpoints/test.rs b/modules/fundamental/src/purl/endpoints/test.rs index 69604bed2..cb0cd3b91 100644 --- a/modules/fundamental/src/purl/endpoints/test.rs +++ b/modules/fundamental/src/purl/endpoints/test.rs @@ -3,6 +3,7 @@ use crate::purl::model::summary::base_purl::BasePurlSummary; use crate::purl::model::summary::purl::PurlSummary; use crate::test::caller; use actix_web::test::TestRequest; +use rstest::rstest; use serde_json::{Value, json}; use std::str::FromStr; use test_context::test_context; @@ -15,6 +16,16 @@ use trustify_test_context::{TrustifyContext, call::CallService, subset::Contains use urlencoding::encode; use uuid::Uuid; +async fn recommend(app: &impl CallService, purls: &[&str]) -> Value { + app.call_and_read_body_json( + TestRequest::post() + .uri("/api/v2/purl/recommend") + .set_json(json!({ "purls": purls })) + .to_request(), + ) + .await +} + async fn setup(db: &Database, graph: &Graph) -> Result<(), anyhow::Error> { let log4j = graph .ingest_package(&Purl::from_str("pkg:maven/org.apache/log4j")?, db) @@ -294,9 +305,11 @@ async fn test_purl_license_details(ctx: &TrustifyContext) -> Result<(), anyhow:: Ok(()) } +/// Verifies that duplicate input PURLs are deduplicated and recommendations include the correct upgraded package and vulnerabilities. 
#[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given advisories for multiple CVEs are ingested ctx.ingest_documents([ "cve/CVE-2022-45787.json", "cve/CVE-2023-28867.json", @@ -304,18 +317,20 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> ]) .await?; + // When requesting recommendations for a duplicated PURL let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:maven/jakarta.el/jakarta.el-api@3.0.3", "pkg:maven/jakarta.el/jakarta.el-api@3.0.3"]})) - .to_request(), - ) - .await; + let recommendations = recommend( + &app, + &[ + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", + ], + ) + .await; log::info!("{recommendations:#?}"); + // Then a single recommendation entry is returned with both CVEs assert_eq!( recommendations["recommendations"] .as_object() @@ -335,7 +350,7 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> ); assert_eq!( recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"][0]["package"], - "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002?repository_url=https://maven.repository.redhat.com/ga/&type=jar", + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002", ); let mut cves = recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"] @@ -351,24 +366,21 @@ async fn get_recommendations(ctx: &TrustifyContext) -> Result<(), anyhow::Error> Ok(()) } +/// Verifies that PURLs without a version produce no recommendations. 
#[test_context(TrustifyContext)] #[test(actix_web::test)] async fn get_recommendations_no_version(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given advisories are ingested ctx.ingest_documents(["cve/CVE-2022-45787.json", "cve/CVE-2023-28867.json"]) .await?; + // When requesting recommendations for a PURL without a version let app = caller(ctx).await?; - let recommendations: Value = app - .call_and_read_body_json( - TestRequest::post() - .uri("/api/v2/purl/recommend") - .set_json(json!({"purls": ["pkg:maven/jakarta.el/jakarta.el-api"]})) - .to_request(), - ) - .await; + let recommendations = recommend(&app, &["pkg:maven/jakarta.el/jakarta.el-api"]).await; log::info!("{recommendations:#?}"); + // Then no recommendations are returned assert_eq!( recommendations["recommendations"] .as_object() @@ -379,3 +391,297 @@ async fn get_recommendations_no_version(ctx: &TrustifyContext) -> Result<(), any Ok(()) } + +/// Verifies that duplicate advisories for the same CVE produce a single vulnerability entry. 
+#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_dedup(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given a Red Hat package and two duplicate advisories for the same CVE + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, + &ctx.db, + ) + .await?; + + ctx.ingest_documents([ + "osv/RUSTSEC-2021-0079.json", + "osv/RUSTSEC-2021-0079-DUPLICATE.json", + ]) + .await?; + + // When requesting recommendations + let app = caller(ctx).await?; + let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; + + log::info!("{recommendations:#?}"); + + // Then the recommendation contains a single deduplicated vulnerability + let entry = + &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; + assert_eq!(entry["vulnerabilities"].as_array().unwrap().len(), 1); + assert_eq!( + entry["vulnerabilities"].as_array().unwrap()[0]["id"] + .as_str() + .unwrap(), + "CVE-2021-32714" + ); + + Ok(()) +} + +/// Verifies that a custom vulnerability status is reflected in the recommendation response. 
+#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_other_status(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set}; + use trustify_entity::{purl_status, status}; + + // Given a package with a vulnerability whose status is overridden to a custom value + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, + &ctx.db, + ) + .await?; + + ctx.ingest_documents(["osv/RUSTSEC-2021-0079.json"]).await?; + + let custom_status_id = Uuid::new_v4(); + let custom_status = status::ActiveModel { + id: Set(custom_status_id), + slug: Set("custom_status".to_string()), + name: Set("Custom Status".to_string()), + description: Set(Some("A custom status for testing".to_string())), + }; + status::Entity::insert(custom_status).exec(&ctx.db).await?; + + let purl_statuses = purl_status::Entity::find() + .filter(purl_status::Column::VulnerabilityId.eq("CVE-2021-32714")) + .all(&ctx.db) + .await?; + + assert!(!purl_statuses.is_empty()); + + for ps in purl_statuses { + let mut active: purl_status::ActiveModel = ps.into(); + active.status_id = Set(custom_status_id); + active.update(&ctx.db).await?; + } + + // When requesting recommendations + let app = caller(ctx).await?; + let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; + + log::info!("{recommendations:#?}"); + + // Then the vulnerability status reflects the custom status + let entry = + &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; + let vulns = entry["vulnerabilities"].as_array().unwrap(); + let vuln = vulns + .iter() + .find(|v| v["id"].as_str().unwrap() == "CVE-2021-32714") + .unwrap(); + + assert_eq!(vuln["status"], "custom_status"); + + Ok(()) +} + +/// Verifies that PURLs with no matching base package or an unparseable version produce the expected empty response. 
+#[test_context(TrustifyContext)] +#[rstest] +#[case::unknown_purl( + "pkg:maven/com.example/nonexistent@1.0.0", + json!({"pkg:maven/com.example/nonexistent@1.0.0": []}) +)] +#[case::invalid_version( + "pkg:maven/jakarta.el/jakarta.el-api@not-a-version", + json!({}) +)] +#[test_log::test(actix_web::test)] +async fn get_recommendations_no_match( + ctx: &TrustifyContext, + #[case] purl: &str, + #[case] expected: Value, +) -> Result<(), anyhow::Error> { + // Given an advisory is ingested + ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; + + // When requesting recommendations for a non-matching PURL + let app = caller(ctx).await?; + let recommendations = recommend(&app, &[purl]).await; + + // Then the response matches the expected empty result + assert_eq!(recommendations["recommendations"], expected); + + Ok(()) +} + +/// Verifies that recommendations work for PURLs without a namespace (e.g., cargo packages). +#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_no_namespace(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given a Red Hat package with no namespace + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/serde@1.0.0-redhat-00001")?, + &ctx.db, + ) + .await?; + + // When requesting recommendations + let app = caller(ctx).await?; + let recommendations = recommend(&app, &["pkg:cargo/serde@1.0.0"]).await; + + // Then the recommendation returns the Red Hat package + assert_eq!( + recommendations["recommendations"], + json!({ + "pkg:cargo/serde@1.0.0": [{ + "package": "pkg:cargo/serde@1.0.0-redhat-00001", + "vulnerabilities": [] + }] + }) + ); + + Ok(()) +} + +/// Verifies correct handling of mixed input: a known PURL, an unknown PURL, and a versionless PURL. 
+#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_mixed(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + // Given an advisory is ingested + ctx.ingest_documents(["cve/CVE-2022-45787.json"]).await?; + + // When requesting recommendations for a mix of known, unknown, and versionless PURLs + let app = caller(ctx).await?; + let recommendations = recommend( + &app, + &[ + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3", + "pkg:maven/com.example/nonexistent@1.0.0", + "pkg:maven/jakarta.el/jakarta.el-api", + ], + ) + .await; + + // Then known PURLs get recommendations and unknown PURLs get empty arrays + let entry = &recommendations["recommendations"]["pkg:maven/jakarta.el/jakarta.el-api@3.0.3"]; + assert_eq!( + entry[0]["package"], + "pkg:maven/jakarta.el/jakarta.el-api@3.0.3.redhat-00002" + ); + assert_eq!( + entry[0]["vulnerabilities"], + json!([{"id": "CVE-2022-45787", "status": "NotAffected", "remediations": []}]) + ); + assert_eq!( + recommendations["recommendations"]["pkg:maven/com.example/nonexistent@1.0.0"], + json!([]) + ); + + Ok(()) +} + +/// Verifies that the versioned PURL (without qualifiers) is returned as the package string when no qualified PURL exists. 
+#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_fallback_package_str( + ctx: &TrustifyContext, +) -> Result<(), anyhow::Error> { + // Given a versioned PURL without a qualified PURL + let base = ctx + .graph + .ingest_package(&Purl::from_str("pkg:cargo/tokio")?, &ctx.db) + .await?; + base.ingest_package_version( + &Purl::from_str("pkg:cargo/tokio@1.0.0-redhat-00001")?, + &ctx.db, + ) + .await?; + + // When requesting recommendations + let app = caller(ctx).await?; + let recommendations = recommend(&app, &["pkg:cargo/tokio@1.0.0"]).await; + + // Then the versioned PURL is returned as the package string + let recs = recommendations["recommendations"].as_object().unwrap(); + assert_eq!(recs.len(), 1); + + let rec_list = recs["pkg:cargo/tokio@1.0.0"].as_array().unwrap(); + assert_eq!(rec_list.len(), 1); + + let package = rec_list[0]["package"].as_str().unwrap(); + assert_eq!(package, "pkg:cargo/tokio@1.0.0-redhat-00001"); + + Ok(()) +} + +/// Verifies that a "fixed" vulnerability status maps to the Fixed VexStatus and uses the status name in the response. 
+#[test_context(TrustifyContext)] +#[test(actix_web::test)] +async fn get_recommendations_fixed_status(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set}; + use trustify_entity::{purl_status, status}; + + // Given a package with a vulnerability whose status is set to "fixed" + ctx.graph + .ingest_qualified_package( + &Purl::from_str("pkg:cargo/hyper@0.14.1-redhat-00001")?, + &ctx.db, + ) + .await?; + + ctx.ingest_documents(["osv/RUSTSEC-2021-0079.json"]).await?; + + let fixed_status = status::Entity::find() + .filter(status::Column::Slug.eq("fixed")) + .one(&ctx.db) + .await?; + + let status_id = if let Some(s) = fixed_status { + s.id + } else { + let id = Uuid::new_v4(); + let new_status = status::ActiveModel { + id: Set(id), + slug: Set("fixed".to_string()), + name: Set("Fixed".to_string()), + description: Set(Some("Vulnerability has been fixed".to_string())), + }; + status::Entity::insert(new_status).exec(&ctx.db).await?; + id + }; + + let purl_statuses = purl_status::Entity::find() + .filter(purl_status::Column::VulnerabilityId.eq("CVE-2021-32714")) + .all(&ctx.db) + .await?; + + for ps in purl_statuses { + let mut active: purl_status::ActiveModel = ps.into(); + active.status_id = Set(status_id); + active.update(&ctx.db).await?; + } + + // When requesting recommendations + let app = caller(ctx).await?; + let recommendations = recommend(&app, &["pkg:cargo/hyper@0.14.1"]).await; + + // Then the vulnerability status is reported as "Fixed" + let entry = + &recommendations["recommendations"].as_object().unwrap()["pkg:cargo/hyper@0.14.1"][0]; + let vuln = entry["vulnerabilities"] + .as_array() + .unwrap() + .iter() + .find(|v| v["id"].as_str().unwrap() == "CVE-2021-32714") + .unwrap(); + + assert_eq!(vuln["status"], "Fixed"); + + Ok(()) +} diff --git a/modules/fundamental/src/purl/service/mod.rs b/modules/fundamental/src/purl/service/mod.rs index 7750aad2d..a2f6b19bd 100644 --- 
a/modules/fundamental/src/purl/service/mod.rs +++ b/modules/fundamental/src/purl/service/mod.rs @@ -1,27 +1,30 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use crate::{ Error, - common::license_filtering::{LICENSE, build_license_filtering_with_clause}, + common::license_filtering::LICENSE, purl::model::{ - RecommendEntry, VulnerabilityStatus, + RecommendEntry, VexStatus, VulnerabilityStatus, details::{ base_purl::BasePurlDetails, purl::PurlDetails, versioned_purl::VersionedPurlDetails, }, - summary::{base_purl::BasePurlSummary, purl::PurlSummary, r#type::TypeSummary}, + summary::{ + base_purl::BasePurlSummary, purl::PurlSummary, remediation::RemediationSummary, + r#type::TypeSummary, + }, }, }; +use itertools::Itertools; use regex::Regex; use sea_orm::{ - ColumnTrait, ConnectionTrait, DbBackend, EntityTrait, FromQueryResult, QueryFilter, QueryOrder, - QuerySelect, QueryTrait, RelationTrait, Statement, prelude::Uuid, -}; -use sea_query::{ - Alias, ColumnType, Condition, Expr, JoinType, Order, PgFunc, PostgresQueryBuilder, + ColumnTrait, Condition, ConnectionTrait, EntityTrait, FromQueryResult, LoaderTrait, + QueryFilter, QueryOrder, QuerySelect, QueryTrait, RelationTrait, prelude::Uuid, }; -use tracing::instrument; +use sea_query::{Asterisk, ColumnType, Expr, Func, JoinType, Order, SimpleExpr, UnionType}; +use tracing::{Instrument, info_span, instrument}; use trustify_common::{ db::{ + chunk::chunked_with, limiter::LimiterTrait, query::{Columns, Filtering, IntoColumns, Query, q}, }, @@ -29,12 +32,93 @@ use trustify_common::{ purl::{Purl, PurlErr}, }; use trustify_entity::{ - base_purl, license, + advisory, base_purl, license, purl_status, qualified_purl::{self, CanonicalPurl}, - sbom_package, sbom_package_license, sbom_package_purl_ref, versioned_purl, + remediation, remediation_purl_status, sbom_license_expanded, sbom_node, sbom_node_purl_ref, + sbom_package_license, status, version_range, versioned_purl, vulnerability, }; use 
trustify_module_ingestor::common::Deprecation;

+/// Composite key identifying a base PURL by type, namespace, and name (without version).
+///
+/// All fields are borrowed, so the key is `Copy` and can be built from either a parsed
+/// `Purl` or a `base_purl::Model` and then compared or hashed across the two.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+struct PurlKey<'a> {
+    ty: &'a str,
+    namespace: Option<&'a str>,
+    name: &'a str,
+}
+
+/// Vulnerability status record linking a vulnerability ID to its VEX status and remediations.
+struct StatusInfo {
+    vuln_id: String,
+    status_slug: String,
+    // NOTE(review): the generic argument of this `Vec` appears to have been lost when the
+    // patch text was extracted — presumably `remediation::Model`; confirm against the repo.
+    remediations: Vec,
+    /// The most recent date from the advisory that reported this status, used to pick the
+    /// latest assessment when the same vulnerability appears in multiple advisories.
+    // NOTE(review): the generic argument of this `Option` is likewise missing — presumably
+    // the timestamp type of the advisory's `modified`/`published` columns; confirm.
+    advisory_date: Option,
+}
+
+/// The highest Red Hat patch version selected for a given input PURL, used to build the recommendation.
+struct Winner<'a> {
+    purl_string: String,
+    versioned_purl: &'a versioned_purl::Model,
+    base: &'a base_purl::Model,
+}
+
+impl<'a> PurlKey<'a> {
+    /// Builds a key that borrows the type, namespace, and name of a parsed PURL.
+    fn from_purl(purl: &'a Purl) -> Self {
+        Self {
+            ty: &purl.ty,
+            namespace: purl.namespace.as_deref(),
+            name: &purl.name,
+        }
+    }
+
+    /// Builds a key that borrows the type, namespace, and name of a stored base PURL row.
+    fn from_base_purl(bp: &'a base_purl::Model) -> Self {
+        Self {
+            ty: &bp.r#type,
+            namespace: bp.namespace.as_deref(),
+            name: &bp.name,
+        }
+    }
+
+    /// Returns a filter on the `base_purl` columns that matches exactly this key.
+    fn as_condition(&self) -> Condition {
+        let mut cond = Condition::all()
+            .add(base_purl::Column::Type.eq(self.ty))
+            .add(base_purl::Column::Name.eq(self.name));
+        // A missing namespace is matched with IS NULL rather than an equality comparison.
+        if let Some(ns) = self.namespace {
+            cond = cond.add(base_purl::Column::Namespace.eq(ns));
+        } else {
+            cond = cond.add(base_purl::Column::Namespace.is_null());
+        }
+        cond
+    }
+}
+
+/// A user-supplied PURL paired with its parsed semver version for version comparison.
+struct InputPurl { + purl: Purl, + input_version: semver::Version, +} + +impl InputPurl { + fn try_from_purl(purl: &Purl) -> Option { + let input_version_str = purl.version.as_ref()?; + let input_version = lenient_semver::parse(input_version_str) + .inspect_err(|_| { + log::debug!( + "input purl {} version {:?} failed to parse", + purl, + input_version_str + ); + }) + .ok()?; + Some(Self { + purl: purl.clone(), + input_version, + }) + } +} + #[derive(Default)] pub struct PurlService {} @@ -43,6 +127,7 @@ impl PurlService { Self {} } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn purl_types( &self, connection: &C, @@ -68,6 +153,7 @@ impl PurlService { TypeSummary::from_names(&ecosystems, connection).await } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purls_by_type( &self, r#type: &str, @@ -88,6 +174,7 @@ impl PurlService { }) } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purl( &self, r#type: &str, @@ -114,6 +201,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn versioned_purl( &self, r#type: &str, @@ -145,6 +233,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purl_by_uuid( &self, base_purl_uuid: &Uuid, @@ -162,6 +251,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn base_purl_by_purl( &self, purl: &Purl, @@ -186,6 +276,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn versioned_purl_by_uuid( &self, purl_version_uuid: &Uuid, @@ -203,6 +294,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn versioned_purl_by_purl( &self, purl: &Purl, @@ -237,6 +329,7 @@ impl PurlService { } } + #[instrument(skip(self, connection), 
err(level=tracing::Level::INFO))] pub async fn purl_by_purl( &self, purl: &Purl, @@ -294,7 +387,7 @@ impl PurlService { }) } - #[instrument(skip(self, connection), err)] + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] pub async fn purls( &self, query: Query, @@ -322,23 +415,31 @@ impl PurlService { .get_constraint_for_field(LICENSE) .map(|constraint| q(&format!("{constraint}"))) { - #[derive(Debug, FromQueryResult)] - struct QualifiedPurlIdResult { - id: Uuid, - } - - // Build the CTEs for license filtering - let with_clause = build_license_filtering_with_clause(); + let base = || { + sbom_node_purl_ref::Entity::find() + .select_only() + .distinct() + .column(sbom_node_purl_ref::Column::QualifiedPurlId) + .join( + JoinType::InnerJoin, + sbom_node_purl_ref::Relation::Node.def(), + ) + .join( + JoinType::InnerJoin, + sbom_node::Relation::PackageLicense.def(), + ) + }; - let mut statement = sbom_package_purl_ref::Entity::find() - .distinct() - .select_only() - .column_as(sbom_package_purl_ref::Column::QualifiedPurlId, "id") + // Apply as subquery filter using UNION to allow index lookups instead of a full table scan + let mut spdx_select = base() .join( - JoinType::Join, - sbom_package_purl_ref::Relation::Package.def(), + JoinType::InnerJoin, + sbom_package_license::Relation::SbomLicenseExpanded.def(), + ) + .join( + JoinType::InnerJoin, + sbom_license_expanded::Relation::ExpandedLicense.def(), ) - .join(JoinType::Join, sbom_package::Relation::PackageLicense.def()) .filtering_with( license_query.clone(), Columns::default() @@ -347,54 +448,11 @@ impl PurlService { LICENSE => Some(format!("expanded_text{operator}{value}")), _ => None, }), - )? 
- .into_query(); - let x = statement - .join( - JoinType::Join, - Alias::new("expanded"), - Condition::all() - .add( - Expr::col(( - sbom_package_license::Entity, - sbom_package_license::Column::SbomId, - )) - .equals((Alias::new("expanded"), Alias::new("sbom_id"))), - ) - .add( - Expr::col(( - sbom_package_license::Entity, - sbom_package_license::Column::LicenseId, - )) - .equals((Alias::new("expanded"), Alias::new("license_id"))), - ), - ) - .to_owned(); - let main_query = x.with(with_clause); - let (sql, values) = main_query.build(PostgresQueryBuilder); - let qualified_purl_ids_filtered_by_license: Vec = - QualifiedPurlIdResult::find_by_statement(Statement::from_sql_and_values( - DbBackend::Postgres, - sql, - values, - )) - .all(connection) - .await? - .into_iter() - .map(|r| r.id) - .collect(); + )?; - let cyclonedx_subquery = sbom_package_purl_ref::Entity::find() - .distinct() - .select_only() - .column(sbom_package_purl_ref::Column::QualifiedPurlId) + let cyclonedx_select = base() .join( - JoinType::Join, - sbom_package_purl_ref::Relation::Package.def(), - ) - .join(JoinType::Join, sbom_package::Relation::PackageLicense.def()) - .join( - JoinType::Join, + JoinType::InnerJoin, sbom_package_license::Relation::License.def(), ) .filtering_with( @@ -405,21 +463,16 @@ impl PurlService { LICENSE => Some(format!("text{operator}{value}")), _ => None, }), - )? 
- .into_query(); - - // Combine SPDX and CycloneDX results - let combined_condition = Condition::any() - .add( - Expr::col((qualified_purl::Entity, qualified_purl::Column::Id)) - .eq(PgFunc::any(qualified_purl_ids_filtered_by_license)), - ) - .add(qualified_purl::Column::Id.in_subquery(cyclonedx_subquery)); - select = select.filter(combined_condition); + )?; + + QueryTrait::query(&mut spdx_select) + .union(UnionType::Distinct, cyclonedx_select.into_query()); + + select = + select.filter(qualified_purl::Column::Id.in_subquery(spdx_select.into_query())); } let limiter = select.limiting(connection, paginated.offset, paginated.limit); - let total = limiter.total().await?; Ok(PaginatedResults { @@ -434,96 +487,308 @@ impl PurlService { purls: &[Purl], connection: &C, ) -> Result>, Error> { - let mut recommendations = HashMap::new(); + let mut recommendations = HashMap::with_capacity(purls.len()); + + let input_purls: Vec<_> = purls.iter().filter_map(InputPurl::try_from_purl).collect(); + if input_purls.is_empty() { + return Ok(recommendations); + } + + let base_purls = Self::fetch_base_purls(&input_purls, connection).await?; + if base_purls.is_empty() { + for ip in &input_purls { + recommendations.insert(ip.purl.to_string(), Vec::new()); + } + return Ok(recommendations); + } + + let versioned_by_base = + Self::fetch_versioned_purls_by_base(&base_purls, connection).await?; + + let base_purl_map: HashMap<_, _> = base_purls + .iter() + .map(|bp| (PurlKey::from_base_purl(bp), bp)) + .collect(); #[allow(clippy::unwrap_used)] let pattern = Regex::new("redhat-[0-9]+$").unwrap(); - for purl in purls { - let query = match purl.to_string().split_once('@') { - Some((p, _)) => format!("purl~{p}"), - None => format!("purl~{purl}"), - }; - let summaries = self - .purls(q(&query), Default::default(), connection) - .await?; + let mut winners = Vec::new(); - let Some(ref input_version_str) = purl.version else { - continue; - }; - let Ok(input_version) = 
lenient_semver::parse(input_version_str) else { - log::debug!( - "input purl {} version {:?} failed to parse", - purl, - input_version_str - ); + for ip in &input_purls { + let key = PurlKey::from_purl(&ip.purl); + let Some(&base) = base_purl_map.get(&key) else { + recommendations.insert(ip.purl.to_string(), Vec::new()); continue; }; - let highest_patch = summaries - .items + let highest = Self::find_highest_redhat_patch( + &pattern, + &ip.input_version, + versioned_by_base.get(&base.id), + ); + + if let Some(winner_vp) = highest { + winners.push(Winner { + purl_string: ip.purl.to_string(), + versioned_purl: winner_vp, + base, + }); + } else { + recommendations.insert(ip.purl.to_string(), Vec::new()); + } + } + + if winners.is_empty() { + return Ok(recommendations); + } + + // Batch fetch vulnerability statuses and qualified PURLs for all winners + let statuses_by_base = Self::fetch_vulnerability_statuses( + winners.iter().map(|w| w.base.id).unique(), + winners.iter().map(|w| w.versioned_purl.id), + connection, + ) + .await?; + + // Assemble recommendations from batched data + for winner in winners { + let entry = Self::assemble_recommend_entry(&winner, &statuses_by_base); + recommendations.insert(winner.purl_string, vec![entry]); + } + + Ok(recommendations) + } + + /// Batch-loads vulnerability statuses for the winning versioned PURLs, grouped by base PURL ID. + /// Chunks by base PURL IDs to stay within Postgres bind parameter limits. 
+ #[instrument(skip_all, err(level = tracing::Level::INFO))] + async fn fetch_vulnerability_statuses( + winner_base_ids: impl IntoIterator, + winner_vp_ids: impl IntoIterator, + connection: &C, + ) -> Result>, Error> { + let mut statuses_by_base: HashMap<_, Vec> = HashMap::new(); + let winner_vp_ids: Vec<_> = winner_vp_ids.into_iter().collect(); + + let base_chunks = chunked_with(1, winner_base_ids.into_iter()); + for base_chunk in &base_chunks { + let base_chunk: Vec<_> = base_chunk.collect(); + let all_statuses = purl_status::Entity::find() + .columns([ + version_range::Column::Id, + version_range::Column::LowVersion, + version_range::Column::LowInclusive, + version_range::Column::HighVersion, + version_range::Column::HighInclusive, + ]) + .left_join(base_purl::Entity) + .join( + JoinType::LeftJoin, + base_purl::Relation::VersionedPurls.def(), + ) + .left_join(version_range::Entity) + .filter(purl_status::Column::BasePurlId.is_in(base_chunk)) + .filter(versioned_purl::Column::Id.is_in(winner_vp_ids.iter().copied())) + .filter(SimpleExpr::FunctionCall( + Func::cust(trustify_common::db::VersionMatches) + .arg(Expr::col(versioned_purl::Column::Version)) + .arg(Expr::col((version_range::Entity, Asterisk))), + )) + .all(connection) + .instrument(info_span!("querying purl statuses")) + .await?; + + let vulns = all_statuses + .load_one(vulnerability::Entity, connection) + .instrument(info_span!("loading vulnerabilities")) + .await?; + let advisories_loaded = all_statuses + .load_one(advisory::Entity, connection) + .instrument(info_span!("loading advisories")) + .await?; + let status_models = all_statuses + .load_one(status::Entity, connection) + .instrument(info_span!("loading statuses")) + .await?; + let status_slug_map: HashMap<_, _> = status_models .into_iter() - .fold( - None, - |acc: Option<(PurlSummary, semver::Version)>, summary: PurlSummary| { - summary - .head - .purl - .version - .as_ref() - .filter(|version| pattern.is_match(version)) - .and_then(|version| { 
- lenient_semver::parse(version) - .inspect_err(|_| { - log::debug!( - "purl {} version {:?} failed to parse", - summary.head.purl, - summary.head.purl.version - ) - }) - .ok() - }) - .filter(|version| { - version.major == input_version.major - && version.minor == input_version.minor - && version.patch == input_version.patch - }) - .and_then(|version| match &acc { - Some((_, v)) if version.pre > v.pre => Some((summary, version)), - None => Some((summary, version)), - _ => None, - }) - .or(acc) - }, + .flatten() + .map(|s| (s.id, s.slug)) + .collect(); + let remediations = all_statuses + .load_many_to_many( + remediation::Entity, + remediation_purl_status::Entity, + connection, ) - .map(|(summary, _)| summary); + .instrument(info_span!("loading remediations")) + .await?; - let mut recommended_purls = Vec::new(); - if let Some(highest) = highest_patch - && let Some(purl_details) = self - .versioned_purl_by_uuid(&highest.head.purl.version_uuid(), connection) - .await? + for (((vuln, advisory), ps), rems) in vulns + .into_iter() + .zip(advisories_loaded) + .zip(all_statuses) + .zip(remediations) { - recommended_purls.push(RecommendEntry { - package: highest.head.purl.to_string(), - vulnerabilities: purl_details - .advisories - .iter() - .flat_map(|advisory| { - advisory.status.iter().map(|status| VulnerabilityStatus { - id: status.vulnerability.identifier.clone(), - status: Some(status.into()), - justification: None, - }) - }) - .collect(), - }); + if let (Some(v), Some(advisory)) = (vuln, advisory) { + let slug = status_slug_map + .get(&ps.status_id) + .cloned() + .unwrap_or_else(|| "unknown".to_string()); + statuses_by_base + .entry(ps.base_purl_id) + .or_default() + .push(StatusInfo { + vuln_id: v.id, + status_slug: slug, + remediations: rems, + advisory_date: advisory.modified.or(advisory.published), + }); + } } + } + + Ok(statuses_by_base) + } + + /// Builds a single recommendation entry from a winner and its vulnerability statuses. 
+    ///
+    /// Returns the versioned PURL (without qualifiers) as the recommended package string.
+    /// Qualifiers are context-dependent (arch, repository_url, type) and the system cannot
+    /// know which qualifiers match the caller's environment.
+    ///
+    /// Each vulnerability appears once; the list is sorted by vulnerability ID so that the
+    /// response is stable across calls.
+    fn assemble_recommend_entry(
+        winner: &Winner<'_>,
+        statuses_by_base: &HashMap<Uuid, Vec<StatusInfo>>,
+    ) -> RecommendEntry {
+        let package_str = Purl {
+            ty: winner.base.r#type.clone(),
+            namespace: winner.base.namespace.clone(),
+            name: winner.base.name.clone(),
+            version: Some(winner.versioned_purl.version.clone()),
+            qualifiers: Default::default(),
+        }
+        .to_string();
+
+        // When the same vulnerability appears in multiple advisories with different statuses,
+        // keep the one from the most recent advisory so that newer assessments (e.g. "fixed")
+        // take precedence over older ones (e.g. "affected"). A missing date compares lower
+        // than any present date, so dated advisories win over undated ones.
+        let mut best_by_vuln: HashMap<&str, &StatusInfo> = HashMap::new();
+        for info in statuses_by_base.get(&winner.base.id).into_iter().flatten() {
+            best_by_vuln
+                .entry(&info.vuln_id)
+                .and_modify(|existing| {
+                    if info.advisory_date > existing.advisory_date {
+                        *existing = info;
+                    }
+                })
+                .or_insert(info);
+        }

-            recommendations.insert(purl.to_string(), recommended_purls);
+        // `HashMap` iteration order is unspecified, so order the survivors explicitly.
+        // IDs are unique here (they were the map keys), so an unstable sort is sufficient.
+        let mut selected: Vec<&StatusInfo> = best_by_vuln.into_values().collect();
+        selected.sort_unstable_by(|a, b| a.vuln_id.cmp(&b.vuln_id));
+
+        RecommendEntry {
+            package: package_str,
+            vulnerabilities: selected
+                .into_iter()
+                .map(|info| {
+                    // Well-known slugs map to their dedicated variants; anything else is
+                    // passed through verbatim.
+                    let vex_status = match info.status_slug.as_str() {
+                        "affected" => VexStatus::Affected,
+                        "fixed" => VexStatus::Fixed,
+                        "not_affected" => VexStatus::NotAffected,
+                        "under_investigation" => VexStatus::UnderInvestigation,
+                        "recommended" => VexStatus::Recommended,
+                        other => VexStatus::Other(other.to_string()),
+                    };
+                    VulnerabilityStatus {
+                        id: info.vuln_id.clone(),
+                        status: Some(vex_status),
+                        justification: None,
+                        remediations: RemediationSummary::from_entities(&info.remediations),
+                    }
+                })
+                .collect(),
         }
+    }

-        Ok(recommendations)
+    /// Batch-fetches base PURL entities for the deduplicated set of input PURLs.
+    /// Chunks the OR conditions to stay within Postgres bind parameter limits.
+    ///
+    /// Input PURLs that share type, namespace, and name are looked up only once.
+    #[instrument(skip_all, err(level = tracing::Level::INFO))]
+    async fn fetch_base_purls<C: ConnectionTrait>(
+        input_purls: &[InputPurl],
+        connection: &C,
+    ) -> Result<Vec<base_purl::Model>, Error> {
+        // Keep the first occurrence of every distinct key, preserving input order.
+        let mut seen_keys = HashSet::new();
+        let unique_keys: Vec<_> = input_purls
+            .iter()
+            .filter_map(|ip| {
+                let key = PurlKey::from_purl(&ip.purl);
+                seen_keys.insert(key).then_some(key)
+            })
+            .collect();
+
+        let mut results = Vec::new();
+        // NOTE(review): the first argument is presumably the number of bind parameters each
+        // key contributes (type, namespace, name) — confirm against `chunked_with`.
+        let key_chunks = chunked_with(3, unique_keys.into_iter());
+        for chunk in &key_chunks {
+            let chunk: Vec<_> = chunk.collect();
+            // One query per chunk: (key1) OR (key2) OR ...
+            let condition = chunk
+                .iter()
+                .fold(Condition::any(), |cond, key| cond.add(key.as_condition()));
+            let batch = base_purl::Entity::find()
+                .filter(condition)
+                .all(connection)
+                .await?;
+            results.extend(batch);
+        }
+        Ok(results)
+    }
+
+    /// Loads all versioned PURLs for the given base PURLs, grouped by base PURL ID.
+    /// Chunks the IN clause to stay within Postgres bind parameter limits.
+    ///
+    /// Base PURLs that have no versions get no entry in the returned map.
+    #[instrument(skip_all, err(level = tracing::Level::INFO))]
+    async fn fetch_versioned_purls_by_base<C: ConnectionTrait>(
+        base_purls: &[base_purl::Model],
+        connection: &C,
+    ) -> Result<HashMap<Uuid, Vec<versioned_purl::Model>>, Error> {
+        let base_purl_ids: Vec<_> = base_purls.iter().map(|bp| bp.id).collect();
+
+        let mut by_base: HashMap<_, Vec<_>> = HashMap::new();
+        let id_chunks = chunked_with(1, base_purl_ids.into_iter());
+        for chunk in &id_chunks {
+            let chunk: Vec<_> = chunk.collect();
+            let batch = versioned_purl::Entity::find()
+                .filter(versioned_purl::Column::BasePurlId.is_in(chunk))
+                .all(connection)
+                .await?;
+            for vp in batch {
+                by_base.entry(vp.base_purl_id).or_default().push(vp);
+            }
+        }
+        Ok(by_base)
+    }
+
+    /// Selects the versioned PURL with the highest Red Hat pre-release suffix matching the input version.
+    ///
+    /// A candidate qualifies when its version string matches `pattern`, parses leniently as
+    /// semver, and has the same major, minor, and patch numbers as `input_version`. Among
+    /// the qualifying candidates the one with the greatest pre-release component is returned.
+    /// Returns `None` when `versioned_purls` is `None` or no candidate qualifies.
+    fn find_highest_redhat_patch<'a>(
+        pattern: &Regex,
+        input_version: &semver::Version,
+        versioned_purls: Option<&'a Vec<versioned_purl::Model>>,
+    ) -> Option<&'a versioned_purl::Model> {
+        versioned_purls?
+            .iter()
+            .filter(|vp| pattern.is_match(&vp.version))
+            .filter_map(|vp| {
+                // Versions that cannot be parsed are logged and skipped, not treated as errors.
+                lenient_semver::parse(&vp.version)
+                    .inspect_err(|_| {
+                        log::debug!("purl version {:?} failed to parse", vp.version);
+                    })
+                    .ok()
+                    .map(|v| (vp, v))
+            })
+            .filter(|(_, version)| {
+                version.major == input_version.major
+                    && version.minor == input_version.minor
+                    && version.patch == input_version.patch
+            })
+            .max_by(|(_, a), (_, b)| a.pre.cmp(&b.pre))
+            .map(|(vp, _)| vp)
     }
 }