diff --git a/Cargo.lock b/Cargo.lock index 426cc051a..36300a032 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8478,6 +8478,7 @@ dependencies = [ "serde_json", "thiserror 2.0.18", "tokio", + "trustify-module-fundamental", "wiremock", ] @@ -8906,6 +8907,7 @@ dependencies = [ "log", "rand 0.10.1", "rstest", + "serde", "serde_json", "sha2 0.11.0", "strum 0.28.0", @@ -8918,6 +8920,7 @@ dependencies = [ "tracing", "trustify-common", "urlencoding", + "utoipa", "uuid", ] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index eabb2215d..49508a959 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -11,6 +11,8 @@ name = "trustify" path = "src/main.rs" [dependencies] +trustify-module-fundamental = { workspace = true } + anyhow = { workspace = true } clap = { workspace = true, features = ["derive", "env"] } dotenvy = { workspace = true } diff --git a/cli/src/api/client.rs b/cli/src/api/client.rs index b6add8448..9f9f3565c 100644 --- a/cli/src/api/client.rs +++ b/cli/src/api/client.rs @@ -163,6 +163,21 @@ impl ApiClient { .await } + /// Perform a DELETE request with query parameters and retry logic + pub async fn delete_with_query( + &self, + path: &str, + query: &T, + ) -> Result { + self.execute_with_retry(|| async { + let url = self.url(path); + let request = self.client.delete(&url).query(query); + let response = self.authorize(request).await.send().await?; + self.handle_response(response).await + }) + .await + } + /// Execute a request with retry logic for timeouts and token refresh async fn execute_with_retry(&self, f: F) -> Result where diff --git a/cli/src/api/sbom.rs b/cli/src/api/sbom.rs index ccf1cf969..2d49d6aa6 100644 --- a/cli/src/api/sbom.rs +++ b/cli/src/api/sbom.rs @@ -12,6 +12,7 @@ use log; use serde::{Deserialize, Serialize}; use serde_json::Value; use tokio::sync::Mutex; +use trustify_module_fundamental::sbom::model::DeleteContext; use super::client::{ApiClient, ApiError}; use crate::common::{ @@ -20,6 +21,7 @@ use crate::common::{ }; const SBOM_PATH: &str = 
"/v2/sbom"; +/// Path of the server-side prune endpoints (`GET` previews, `DELETE` prunes) +const PRUNE_PATH: &str = "/v3/sbom/prune"; /// Parameters for find duplicates pub struct FindDuplicatesParams { @@ -55,6 +57,16 @@ pub async fn list(client: &ApiClient, params: &ListParams) -> Result Result { + client.get_with_query(PRUNE_PATH, params).await +} + +/// Prune SBOMs - returns raw JSON +pub async fn prune_prunable(client: &ApiClient, params: &ListParams) -> Result { + client.delete_with_query(PRUNE_PATH, params).await +} + /// Fetch a single page and extract SBOM entries async fn fetch_page( client: &ApiClient, @@ -365,41 +377,16 @@ pub async fn prune(client: &ApiClient, params: &PruneParams) -> Result = items - .iter() - .filter_map(|item| { - let id = item.get("id").and_then(|v| v.as_str())?; - let document_id = item - .get("document_id") - .and_then(|v| v.as_str()) - .unwrap_or("unknown"); - Some(DeleteEntry { - id: id.to_string(), - identifier: document_id.to_string(), - }) - }) - .collect(); + let response = if params.dry_run { + list_prunable(client, &list_params).await? + } else { + prune_prunable(client, &list_params).await? 
+ }; - // If dry run, just return the count without deleting - if params.dry_run { - return Ok(new_delete_result(total)); - } + let parsed: DeleteContext = serde_json::from_str(&response) + .map_err(|e| ApiError::InternalError(format!("Failed to parse response: {}", e)))?; - // Perform the actual deletion - delete_list(client, entries, params.concurrency).await + Ok(parsed.into()) } /// Read delete entries from a file diff --git a/cli/src/common/mod.rs b/cli/src/common/mod.rs index 5ca0f6376..2401694f4 100644 --- a/cli/src/common/mod.rs +++ b/cli/src/common/mod.rs @@ -5,6 +5,7 @@ use futures::stream::{self, StreamExt}; use indicatif::{ProgressBar, ProgressStyle}; use serde::{Deserialize, Serialize}; use tokio::sync::Mutex; +use trustify_module_fundamental::sbom::model::{DeletableSbomState, DeleteContext}; use crate::api::client::{ApiClient, ApiError}; @@ -108,6 +109,56 @@ pub struct FailedResult { pub error: String, } +impl From for DeleteResult { + fn from(value: DeleteContext) -> Self { + let sboms = value.sboms.unwrap_or(vec![]); + + let mut deleted = vec![]; + let mut skipped = vec![]; + let mut failed = vec![]; + + for sbom in &sboms { + let id = sbom.head.sbom_id.to_string(); + let identifier = sbom + .head + .document_id + .clone() + .unwrap_or("unknown".to_string()); + match &sbom.state { + DeletableSbomState::Deleted(_) => { + deleted.push(DeletedResult { id, identifier }); + } + DeletableSbomState::Skipped => { + skipped.push(SkippedResult { id, identifier }); + } + DeletableSbomState::Failed(error) => { + failed.push(FailedResult { + id, + identifier, + error: error.clone(), + }); + } + _ => (), + } + } + + let deleted_total = deleted.len() as u32; + let skipped_total = skipped.len() as u32; + let failed_total = failed.len() as u32; + let total = sboms.len() as u32; + + Self { + deleted, + deleted_total, + skipped, + skipped_total, + failed, + failed_total, + total, + } + } +} + pub async fn delete_entries( client: &ApiClient, base_path: &str, diff --git 
a/docs/adrs/00012-sbom-pruning-api.md b/docs/adrs/00012-sbom-pruning-api.md new file mode 100644 index 000000000..7633b2a88 --- /dev/null +++ b/docs/adrs/00012-sbom-pruning-api.md @@ -0,0 +1,223 @@ +# 00012. SBOM Prune API Endpoint + +## Status + +ACCEPTED + +## Context + +### Problem Statement + +Production Trustify deployments contain millions of SBOMs, causing significant +storage and database cost concerns. An external CLI tool +([Mobster](https://github.com/konflux-ci/mobster)) exists for bulk operations, +but it requires: + +- Direct server access or token management +- Manual execution +- No server-side tracking or logging + +### Current State + +- **External CLI:** Bulk pruning implemented in + [Mobster](https://github.com/konflux-ci/mobster) (external project) +- **Delete API:** `DELETE /v2/sbom/{id}` supports single SBOM deletion only +- **Search API:** Supports date filtering (`ingested<30 days ago`) and label + filtering +- **Gap:** No server-side bulk prune operation with tracking + +### Requirements + +- Server-side API endpoint for bulk SBOM pruning +- Reuse existing filter query syntax +- Safety (needs proper permission for doing pruning operation) +- Audit logging for compliance +- Foundation for future scheduled pruning service + +## Decision + +Add a `DELETE /v3/sbom/prune` endpoint that executes pruning logic server-side +and a `GET /v3/sbom/prune` endpoint that only returns what will be pruned but +takes no action (dry-run mode). 
+ +## API Specification + +### Endpoints + +``` +GET /v3/sbom/prune +DELETE /v3/sbom/prune +``` + +### Parameters + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `q` | string | Yes | - | Filter query (e.g., `ingested<90 days ago&label:env=staging`) | +| `sort` | string | No | - | Sort SBOMs that match the filter query | +| `limit` | integer | No | 100 | Maximum SBOMs to delete (safety limit) | +| `offset` | integer | No | 0 | Do not delete the first N SBOMs | + +### Responses + +The `GET` endpoint should return the following (example): + +```json +{ + "fetched": 2, + "skipped": 0, + "deleted": 0, + "failed": 0, + "limit": 100, + "sboms": [ + { + "sbom_id": "sbom001", + "document_id": "urn:...", + "source_document_id": "sd0001", + "state": "fetched" + }, + { + "sbom_id": "sbom002", + "document_id": "urn:...", + "source_document_id": "sd0002", + "state": "fetched" + } + ] +} +``` + +Again, `GET` only returns what would be deleted, limited to the first N items +(`limit` field). + +The `DELETE` endpoint should return the following (example): + +```json +{ + "fetched": 2, + "skipped": 0, + "deleted": 2, + "failed": 0, + "limit": 100, + "sboms": [ + { + "sbom_id": "sbom001", + "document_id": "urn:...", + "source_document_id": "sd0001", + "state": {"deleted": "sha512:..."} + }, + { + "sbom_id": "sbom002", + "document_id": "urn:...", + "source_document_id": "sd0002", + "state": {"deleted": "sha512:..."} + } + ] +} +``` + +### Permission + +Requires the `DeleteSbom` permission for `DELETE` and the `ReadSbom` permission for +`GET` (reuses existing permissions). 
+ +## Usage Examples + +```bash +# Preview SBOMs older than 90 days +http GET localhost:8080/api/v3/sbom/prune q=='ingested<90 days ago' + +# Delete staging SBOMs older than 30 days +http DELETE localhost:8080/api/v3/sbom/prune \ + q=='ingested<30 days ago&label:env=staging' \ + limit==5000 + +# Delete SBOMs with specific label +http DELETE localhost:8080/api/v3/sbom/prune \ + q=='label:temporary=true' +``` + +## Safety Features + +1. **Limit cap:** Server-enforced maximum per request +1. **Filter required:** No "delete all" without explicit filter +1. **Permission check:** Requires `DeleteSbom` permission (`DELETE`) or + `ReadSbom` permission (`GET`) + +## Logging + +All prune operations should be logged for audit purposes using +`target: "prune"` to enable separate audit file configuration at deployment +time. + +**Log on operation start:** + +``` +INFO [prune]: Prune operation started | user={user_id} | filter="{query}" | dry_run={bool} | limit={n} +``` + +**Log on each deletion (when not dry-run):** + +``` +INFO [prune]: SBOM deleted | sbom_id={uuid} | name="{name}" | ingested={timestamp} +``` + +**Log on operation complete:** + +``` +INFO [prune]: Prune operation completed | user={user_id} | matched={n} | deleted={n} | failed={n} | duration={ms} +``` + +**Log on failure:** + +``` +WARN [prune]: SBOM deletion failed | sbom_id={uuid} | error="{message}" +``` + +These logs provide: + +- Audit trail for compliance (can be routed to separate file via log + configuration) +- Debugging information for failures +- Metrics for monitoring prune operations + +## Design Considerations + +### Reusable Logic + +The pruning logic should be implemented in the service layer so it can be +called by: + +1. This REST endpoint (`DELETE /v3/sbom/prune`) +1. Future scheduled pruner background service (next phase) + +### Synchronous Execution + +Initial implementation is synchronous - client waits for completion. This is +suitable for batches up to a few thousand SBOMs. 
Asynchronous execution with +job tracking will be addressed in the scheduled pruner phase. + +## Success Criteria + +- [ ] Endpoint accepts filter query and limit parameters +- [ ] Dry-run mode returns accurate preview +- [ ] Actual pruning deletes matching SBOMs +- [ ] Failed deletions are tracked and reported +- [ ] All operations logged with user context +- [ ] Permission check enforced +- [ ] OpenAPI spec updated + +## Next Phase: Scheduled Pruning Service + +This phase will add a background pruning service following the importer +pattern: + +- Database-backed job configuration +- REST API for managing scheduled prune jobs (`/v3/pruner`) +- Period-based scheduling +- Reuses the prune service logic from this phase + +## Related + +- **External:** [Mobster](https://github.com/konflux-ci/mobster) - CLI tool for + bulk SBOM operations +- **Future ADR:** SBOM Pruning Background Service (next phase) diff --git a/modules/fundamental/src/sbom/endpoints/mod.rs b/modules/fundamental/src/sbom/endpoints/mod.rs index 97fb85aad..797e0e324 100644 --- a/modules/fundamental/src/sbom/endpoints/mod.rs +++ b/modules/fundamental/src/sbom/endpoints/mod.rs @@ -18,8 +18,9 @@ use crate::{ }, sbom::{ model::{ - SbomExternalPackageReference, SbomModel, SbomNodeReference, SbomPackage, - SbomPackageRelation, SbomSummary, Which, details::SbomAdvisory, + DeletableSbomState, DeleteContext, DeleteParams, SbomExternalPackageReference, + SbomModel, SbomNodeReference, SbomPackage, SbomPackageRelation, SbomSummary, Which, + details::SbomAdvisory, }, service::{SbomService, sbom::FetchOptions}, }, @@ -77,6 +78,8 @@ pub fn configure( .service(get) .service(get_sbom_advisories) .service(delete) + .service(prunable) + .service(prune) .service(packages) .service(models) .service(related) @@ -411,6 +414,9 @@ pub async fn delete( let id = Id::from_str(&id)?; match service.fetch_sbom(id, &tx).await? 
{ + // Since `delete_sbom` is now implemented via `delete_sboms`, the conversion + // from `source_document` to storage key is performed twice (once in `delete_sboms` + // and once in `delete_doc`) here, but for now we can deal with it Some((v, _, source_document)) => match service.delete_sbom(v.sbom_id, &tx).await? { false => Ok(HttpResponse::NotFound().finish()), true => { @@ -427,6 +433,74 @@ pub async fn delete( } } +/// Get all SBOMs that can be pruned up to the given limit +#[utoipa::path( + tag = "sbom", + operation_id = "getPrunableSboms", + params( + Query, + DeleteParams, + ), + responses( + (status = 200, description = "Candidate SBOMs to be pruned", body = DeleteContext), + ), +)] +#[get("/v3/sbom/prune")] +pub async fn prunable( + service: web::Data, + db: web::Data, + web::Query(search): web::Query, + web::Query(params): web::Query, + _: Require, +) -> actix_web::Result { + let tx = db.begin_read().await?; + let ctx = service.fetch_prunable_sboms(search, params, &tx).await?; + + Ok(HttpResponse::Ok().json(ctx)) +} + +/// Prune SBOMs +#[utoipa::path( + tag = "sbom", + operation_id = "pruneSboms", + params( + Query, + DeleteParams, + ), + responses( + (status = 200, description = "Pruned SBOMs", body = DeleteContext), + ), +)] +#[delete("/v3/sbom/prune")] +pub async fn prune( + i: web::Data, + service: web::Data, + db: web::Data, + web::Query(search): web::Query, + web::Query(params): web::Query, + _: Require, +) -> actix_web::Result { + let tx = db.begin().await?; + + let mut ctx = service.fetch_prunable_sboms(search, params, &tx).await?; + + service.delete_sboms(&mut ctx, &tx).await?; + if ctx.deleted > 0 { + tx.commit().await?; + + let storage = i.storage(); + for sbom in ctx.sboms.clone().unwrap_or(vec![]) { + if let DeletableSbomState::Deleted(Some(key)) = &sbom.state + && let Err(e) = storage.delete(key.clone()).await + { + log::warn!("Ignoring {e}"); + } + } + } + + Ok(HttpResponse::Ok().json(ctx)) +} + /// Search for packages of an SBOM 
#[utoipa::path( tag = "sbom", diff --git a/modules/fundamental/src/sbom/model/mod.rs b/modules/fundamental/src/sbom/model/mod.rs index 09e649742..a4c513f5c 100644 --- a/modules/fundamental/src/sbom/model/mod.rs +++ b/modules/fundamental/src/sbom/model/mod.rs @@ -9,16 +9,83 @@ use crate::{ sbom::service::sbom::IntoPackage, source_document::model::SourceDocument, }; -use sea_orm::{ConnectionTrait, FromQueryResult, ModelTrait, PaginatorTrait, prelude::Uuid}; +use sea_orm::{ + ColumnTrait, ConnectionTrait, EntityTrait, FromQueryResult, ModelTrait, PaginatorTrait, + QueryFilter, QueryResult, QuerySelect, prelude::Uuid, +}; use serde::{Deserialize, Serialize}; +use std::{ + collections::{HashMap, HashSet}, + future::Future, + str::FromStr, +}; use time::OffsetDateTime; use tracing::{info_span, instrument}; use tracing_futures::Instrument; -use trustify_common::{cpe::Cpe, purl::Purl, requested_field::RequestedField}; +use trustify_common::{ + cpe::Cpe, + id::{Id, IdError}, + model::Pagination, + purl::Purl, + requested_field::RequestedField, +}; use trustify_entity::{ labels::Labels, relationship::Relationship, sbom, sbom_node, sbom_package, source_document, }; -use utoipa::ToSchema; +use trustify_module_storage::service::StorageKey; +use utoipa::{IntoParams, ToSchema}; + +pub trait TryFromDb: Sized { + type Error: std::error::Error; + + fn try_from_db( + value: T, + connection: &C, + ) -> impl Future> + Send + where + C: ConnectionTrait; +} + +#[derive(Serialize, Deserialize, Debug, Copy, Clone, IntoParams)] +#[into_params(parameter_in = Query)] +pub struct DeleteParams { + /// The maximum number of items to be deleted. + /// + /// Zero means that no items will be deleted. + #[serde(default = "default::limit")] + pub limit: u64, + /// From where the delete operation should have start. 
+ pub offset: u64, +} + +impl Default for DeleteParams { + fn default() -> Self { + Self { + limit: default::limit(), + offset: 0, + } + } +} + +mod default { + pub(super) const fn limit() -> u64 { + 100 + } +} + +impl Pagination for DeleteParams { + fn offset(&self) -> u64 { + self.offset + } + + fn limit(&self) -> u64 { + self.limit + } + + fn total(&self) -> bool { + false + } +} #[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)] pub struct SbomHead { @@ -106,6 +173,249 @@ impl SbomSummary

{ } } +/// Necessary information about an SBOM that is going to be deleted +#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default, FromQueryResult)] +pub struct DeletableSbomHead { + /// SBOM id (primary key of the `sbom` table) + #[serde(with = "uuid::serde::urn")] + #[schema(value_type = String)] + pub sbom_id: Uuid, + + /// SBOM document id + #[serde(skip_serializing_if = "Option::is_none")] + pub document_id: Option, + + /// SBOM source document id (primary key of the `source_document` table) + #[serde(with = "uuid::serde::urn")] + #[schema(value_type = String)] + pub source_document_id: Uuid, +} + +/// An SBOM state during its deletion +#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, ToSchema, Default)] +#[serde(rename_all = "snake_case")] +pub enum DeletableSbomState { + /// An SBOM has been fetched, its `id`, `document_id`, and `source_document_id` + /// are present + #[default] + Fetched, + /// An SBOM has been successfully removed from the database, use the provided + /// storage key to remove its leftovers from the storage + Deleted(Option), + /// The deletion of an SBOM has been skipped, probably due to some information + /// has not been provided + Skipped, + /// The deletion of an SBOM has failed, use the provided error message to + /// see why + Failed(String), +} + +/// Only keeps information needed for pruning operation, i.e. 
the SBOM id and +/// the id of associated document +#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)] +pub struct DeletableSbom { + #[serde(flatten)] + pub head: DeletableSbomHead, + /// The deletion status of the SBOM + pub state: DeletableSbomState, +} + +/// Source document storage keys for later deletion from the storage +#[derive(Debug, Clone, Default, FromQueryResult)] +pub struct StorageKeys { + pub id: Uuid, + pub sha256: String, + pub sha384: String, + pub sha512: String, +} + +impl TryInto for StorageKeys { + type Error = IdError; + + fn try_into(self) -> Result { + if let Ok(key) = Id::from_str(&format!("sha256:{}", self.sha256))?.try_into() { + return Ok(key); + } + + if let Ok(key) = Id::from_str(&format!("sha384:{}", self.sha384))?.try_into() { + return Ok(key); + } + + if let Ok(key) = Id::from_str(&format!("sha512:{}", self.sha512))?.try_into() { + return Ok(key); + } + + Err(IdError::MissingPrefix) + } +} + +/// The context of SBOM prune/delete operation +#[derive(Serialize, Deserialize, Debug, Clone, ToSchema)] +pub struct DeleteContext { + /// The number of fetched SBOMs + pub fetched: u64, + /// The total number of SBOMs skipped during the deletion + pub skipped: u64, + /// The total number of deleted SBOMs + pub deleted: u64, + /// The total number of SBOMs that failed to be deleted + pub failed: u64, + /// The fetch limit + pub limit: u64, + /// SBOMs + #[serde(skip_serializing_if = "Option::is_none")] + pub sboms: Option>, +} + +impl Default for DeleteContext { + fn default() -> Self { + Self { + fetched: 0, + skipped: 0, + deleted: 0, + failed: 0, + limit: DeleteParams::default().limit, + sboms: None, + } + } +} + +impl DeleteContext { + pub fn get_fetched_sbom_ids(&self) -> Vec { + if self.fetched == 0 { + return vec![]; + } + + self.sboms + .clone() + .unwrap_or(vec![]) + .iter() + .filter_map(|x| match x.state { + DeletableSbomState::Fetched => Some(x.head.sbom_id), + _ => None, + }) + .collect() + } + + pub async fn 
commit_deleted( + &mut self, + delete_result: &Vec, + connection: &C, + ) -> Result, Error> + where + C: ConnectionTrait, + { + let mut source_document_ids = vec![]; + + for row in delete_result { + // According to `sea-orm` and `sqlx` documentation, if this fails + // for one item it probably also fails for all items (since index, + // column name, and requested data type are invariant). Therefore + // we end here after the first encountered error. + if let Some(id) = row + .try_get_by_index::>(0) + // If we cannot process delete result, we treat deletion of + // all requested SBOMs as failed + .inspect_err(|e| self.commit_failure(e.to_string()))? + { + source_document_ids.push(id); + } + } + + let docs: HashSet = source_document_ids.iter().copied().collect(); + + let keys = source_document::Entity::find() + .select_only() + .column(source_document::Column::Id) + .column(source_document::Column::Sha256) + .column(source_document::Column::Sha384) + .column(source_document::Column::Sha512) + .filter(source_document::Column::Id.is_in(source_document_ids.clone())) + .into_model::() + .all(connection) + .await + .unwrap_or(vec![]); + + let id2key: HashMap = keys + .iter() + .filter_map(|key| key.clone().try_into().map_or(None, |x| Some((key.id, x)))) + .collect(); + + self.skipped = 0; + self.deleted = 0; + + for sbom in &mut self.sboms.clone().unwrap_or(vec![]) { + if sbom.state != DeletableSbomState::Fetched { + continue; + } + let doc = sbom.head.source_document_id; + match docs.contains(&doc) { + true => { + sbom.state = DeletableSbomState::Deleted(id2key.get(&doc).cloned()); + self.deleted += 1; + } + false => { + sbom.state = DeletableSbomState::Skipped; + self.skipped += 1; + } + } + } + + Ok(source_document_ids) + } + + pub fn commit_failure(&mut self, err: String) { + let delete_not_started = self.skipped == 0 && self.deleted == 0; + + self.failed = if delete_not_started { + self.fetched + } else { + self.deleted + }; + self.deleted = 0; + + for sbom in &mut 
self.sboms.clone().unwrap_or(vec![]) { + match (sbom.state.clone(), delete_not_started) { + (DeletableSbomState::Fetched, true) | (DeletableSbomState::Deleted(_), _) => { + sbom.state = DeletableSbomState::Failed(err.clone()); + } + _ => (), + } + } + } +} + +impl TryFromDb for DeleteContext { + type Error = Error; + + async fn try_from_db(value: Uuid, connection: &C) -> Result + where + C: ConnectionTrait, + { + let sbom = sbom::Entity::find() + .select_only() + .column(sbom::Column::SbomId) + .column(sbom::Column::DocumentId) + .column(sbom::Column::SourceDocumentId) + .filter(sbom::Column::SbomId.eq(value)) + .into_model::() + .one(connection) + .await?; + + Ok(match sbom { + Some(head) => Self { + fetched: 1, + sboms: Some(vec![DeletableSbom { + head, + state: DeletableSbomState::Fetched, + }]), + ..Default::default() + }, + None => Default::default(), + }) + } +} + #[derive(FromQueryResult)] pub struct ModelCatcher { pub id: String, diff --git a/modules/fundamental/src/sbom/service/sbom.rs b/modules/fundamental/src/sbom/service/sbom.rs index 3d85d47e7..5709b0c6f 100644 --- a/modules/fundamental/src/sbom/service/sbom.rs +++ b/modules/fundamental/src/sbom/service/sbom.rs @@ -4,8 +4,10 @@ use crate::{ common::license_filtering::{LICENSE, license_text_coalesce}, purl::model::summary::purl::PurlSummary, sbom::model::{ + DeletableSbom, DeletableSbomHead, DeletableSbomState, DeleteContext, DeleteParams, ModelCatcher, SbomExternalPackageReference, SbomModel, SbomNodeReference, SbomPackage, - SbomPackageRelation, SbomPackageSummary, SbomSummary, Which, details::SbomDetails, + SbomPackageRelation, SbomPackageSummary, SbomSummary, TryFromDb, Which, + details::SbomDetails, }, }; use futures_util::{StreamExt, TryStreamExt, stream}; @@ -17,12 +19,12 @@ use sea_orm::{ use sea_query::{ColumnType, Expr, JoinType, UnionType, extension::postgres::PgExpr}; use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::{collections::HashMap, fmt::Debug, sync::Arc}; +use 
std::{collections::HashMap, fmt::Debug, sync::Arc, vec::Vec}; use tracing::{Instrument, info_span, instrument}; use trustify_common::{ cpe::Cpe, db::{ - limiter::{LimitedResult, LimiterTrait, limit_selector}, + limiter::{LimitedResult, LimiterAsModelTrait, LimiterTrait, limit_selector}, multi_model::{FromQueryResultMultiModel, SelectIntoMultiModel}, query::{Columns, Filtering, IntoColumns, Query, q}, }, @@ -43,6 +45,22 @@ use trustify_entity::{ versioned_purl, vulnerability, }; +struct SeaQueryValueWrapper(sea_query::Value); + +impl From> for SeaQueryValueWrapper { + fn from(value: Vec) -> Self { + Self(sea_query::Value::Array( + sea_query::ArrayType::Uuid, + Some(Box::new( + value + .iter() + .map(|&id| sea_query::Value::Uuid(Some(Box::new(id)))) + .collect(), + )), + )) + } +} + #[derive(Clone, Debug, Default)] pub struct FetchOptions { labels: Labels, @@ -121,73 +139,103 @@ impl SbomService { id: Uuid, connection: &C, ) -> Result { + let mut ctx = DeleteContext::try_from_db(id, connection).await?; + self.delete_sboms(&mut ctx, connection).await?; + + Ok(ctx.deleted == 1) + } + + /// delete multiple sboms + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] + pub async fn delete_sboms( + &self, + ctx: &mut DeleteContext, + connection: &C, + ) -> Result<(), Error> { + let ids = ctx.get_fetched_sbom_ids(); + + if ids.is_empty() { + return Ok(()); + } + let nids: u64 = ids.len() as u64; + // IMPORTANT: Capture qualified_purl IDs before CASCADE deletion. - // After SBOM deletion, CASCADE removes sbom_node_purl_ref entries, + // After SBOMs deletion, CASCADE removes sbom_node_purl_ref entries, // then GC uses the captured IDs to clean up orphaned PURLs. 
let qualified_purl_ids: Vec = sbom_node_purl_ref::Entity::find() .select_only() .column(sbom_node_purl_ref::Column::QualifiedPurlId) - .filter(sbom_node_purl_ref::Column::SbomId.eq(id)) + .filter(match nids { + 1 => sbom_node_purl_ref::Column::SbomId.eq(ids[0]), + _ => sbom_node_purl_ref::Column::SbomId.is_in(ids.clone()), + }) .into_tuple() .all(connection) - .await?; + .await + .inspect_err(|e| ctx.commit_failure(e.to_string()))?; log::debug!( - "Captured {} qualified_purl IDs from SBOM {} for cleanup", + "Captured {} qualified_purl IDs from SBOMs {:?} for cleanup", qualified_purl_ids.len(), - id + ids ); - // Delete the SBOM - CASCADE will properly delete sbom_package and sbom_node_purl_ref + // A single id is bound as a scalar and compared with `=`; several ids are + // bound as one array parameter, for which PostgreSQL requires + // `= ANY (...)` (`IN $1` is not valid SQL). + let (op, param): (_, sea_query::Value) = match nids { + 1 => ("=", ids[0].into()), + _ => ("= ANY", Into::::into(ids.clone()).0), + }; + + // Delete SBOMs - CASCADE will properly delete sbom_package and sbom_node_purl_ref let stmt = Statement::from_sql_and_values( connection.get_database_backend(), - r#"DELETE FROM sbom WHERE sbom_id=$1 RETURNING source_document_id"#, - [id.into()], + format!(r#"DELETE FROM sbom WHERE sbom_id {op} ($1) RETURNING source_document_id"#), + vec![param], ); - let result = connection.query_all(stmt).await?; - if result.len() > 1 { - return Err(Error::Data(format!("Too many rows deleted for {id}"))); + let result = connection + .query_all(stmt) + .await + .inspect_err(|e| ctx.commit_failure(e.to_string()))?; + let nres: u64 = result.len() as u64; + if nres > nids { + return Err(Error::Data(format!("Too many rows deleted for {ids:?}"))) + .inspect_err(|e| ctx.commit_failure(e.to_string())); } - for row in &result { - let source_document = row.try_get_by_index::>(0)?; - if let Some(doc) = source_document { - source_document::Entity::delete_by_id(doc) - .exec(connection) - .await?; + let source_document_ids = ctx.commit_deleted(&result, connection).await?; + + if !source_document_ids.is_empty() { + match source_document_ids.len() { + 1 => 
source_document::Entity::delete_by_id(source_document_ids[0]), + _ => source_document::Entity::delete_many() + .filter(source_document::Column::Id.is_in(source_document_ids)), } + .exec(connection) + .await + .inspect_err(|e| ctx.commit_failure(e.to_string()))?; } // Cleanup orphaned PURLs if deletion succeeded and we had PURLs to check - if !qualified_purl_ids.is_empty() && result.len() == 1 { - // Build array parameter for the GC query - let array_param = sea_query::Value::Array( - sea_query::ArrayType::Uuid, - Some(Box::new( - qualified_purl_ids - .iter() - .map(|&id| sea_query::Value::Uuid(Some(Box::new(id)))) - .collect(), - )), - ); - + if !qualified_purl_ids.is_empty() && nres == nids { let gc_stmt = Statement::from_sql_and_values( connection.get_database_backend(), // it looks much more readable in an SQL file include_str!("gc_purls_after_sbom_deletion.sql"), - vec![array_param], + vec![Into::::into(qualified_purl_ids).0], ); - let gc_result = connection.execute(gc_stmt).await?; + let gc_result = connection + .execute(gc_stmt) + .await + .inspect_err(|e| ctx.commit_failure(e.to_string()))?; log::debug!( - "Cleaned up {} orphaned purl records after SBOM {} deletion", + "Cleaned up {} orphaned purl records after SBOMs {:?} deletion", gc_result.rows_affected(), - id + ids, ); } - Ok(result.len() == 1) + Ok(()) } /// fetch all SBOMs @@ -308,6 +356,50 @@ impl SbomService { Ok(PaginatedResults { total, items }) } + /// Fetch SBOMs selected for pruning + #[instrument(skip(self, connection), err(level=tracing::Level::INFO))] + pub async fn fetch_prunable_sboms( + &self, + search: Query, + params: DeleteParams, + connection: &C, + ) -> Result { + let limiter = sbom::Entity::find() + .join(JoinType::Join, sbom::Relation::SourceDocument.def()) + .select_only() + .column(sbom::Column::SbomId) + .column(sbom::Column::DocumentId) + .column(sbom::Column::SourceDocumentId) + .filtering_with( + search, + Columns::from_entity::() + .add_columns(source_document::Entity) + 
.translator(|f, op, v| match f.split_once(':') { + Some(("label", key)) => Some(format!("labels:{key}{op}{v}")), + _ => None, + }), + )? + .limiting_as::(connection, params, &self.cache)?; + + let LimitedResult { items, total } = limiter.fetch().await?; + let total = total.total().await?; + + Ok(DeleteContext { + fetched: total, + limit: params.limit, + sboms: Some( + items + .iter() + .map(|head| DeletableSbom { + head: head.clone(), + state: DeletableSbomState::Fetched, + }) + .collect(), + ), + ..Default::default() + }) + } + /// Fetch all packages from an SBOM. /// /// If you need to find packages based on their relationship, even in the relationship to diff --git a/modules/storage/Cargo.toml b/modules/storage/Cargo.toml index f54fd006f..21b7581cc 100644 --- a/modules/storage/Cargo.toml +++ b/modules/storage/Cargo.toml @@ -20,6 +20,7 @@ clap = { workspace = true } futures = { workspace = true } hex = { workspace = true } log = { workspace = true } +serde = { workspace = true } strum = { workspace = true, features = ["derive"] } tempfile = { workspace = true } thiserror = { workspace = true } @@ -27,6 +28,7 @@ tokio = { workspace = true, features = ["full"] } tokio-util = { workspace = true, features = ["full"] } tracing = { workspace = true } urlencoding = { workspace = true } +utoipa = { workspace = true } uuid = { version = "1.16.0", features = ["v4"] } [dev-dependencies] diff --git a/modules/storage/src/service/mod.rs b/modules/storage/src/service/mod.rs index 460694cb0..fad7729af 100644 --- a/modules/storage/src/service/mod.rs +++ b/modules/storage/src/service/mod.rs @@ -13,11 +13,13 @@ use crate::service::fs::FileSystemBackend; use bytes::Bytes; use futures::Stream; use hex::ToHex; +use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter}; use std::future::Future; use tokio::io::AsyncRead; use trustify_common::hashing::Digests; use trustify_common::id::Id; +use utoipa::ToSchema; #[derive(Debug, thiserror::Error)] pub enum StoreError { 
@@ -27,7 +29,7 @@ pub enum StoreError { Backend(#[source] B), } -#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, ToSchema)] pub struct StorageKey(String); impl Display for StorageKey { diff --git a/openapi.yaml b/openapi.yaml index af4570db6..7be76e5c4 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -3888,6 +3888,219 @@ paths: application/json: schema: $ref: '#/components/schemas/PaginatedResults_SbomPackageSummary' + /api/v3/sbom/prune: + get: + tags: + - sbom + summary: Get all SBOMs that can be pruned up to the given limit + operationId: getPrunableSboms + parameters: + - name: q + in: query + description: | + EBNF grammar for the _q_ parameter: + ```text + q = ( values | filter ) { '&' q } + values = value { '|', values } + filter = field, operator, values + operator = "=" | "!=" | "~" | "!~" | ">=" | ">" | "<=" | "<" + value = (* any text but escape special characters with '\' *) + field = (* must match an entity attribute name *) + ``` + Any values in a _q_ will result in a case-insensitive "full + text search", effectively producing an OR clause of LIKE + clauses for every string-ish field in the resource being + queried. + + Examples: + - `foo` - any field containing 'foo' + - `foo|bar` - any field containing either 'foo' OR 'bar' + - `foo&bar` - some field contains 'foo' AND some field contains 'bar' + + A _filter_ may also be used to constrain the results. The + filter's field name must correspond to one of the resource's + attributes. If it doesn't, an error will be returned + containing a list of the valid fields for that resource. + + An ASCII value of `NUL`, percent-encoded as `%00`, may be used + to find resources on which a particular field isn't set. For + example, `name=%00` and `name!=%00` yield the WHERE clauses, + 'NAME IS NULL' and 'NAME IS NOT NULL', respectively. 
+ + Examples: + - `name=foo` - entity's _name_ matches 'foo' exactly + - `name~foo` - entity's _name_ contains 'foo', case-insensitive + - `name~foo|bar` - entity's _name_ contains either 'foo' OR 'bar', case-insensitive + - `name=` - entity's _name_ is the empty string, '' + - `name=%00` - entity's _name_ isn't set + - `published>3 days ago` - date values can be "human time" + + Multiple full text searches and/or filters should be + '&'-delimited -- they are logically AND'd together. + + - `red hat|fedora&labels:type=cve|osv&published>last wednesday 17:00` + + Fields corresponding to JSON objects in the database may use a + ':' to delimit the column name and the object key, + e.g. `purl:qualifiers:type=pom` + + Any operator or special character, e.g. '|', '&', within a + value should be escaped by prefixing it with a backslash. + required: false + schema: + type: string + - name: sort + in: query + description: | + EBNF grammar for the _sort_ parameter: + ```text + sort = field [ ':', order ] { ',' sort } + order = ( "asc" | "desc" ) + field = (* must match the name of entity's attributes *) + ``` + The optional _order_ should be one of "asc" or "desc". If + omitted, the order defaults to "asc". + + Each _field_ name must correspond to one of the columns of the + table holding the entities being queried. Those corresponding + to JSON objects in the database may use a ':' to delimit the + column name and the object key, + e.g. `purl:qualifiers:type:desc` + required: false + schema: + type: string + - name: limit + in: query + description: |- + The maximum number of items to be deleted. + + Zero means that no items will be deleted. + required: false + schema: + type: integer + format: int64 + minimum: 0 + - name: offset + in: query + description: From where the delete operation should start.
+ required: true + schema: + type: integer + format: int64 + minimum: 0 + responses: + '200': + description: Candidate SBOMs to be pruned + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteContext' + delete: + tags: + - sbom + summary: Prune SBOMs + operationId: pruneSboms + parameters: + - name: q + in: query + description: | + EBNF grammar for the _q_ parameter: + ```text + q = ( values | filter ) { '&' q } + values = value { '|', values } + filter = field, operator, values + operator = "=" | "!=" | "~" | "!~" | ">=" | ">" | "<=" | "<" + value = (* any text but escape special characters with '\' *) + field = (* must match an entity attribute name *) + ``` + Any values in a _q_ will result in a case-insensitive "full + text search", effectively producing an OR clause of LIKE + clauses for every string-ish field in the resource being + queried. + + Examples: + - `foo` - any field containing 'foo' + - `foo|bar` - any field containing either 'foo' OR 'bar' + - `foo&bar` - some field contains 'foo' AND some field contains 'bar' + + A _filter_ may also be used to constrain the results. The + filter's field name must correspond to one of the resource's + attributes. If it doesn't, an error will be returned + containing a list of the valid fields for that resource. + + An ASCII value of `NUL`, percent-encoded as `%00`, may be used + to find resources on which a particular field isn't set. For + example, `name=%00` and `name!=%00` yield the WHERE clauses, + 'NAME IS NULL' and 'NAME IS NOT NULL', respectively. 
+ + Examples: + - `name=foo` - entity's _name_ matches 'foo' exactly + - `name~foo` - entity's _name_ contains 'foo', case-insensitive + - `name~foo|bar` - entity's _name_ contains either 'foo' OR 'bar', case-insensitive + - `name=` - entity's _name_ is the empty string, '' + - `name=%00` - entity's _name_ isn't set + - `published>3 days ago` - date values can be "human time" + + Multiple full text searches and/or filters should be + '&'-delimited -- they are logically AND'd together. + + - `red hat|fedora&labels:type=cve|osv&published>last wednesday 17:00` + + Fields corresponding to JSON objects in the database may use a + ':' to delimit the column name and the object key, + e.g. `purl:qualifiers:type=pom` + + Any operator or special character, e.g. '|', '&', within a + value should be escaped by prefixing it with a backslash. + required: false + schema: + type: string + - name: sort + in: query + description: | + EBNF grammar for the _sort_ parameter: + ```text + sort = field [ ':', order ] { ',' sort } + order = ( "asc" | "desc" ) + field = (* must match the name of entity's attributes *) + ``` + The optional _order_ should be one of "asc" or "desc". If + omitted, the order defaults to "asc". + + Each _field_ name must correspond to one of the columns of the + table holding the entities being queried. Those corresponding + to JSON objects in the database may use a ':' to delimit the + column name and the object key, + e.g. `purl:qualifiers:type:desc` + required: false + schema: + type: string + - name: limit + in: query + description: |- + The maximum number of items to be deleted. + + Zero means that no items will be deleted. + required: false + schema: + type: integer + format: int64 + minimum: 0 + - name: offset + in: query + description: From where the delete operation should start.
+ required: true + schema: + type: integer + format: int64 + minimum: 0 + responses: + '200': + description: Pruned SBOMs + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteContext' /api/v3/sbom/{id}/advisory: get: tags: @@ -4571,6 +4784,120 @@ components: properties: source: type: string + DeletableSbom: + allOf: + - $ref: '#/components/schemas/DeletableSbomHead' + - type: object + required: + - state + properties: + state: + $ref: '#/components/schemas/DeletableSbomState' + description: The deletion status of the SBOM + description: |- + Only keeps information needed for the pruning operation, i.e. the SBOM id and + the id of the associated document + DeletableSbomHead: + type: object + description: Necessary information about an SBOM that is going to be deleted + required: + - sbom_id + - source_document_id + properties: + document_id: + type: + - string + - 'null' + description: SBOM document id + sbom_id: + type: string + description: SBOM id (primary key of the `sbom` table) + source_document_id: + type: string + description: SBOM source document id (primary key of the `source_document` table) + DeletableSbomState: + oneOf: + - type: string + description: |- + An SBOM has been fetched, its `id`, `document_id`, and `source_document_id` + are present + enum: + - fetched + - type: object + description: |- + An SBOM has been successfully removed from the database, use the provided + storage key to remove its leftovers from the storage + required: + - deleted + properties: + deleted: + oneOf: + - type: 'null' + - $ref: '#/components/schemas/StorageKey' + description: |- + An SBOM has been successfully removed from the database, use the provided + storage key to remove its leftovers from the storage + - type: string + description: |- + The deletion of an SBOM has been skipped, probably because some information + has not been provided + enum: + - skipped + - type: object + description: |- + The deletion of an SBOM has failed, use the provided error
message to + see why + required: + - failed + properties: + failed: + type: string + description: |- + The deletion of an SBOM has failed, use the provided error message to + see why + description: An SBOM state during its deletion + DeleteContext: + type: object + description: The context of SBOM prune/delete operation + required: + - fetched + - skipped + - deleted + - failed + - limit + properties: + deleted: + type: integer + format: int64 + description: The total number of deleted SBOMs + minimum: 0 + failed: + type: integer + format: int64 + description: The total number of SBOMs that failed to be deleted + minimum: 0 + fetched: + type: integer + format: int64 + description: The number of fetched SBOMs + minimum: 0 + limit: + type: integer + format: int64 + description: The fetch limit + minimum: 0 + sboms: + type: + - array + - 'null' + items: + $ref: '#/components/schemas/DeletableSbom' + description: SBOMs + skipped: + type: integer + format: int64 + description: The total number of SBOMs skipped during the deletion + minimum: 0 ErrorInformation: type: object required: @@ -6399,6 +6726,8 @@ components: properties: cpe: type: string + StorageKey: + type: string Update: type: object description: |