From f7710046bc00cde8aeb65fa44bad0babd497ab76 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Fri, 27 Feb 2026 15:19:18 +0100 Subject: [PATCH 1/9] BE-311: Implement entity deletion with multi-level scoping and provenance --- Cargo.lock | 1 + .../rust/src/knowledge/entity/provenance.rs | 60 + libs/@local/graph/api/openapi/openapi.json | 58 +- libs/@local/graph/postgres-store/Cargo.toml | 1 + .../store/postgres/knowledge/entity/delete.rs | 820 ++++++++++++ .../store/postgres/knowledge/entity/mod.rs | 48 +- .../graph/postgres-store/tests/common/mod.rs | 70 ++ .../postgres-store/tests/deletion/drafts.rs | 772 ++++++++++++ .../postgres-store/tests/deletion/erase.rs | 455 +++++++ .../postgres-store/tests/deletion/links.rs | 762 ++++++++++++ .../postgres-store/tests/deletion/main.rs | 474 +++++++ .../postgres-store/tests/deletion/purge.rs | 1104 +++++++++++++++++ .../tests/deletion/validation.rs | 347 ++++++ .../postgres-store/tests/principals/main.rs | 77 +- libs/@local/graph/store/src/entity/mod.rs | 12 +- libs/@local/graph/store/src/entity/store.rs | 87 +- libs/@local/graph/store/src/error.rs | 20 +- libs/@local/graph/test-server/src/lib.rs | 2 +- libs/@local/graph/type-fetcher/src/store.rs | 18 +- .../benches/graph/scenario/stages/reset_db.rs | 2 +- tests/graph/integration/postgres/lib.rs | 18 +- 21 files changed, 5077 insertions(+), 131 deletions(-) create mode 100644 libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs create mode 100644 libs/@local/graph/postgres-store/tests/common/mod.rs create mode 100644 libs/@local/graph/postgres-store/tests/deletion/drafts.rs create mode 100644 libs/@local/graph/postgres-store/tests/deletion/erase.rs create mode 100644 libs/@local/graph/postgres-store/tests/deletion/links.rs create mode 100644 libs/@local/graph/postgres-store/tests/deletion/main.rs create mode 100644 libs/@local/graph/postgres-store/tests/deletion/purge.rs create mode 100644 
libs/@local/graph/postgres-store/tests/deletion/validation.rs diff --git a/Cargo.lock b/Cargo.lock index 27c456d179e..9d5ff0ea5b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3447,6 +3447,7 @@ dependencies = [ "hash-graph-migrations", "hash-graph-store", "hash-graph-temporal-versioning", + "hash-graph-test-data", "hash-graph-types", "hash-graph-validation", "hash-status", diff --git a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs index 00fdab3415e..45260b2b3ef 100644 --- a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs +++ b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs @@ -63,6 +63,25 @@ pub struct ProvidedEntityEditionProvenance { pub origin: OriginProvenance, } +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[cfg_attr(target_arch = "wasm32", derive(tsify::Tsify))] +#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[allow( + clippy::struct_field_names, + clippy::allow_attributes, + reason = "prefix required for flattened serde serialization into `InferredEntityProvenance`. 
+ `#[expect]` does not work here because serde's derive macro interferes with lint \ + expectation fulfillment (https://github.com/rust-lang/rust-clippy/issues/12035)" +)] +pub struct EntityDeletionProvenance { + pub deleted_by_id: ActorEntityUuid, + #[cfg_attr(target_arch = "wasm32", tsify(type = "Timestamp"))] + pub deleted_at_transaction_time: Timestamp, + #[cfg_attr(target_arch = "wasm32", tsify(type = "Timestamp"))] + pub deleted_at_decision_time: Timestamp, +} + #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[cfg_attr(target_arch = "wasm32", derive(tsify::Tsify))] #[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] @@ -81,6 +100,9 @@ pub struct InferredEntityProvenance { #[cfg_attr(target_arch = "wasm32", tsify(type = "Timestamp"))] #[serde(default, skip_serializing_if = "Option::is_none")] pub first_non_draft_created_at_decision_time: Option>, + #[cfg_attr(feature = "utoipa", schema(nullable = false))] + #[serde(default, flatten, skip_serializing_if = "Option::is_none")] + pub deletion: Option, } #[cfg(feature = "postgres")] @@ -119,3 +141,41 @@ pub struct EntityProvenance { pub inferred: InferredEntityProvenance, pub edition: EntityEditionProvenance, } + +#[cfg(test)] +mod tests { + use uuid::Uuid; + + use super::*; + + #[test] + fn inferred_provenance_roundtrip_without_deletion() { + let json = serde_json::json!({ + "createdById": Uuid::new_v4(), + "createdAtTransactionTime": Timestamp::::now(), + "createdAtDecisionTime": Timestamp::::now(), + }); + let provenance: InferredEntityProvenance = + serde_json::from_value(json.clone()).expect("deserialization failed"); + assert!(provenance.deletion.is_none()); + let roundtrip = serde_json::to_value(&provenance).expect("serialization failed"); + assert_eq!(roundtrip, json); + } + + #[test] + fn inferred_provenance_roundtrip_with_deletion() { + let json = serde_json::json!({ + "createdById": Uuid::new_v4(), + "createdAtTransactionTime": Timestamp::::now(), + 
"createdAtDecisionTime": Timestamp::::now(), + "deletedById": Uuid::new_v4(), + "deletedAtTransactionTime": Timestamp::::now(), + "deletedAtDecisionTime": Timestamp::::now(), + }); + let provenance: InferredEntityProvenance = + serde_json::from_value(json.clone()).expect("deserialization failed"); + assert!(provenance.deletion.is_some()); + let roundtrip = serde_json::to_value(&provenance).expect("serialization failed"); + assert_eq!(roundtrip, json); + } +} diff --git a/libs/@local/graph/api/openapi/openapi.json b/libs/@local/graph/api/openapi/openapi.json index 33f73250465..5288438f35d 100644 --- a/libs/@local/graph/api/openapi/openapi.json +++ b/libs/@local/graph/api/openapi/openapi.json @@ -5592,38 +5592,48 @@ ] }, "InferredEntityProvenance": { - "type": "object", - "required": [ - "createdById", - "createdAtTransactionTime", - "createdAtDecisionTime" - ], - "properties": { - "createdAtDecisionTime": { - "$ref": "#/components/schemas/Timestamp" - }, - "createdAtTransactionTime": { - "$ref": "#/components/schemas/Timestamp" - }, - "createdById": { - "$ref": "#/components/schemas/ActorEntityUuid" - }, - "firstNonDraftCreatedAtDecisionTime": { + "allOf": [ + { "allOf": [ { - "$ref": "#/components/schemas/Timestamp" + "$ref": "#/components/schemas/EntityDeletionProvenance" } ] }, - "firstNonDraftCreatedAtTransactionTime": { - "allOf": [ - { + { + "type": "object", + "required": [ + "createdById", + "createdAtTransactionTime", + "createdAtDecisionTime" + ], + "properties": { + "createdAtDecisionTime": { "$ref": "#/components/schemas/Timestamp" + }, + "createdAtTransactionTime": { + "$ref": "#/components/schemas/Timestamp" + }, + "createdById": { + "$ref": "#/components/schemas/ActorEntityUuid" + }, + "firstNonDraftCreatedAtDecisionTime": { + "allOf": [ + { + "$ref": "#/components/schemas/Timestamp" + } + ] + }, + "firstNonDraftCreatedAtTransactionTime": { + "allOf": [ + { + "$ref": "#/components/schemas/Timestamp" + } + ] } - ] + } } - }, - "additionalProperties": 
false + ] }, "JsonSchemaValueType": { "type": "string", diff --git a/libs/@local/graph/postgres-store/Cargo.toml b/libs/@local/graph/postgres-store/Cargo.toml index c0f478222d4..a96ddd34dc6 100644 --- a/libs/@local/graph/postgres-store/Cargo.toml +++ b/libs/@local/graph/postgres-store/Cargo.toml @@ -52,6 +52,7 @@ uuid = { workspace = true, features = ["v4", "serde"] } [dev-dependencies] hash-graph-migrations = { workspace = true } +hash-graph-test-data = { workspace = true } hash-telemetry = { workspace = true } indoc = { workspace = true } pretty_assertions = { workspace = true } diff --git a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs new file mode 100644 index 00000000000..ccd8010bc67 --- /dev/null +++ b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs @@ -0,0 +1,820 @@ +use core::ops::AddAssign; +use std::collections::{HashMap, HashSet, hash_map::Entry}; + +use error_stack::{Report, ResultExt as _}; +use futures::TryStreamExt as _; +use hash_graph_store::{ + entity::{ + DeleteEntitiesParams, DeletionScope, DeletionSummary, EntityQueryPath, LinkDeletionBehavior, + }, + error::DeletionError, + filter::Filter, + subgraph::temporal_axes::{PinnedTemporalAxis, QueryTemporalAxes, VariableTemporalAxis}, +}; +use hash_graph_temporal_versioning::{ + DecisionTime, LimitedTemporalBound, TemporalBound, TemporalTagged as _, Timestamp, + TransactionTime, +}; +use postgres_types::ToSql; +use tokio_postgres::Transaction; +use tracing::Instrument as _; +use type_system::{ + knowledge::{ + Entity, + entity::{ + id::{DraftId, EntityEditionId, EntityUuid}, + provenance::EntityDeletionProvenance, + }, + }, + principal::{actor::ActorEntityUuid, actor_group::WebId}, +}; + +use crate::store::{AsClient as _, PostgresStore, postgres::query::SelectCompiler}; + +/// Per-table row counts from 
[`delete_target_data`](PostgresStore::delete_target_data). +#[derive(Default)] +struct SatelliteDeletionCounts { + is_of_type: u64, + embeddings: u64, + temporal_metadata: u64, + editions: u64, + drafts: u64, +} + +impl AddAssign for SatelliteDeletionCounts { + fn add_assign(&mut self, rhs: Self) { + self.is_of_type += rhs.is_of_type; + self.embeddings += rhs.embeddings; + self.temporal_metadata += rhs.temporal_metadata; + self.editions += rhs.editions; + self.drafts += rhs.drafts; + } +} + +/// Parallel vecs for `UNNEST`-based batch operations on full entities. +struct FullEntityDeletionTarget { + web_ids: Vec, + entity_uuids: Vec, +} + +struct DraftOnlyDeletionTarget { + draft_ids: Vec, +} + +#[derive(Clone, Copy)] +enum DeletionTarget<'a> { + Full(&'a FullEntityDeletionTarget), + Drafts(&'a DraftOnlyDeletionTarget), +} + +/// Entity deletion operations. +/// +/// All methods require a transaction to guarantee correctness of the locking protocol: +/// +/// 1. [`collect_entity_edition_ids`] acquires `FOR UPDATE` on `entity_temporal_metadata` rows, +/// serializing with concurrent [`patch_entity`] calls (which use `FOR NO KEY UPDATE NOWAIT`). +/// 2. [`lock_entity_ids_for_erase`] acquires `FOR UPDATE` on `entity_ids` rows (erase scope only), +/// serializing with concurrent link creation (which needs `KEY SHARE` for FK checks). +/// +/// Without a transaction these locks would be released immediately, defeating the purpose. +/// +/// [`collect_entity_edition_ids`]: Self::collect_entity_edition_ids +/// [`lock_entity_ids_for_erase`]: Self::lock_entity_ids_for_erase +/// [`patch_entity`]: hash_graph_store::entity::EntityStore::patch_entity +impl PostgresStore> { + /// Finds entities matching `filter` and partitions them into full vs draft-only deletions. + /// + /// A published match (or a match that subsumes all drafts of a draft-only entity) produces + /// a [`FullEntityDeletionTarget`]. 
Draft matches on entities with a published version or + /// unmatched drafts remain [`DraftOnlyDeletionTarget`]. + async fn select_entities_for_deletion( + &self, + filter: &Filter<'_, Entity>, + include_drafts: bool, + decision_time: Timestamp, + transaction_time: Timestamp, + ) -> Result<(FullEntityDeletionTarget, DraftOnlyDeletionTarget), Report> { + let temporal_axes = QueryTemporalAxes::TransactionTime { + pinned: PinnedTemporalAxis::new(decision_time), + variable: VariableTemporalAxis::new( + TemporalBound::Inclusive(transaction_time), + LimitedTemporalBound::Inclusive(transaction_time), + ), + }; + + let mut compiler = SelectCompiler::new(Some(&temporal_axes), include_drafts); + compiler + .add_filter(filter) + .change_context(DeletionError::Store)?; + + let web_id_index = compiler.add_selection_path(&EntityQueryPath::WebId); + let entity_uuid_index = compiler.add_selection_path(&EntityQueryPath::Uuid); + let draft_id_index = compiler.add_selection_path(&EntityQueryPath::DraftId); + + let (statement, parameters) = compiler.compile(); + + // Empty draft ID vec → full entity deletion; non-empty → draft-only deletion. + let mut entity_ids = HashMap::<(WebId, EntityUuid), Vec>::new(); + + self.as_client() + .query_raw(&statement, parameters.iter().copied()) + .instrument(tracing::info_span!( + "SELECT entities for deletion", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store)? 
+ .try_for_each(|row| { + let web_id: WebId = row.get(web_id_index); + let entity_uuid: EntityUuid = row.get(entity_uuid_index); + let draft_id: Option = row.get(draft_id_index); + + match (entity_ids.entry((web_id, entity_uuid)), draft_id) { + (Entry::Vacant(entry), Some(draft_id)) => { + entry.insert(vec![draft_id]); + } + (Entry::Vacant(entry), None) => { + entry.insert(Vec::new()); + } + (Entry::Occupied(mut entry), Some(draft_id)) => { + let tracked = entry.get_mut(); + if !tracked.is_empty() { + tracked.push(draft_id); + } + } + (Entry::Occupied(mut entry), None) => { + entry.get_mut().clear(); + } + } + + async { Ok(()) } + }) + .await + .change_context(DeletionError::Store)?; + + if include_drafts { + self.promote_draft_only_entities(&mut entity_ids).await?; + } + + let mut full_web_ids = Vec::new(); + let mut full_entity_uuids = Vec::new(); + let mut draft_ids = Vec::new(); + + for ((web_id, entity_uuid), drafts) in entity_ids { + if drafts.is_empty() { + full_web_ids.push(web_id); + full_entity_uuids.push(entity_uuid); + } else { + draft_ids.extend(drafts); + } + } + + Ok(( + FullEntityDeletionTarget { + web_ids: full_web_ids, + entity_uuids: full_entity_uuids, + }, + DraftOnlyDeletionTarget { draft_ids }, + )) + } + + /// Promotes draft-only entities to full deletes when eligible. + /// + /// An entity is promoted when all its drafts are in the matched set and no published version + /// exists, so that deletion provenance can be stamped on `entity_ids`. 
+ async fn promote_draft_only_entities( + &self, + entity_ids: &mut HashMap<(WebId, EntityUuid), Vec>, + ) -> Result<(), Report> { + let (draft_only_web_ids, draft_only_entity_uuids) = entity_ids + .iter() + .filter(|(_, drafts)| !drafts.is_empty()) + .map(|((web_id, entity_uuid), _)| (*web_id, *entity_uuid)) + .collect::<(Vec, Vec)>(); + + if draft_only_web_ids.is_empty() { + return Ok(()); + } + + let all_matched_draft_ids = entity_ids + .iter() + .filter(|(_, drafts)| !drafts.is_empty()) + .flat_map(|(_, drafts)| drafts.iter().copied()) + .collect::>(); + + // Entities that have a published version or drafts outside our matched set. + let entities_with_remaining_data = self + .as_client() + .query_raw( + "SELECT DISTINCT web_id, entity_uuid + FROM entity_temporal_metadata + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + ) + AND (draft_id IS NULL + OR NOT (draft_id = ANY($3::UUID[])))", + [ + &draft_only_web_ids as &(dyn ToSql + Sync), + &draft_only_entity_uuids, + &all_matched_draft_ids, + ], + ) + .instrument(tracing::info_span!( + "SELECT entities with remaining data", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store)? + .map_ok(|row| { + let web_id: WebId = row.get(0); + let entity_uuid: EntityUuid = row.get(1); + (web_id, entity_uuid) + }) + .try_collect::>() + .await + .change_context(DeletionError::Store)?; + + for (web_id, entity_uuid) in draft_only_web_ids.iter().zip(&draft_only_entity_uuids) { + let key = (*web_id, *entity_uuid); + if !entities_with_remaining_data.contains(&key) + && let Some(drafts) = entity_ids.get_mut(&key) + { + drafts.clear(); + } + } + + Ok(()) + } + + /// Collects edition IDs and acquires `FOR UPDATE` locks on `entity_temporal_metadata` rows. + /// + /// The `FOR UPDATE` lock serializes with concurrent [`patch_entity`] calls, which acquire + /// `FOR NO KEY UPDATE NOWAIT` via [`lock_entity_edition`]. 
This means: + /// + /// - If we lock first, the concurrent patch immediately fails with `RaceConditionOnUpdate`. + /// - If the patch locks first, we block until it commits. Since this is a separate statement + /// from [`select_entities_for_deletion`], it gets a fresh `READ COMMITTED` snapshot that + /// includes any edition IDs the patch inserted. + /// + /// The result may contain duplicate edition IDs (no `DISTINCT`, incompatible with + /// `FOR UPDATE`). Downstream `DELETE ... WHERE entity_edition_id = ANY(...)` handles + /// duplicates correctly. + /// + /// [`patch_entity`]: hash_graph_store::entity::EntityStore::patch_entity + /// [`lock_entity_edition`]: PostgresStore::lock_entity_edition + /// [`select_entities_for_deletion`]: Self::select_entities_for_deletion + async fn collect_entity_edition_ids( + &mut self, + target: DeletionTarget<'_>, + ) -> Result, Report> { + let rows = match target { + DeletionTarget::Full(entities) => self + .as_mut_client() + .query( + "SELECT entity_edition_id FROM entity_temporal_metadata + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + ) + FOR UPDATE", + &[&entities.web_ids, &entities.entity_uuids], + ) + .instrument(tracing::info_span!( + "SELECT entity_edition_ids FOR UPDATE", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store)?, + DeletionTarget::Drafts(drafts) => self + .as_mut_client() + .query( + "SELECT entity_edition_id FROM entity_temporal_metadata + WHERE draft_id = ANY($1::UUID[]) + FOR UPDATE", + &[&drafts.draft_ids], + ) + .instrument(tracing::info_span!( + "SELECT entity_edition_ids FOR UPDATE", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store)?, + }; + Ok(rows + .into_iter() + .map(|row| row.get::<_, EntityEditionId>(0)) + .collect()) + } + + async fn delete_entity_embeddings( + &mut self, + target: 
DeletionTarget<'_>, + ) -> Result> { + match target { + DeletionTarget::Full(entities) => self + .as_mut_client() + .execute( + "DELETE FROM entity_embeddings + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + )", + &[&entities.web_ids, &entities.entity_uuids], + ) + .instrument(tracing::info_span!( + "DELETE entity_embeddings", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store), + DeletionTarget::Drafts(drafts) => self + .as_mut_client() + .execute( + "DELETE FROM entity_embeddings + WHERE draft_id = ANY($1::UUID[])", + &[&drafts.draft_ids], + ) + .instrument(tracing::info_span!( + "DELETE entity_embeddings", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store), + } + } + + async fn delete_entity_is_of_type( + &mut self, + edition_ids: &[EntityEditionId], + ) -> Result> { + self.as_mut_client() + .execute( + "DELETE FROM entity_is_of_type + WHERE entity_edition_id = ANY($1::UUID[])", + &[&edition_ids], + ) + .instrument(tracing::info_span!( + "DELETE entity_is_of_type", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store) + } + + async fn delete_entity_temporal_metadata( + &mut self, + target: DeletionTarget<'_>, + ) -> Result> { + match target { + DeletionTarget::Full(entities) => self + .as_mut_client() + .execute( + "DELETE FROM entity_temporal_metadata + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + )", + &[&entities.web_ids, &entities.entity_uuids], + ) + .instrument(tracing::info_span!( + "DELETE entity_temporal_metadata", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store), + DeletionTarget::Drafts(drafts) => self + .as_mut_client() + .execute( + "DELETE FROM 
entity_temporal_metadata + WHERE draft_id = ANY($1::UUID[])", + &[&drafts.draft_ids], + ) + .instrument(tracing::info_span!( + "DELETE entity_temporal_metadata", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store), + } + } + + async fn delete_entity_editions( + &mut self, + edition_ids: &[EntityEditionId], + ) -> Result> { + self.as_mut_client() + .execute( + "DELETE FROM entity_editions + WHERE entity_edition_id = ANY($1::UUID[])", + &[&edition_ids], + ) + .instrument(tracing::info_span!( + "DELETE entity_editions", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store) + } + + async fn delete_entity_drafts( + &mut self, + target: DeletionTarget<'_>, + ) -> Result> { + match target { + DeletionTarget::Full(entities) => self + .as_mut_client() + .execute( + "DELETE FROM entity_drafts + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + )", + &[&entities.web_ids, &entities.entity_uuids], + ) + .instrument(tracing::info_span!( + "DELETE entity_drafts", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store), + DeletionTarget::Drafts(drafts) => self + .as_mut_client() + .execute( + "DELETE FROM entity_drafts + WHERE draft_id = ANY($1::UUID[])", + &[&drafts.draft_ids], + ) + .instrument(tracing::info_span!( + "DELETE entity_drafts", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store), + } + } + + /// Removes all `entity_edge` rows belonging to entities in the deletion batch. + /// + /// Each link relationship is denormalized into four rows (two per endpoint): an `outgoing` row + /// (`source=link, target=endpoint`) and a paired `incoming` row (`source=endpoint, + /// target=link`). 
This deletes exactly the rows owned by batch entities: + /// - `source IN (batch) AND direction = 'outgoing'` — the canonical outgoing edges + /// - `target IN (batch) AND direction = 'incoming'` — the denormalized incoming copies + /// + /// Rows belonging to link entities *outside* the batch (where the batch entity appears as an + /// endpoint) are intentionally preserved — the tombstone in `entity_ids` satisfies their FKs. + /// + /// # Invariant + /// + /// Correctness depends on outgoing/incoming edges always being created in pairs. For every + /// `(source=endpoint, target=link, direction='incoming')` row there must exist a corresponding + /// `(source=link, target=endpoint, direction='outgoing')` row. If this pairing is broken + /// (e.g. by direct DB manipulation), [`count_incoming_links`](Self::count_incoming_links) may + /// miss the orphaned incoming row and [`delete_entity_ids`](Self::delete_entity_ids) will fail + /// with an FK violation. This invariant is enforced by application code in `create_entities`, + /// not by a database constraint. + /// + /// Only applies to full entity deletions (`entity_edge` has no `draft_id`). + async fn delete_entity_edge( + &mut self, + target: &FullEntityDeletionTarget, + ) -> Result> { + self.as_mut_client() + .execute( + "DELETE FROM entity_edge + WHERE ( + (source_web_id, source_entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + ) + AND direction = 'outgoing' + ) + OR ( + (target_web_id, target_entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + ) + AND direction = 'incoming' + )", + &[&target.web_ids, &target.entity_uuids], + ) + .instrument(tracing::info_span!( + "DELETE entity_edge", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store) + } + + /// Counts incoming links from entities outside the deletion batch. 
+ /// + /// Only counts `direction = 'outgoing'` edges — these represent real link + /// relationships (a link entity's edge to its endpoint). Denormalized + /// `direction = 'incoming'` edges (stored for query optimization) are excluded + /// because they don't represent independent link relationships and are cleaned + /// up by [`delete_entity_edge`](Self::delete_entity_edge). + /// + /// # Invariant + /// + /// This check is sufficient to guard [`delete_entity_ids`](Self::delete_entity_ids) only if + /// every `direction = 'incoming'` row has a paired `direction = 'outgoing'` row (see + /// [`delete_entity_edge`](Self::delete_entity_edge) for details). If the pairing invariant is + /// violated, an orphaned incoming row (`source = batch_entity`) would not be counted here but + /// would still cause an FK violation when `entity_ids` is deleted. + async fn count_incoming_links( + &self, + target: &FullEntityDeletionTarget, + ) -> Result> { + let row = self + .as_client() + .query_one( + "SELECT COUNT(*) FROM entity_edge + WHERE (target_web_id, target_entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + ) + AND (source_web_id, source_entity_uuid) NOT IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + ) + AND direction = 'outgoing'", + &[&target.web_ids, &target.entity_uuids], + ) + .instrument(tracing::info_span!( + "SELECT COUNT incoming_links", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store)?; + Ok(row.get::<_, i64>(0).cast_unsigned()) + } + + /// Merges [`EntityDeletionProvenance`] into the existing `entity_ids` provenance JSONB. 
+ async fn update_entity_ids_provenance( + &mut self, + target: &FullEntityDeletionTarget, + actor_id: ActorEntityUuid, + decision_time: Timestamp, + transaction_time: Timestamp, + ) -> Result> { + let provenance = EntityDeletionProvenance { + deleted_by_id: actor_id, + deleted_at_transaction_time: transaction_time, + deleted_at_decision_time: decision_time, + }; + self.as_mut_client() + .execute( + "UPDATE entity_ids + SET provenance = provenance || $3::jsonb + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + )", + &[ + &target.web_ids, + &target.entity_uuids, + &postgres_types::Json(&provenance), + ], + ) + .instrument(tracing::info_span!( + "UPDATE entity_ids provenance", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store) + } + + /// Acquires `FOR UPDATE` locks on `entity_ids` rows to serialize with concurrent link + /// creation (erase scope only). + /// + /// Concurrent `INSERT INTO entity_edge` performs an FK check that acquires a `KEY SHARE` lock + /// on the referenced `entity_ids` row. Our `FOR UPDATE` lock conflicts with `KEY SHARE`, + /// blocking the concurrent insert until we commit — at which point the row is gone and their + /// insert fails with an FK violation. + /// + /// This closes the TOCTOU gap between [`count_incoming_links`](Self::count_incoming_links) + /// (which reads `entity_edge`) and [`delete_entity_ids`](Self::delete_entity_ids) (which + /// removes the row). Without this lock, a concurrent transaction can insert an edge targeting + /// our entity between the check and the delete, causing a raw FK violation instead of a clean + /// [`DeletionError::IncomingLinksExist`]. + /// + /// Not needed for purge scope: the tombstoned `entity_ids` row satisfies FK checks from + /// concurrent link creation, so no lock conflict is required. 
+ async fn lock_entity_ids_for_erase( + &mut self, + target: &FullEntityDeletionTarget, + ) -> Result<(), Report> { + self.as_mut_client() + .query( + "SELECT 1 FROM entity_ids + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + ) + FOR UPDATE", + &[&target.web_ids, &target.entity_uuids], + ) + .instrument(tracing::info_span!( + "LOCK entity_ids FOR UPDATE", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store)?; + Ok(()) + } + + /// Removes `entity_ids` rows entirely (erase scope, no tombstone). + async fn delete_entity_ids( + &mut self, + target: &FullEntityDeletionTarget, + ) -> Result> { + self.as_mut_client() + .execute( + "DELETE FROM entity_ids + WHERE (web_id, entity_uuid) IN ( + SELECT * FROM UNNEST($1::UUID[], $2::UUID[]) + )", + &[&target.web_ids, &target.entity_uuids], + ) + .instrument(tracing::info_span!( + "DELETE entity_ids", + otel.kind = "client", + db.system = "postgresql", + peer.service = "Postgres", + )) + .await + .change_context(DeletionError::Store) + } + + /// Deletes per-table data in FK-safe order for a single [`DeletionTarget`]. + /// + /// Returns per-table row counts for the caller to log. 
+ async fn delete_target_data( + &mut self, + target: DeletionTarget<'_>, + ) -> Result> { + let edition_ids = self.collect_entity_edition_ids(target).await?; + let is_of_type = self.delete_entity_is_of_type(&edition_ids).await?; + let embeddings = self.delete_entity_embeddings(target).await?; + let temporal_metadata = self.delete_entity_temporal_metadata(target).await?; + let editions = self.delete_entity_editions(&edition_ids).await?; + let drafts = self.delete_entity_drafts(target).await?; + + Ok(SatelliteDeletionCounts { + is_of_type, + embeddings, + temporal_metadata, + editions, + drafts, + }) + } + + /// Selects matching entities, validates link constraints, deletes all associated data, + /// and either tombstones (purge) or fully removes (erase) `entity_ids`. + /// + /// # Errors + /// + /// - [`InvalidDecisionTime`] if `decision_time` exceeds `transaction_time` + /// - [`IncomingLinksExist`] if incoming links exist and [`LinkDeletionBehavior::Error`] or + /// [`DeletionScope::Erase`] is requested + /// - [`Store`] if a database operation fails + /// + /// [`InvalidDecisionTime`]: DeletionError::InvalidDecisionTime + /// [`IncomingLinksExist`]: DeletionError::IncomingLinksExist + /// [`Store`]: DeletionError::Store + pub(super) async fn execute_entity_deletion( + &mut self, + actor_id: ActorEntityUuid, + params: DeleteEntitiesParams<'_>, + ) -> Result> { + let transaction_time = Timestamp::::now(); + let decision_time = params + .decision_time + .unwrap_or_else(|| transaction_time.cast()); + + if decision_time > transaction_time.cast() { + return Err(Report::new(DeletionError::InvalidDecisionTime)); + } + + let (full_target, draft_target) = self + .select_entities_for_deletion( + ¶ms.filter, + params.include_drafts, + decision_time, + transaction_time, + ) + .await?; + + let summary = DeletionSummary { + full_entities: full_target.web_ids.len(), + draft_deletions: draft_target.draft_ids.len(), + }; + + if summary.full_entities == 0 && 
summary.draft_deletions == 0 { + return Ok(summary); + } + + if summary.full_entities > 0 { + if matches!(params.scope, DeletionScope::Erase) { + self.lock_entity_ids_for_erase(&full_target).await?; + } + + let should_check = match ¶ms.scope { + DeletionScope::Purge { link_behavior } => { + matches!(link_behavior, LinkDeletionBehavior::Error) + } + DeletionScope::Erase => true, + }; + if should_check { + let count = self.count_incoming_links(&full_target).await?; + if count > 0 { + return Err(Report::new(DeletionError::IncomingLinksExist { count })); + } + } + } + + let mut satellite_counts = SatelliteDeletionCounts::default(); + if summary.full_entities > 0 { + satellite_counts += self + .delete_target_data(DeletionTarget::Full(&full_target)) + .await?; + } + if summary.draft_deletions > 0 { + satellite_counts += self + .delete_target_data(DeletionTarget::Drafts(&draft_target)) + .await?; + } + + let mut entity_edge = 0_u64; + let mut entity_ids_affected = 0_u64; + if summary.full_entities > 0 { + entity_edge = self.delete_entity_edge(&full_target).await?; + + let expected = full_target.web_ids.len() as u64; + match params.scope { + DeletionScope::Purge { .. 
} => { + entity_ids_affected = self + .update_entity_ids_provenance( + &full_target, + actor_id, + decision_time, + transaction_time, + ) + .await?; + if entity_ids_affected != expected { + return Err(Report::new(DeletionError::InconsistentEntityIds { + expected, + actual: entity_ids_affected, + })); + } + } + DeletionScope::Erase => { + entity_ids_affected = self.delete_entity_ids(&full_target).await?; + if entity_ids_affected != expected { + return Err(Report::new(DeletionError::InconsistentEntityIds { + expected, + actual: entity_ids_affected, + })); + } + } + } + } + + tracing::trace!( + full_entities = summary.full_entities, + draft_deletions = summary.draft_deletions, + is_of_type = satellite_counts.is_of_type, + embeddings = satellite_counts.embeddings, + temporal_metadata = satellite_counts.temporal_metadata, + editions = satellite_counts.editions, + drafts = satellite_counts.drafts, + entity_edge, + entity_ids_affected, + "entity deletion complete" + ); + + Ok(summary) + } +} diff --git a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/mod.rs b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/mod.rs index 5bf8edd0fb8..c3a82333bb5 100644 --- a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/mod.rs +++ b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/mod.rs @@ -1,3 +1,4 @@ +mod delete; mod query; mod read; use alloc::borrow::Cow; @@ -15,15 +16,16 @@ use hash_graph_authorization::policies::{ }; use hash_graph_store::{ entity::{ - CountEntitiesParams, CreateEntityParams, EmptyEntityTypes, EntityPermissions, - EntityQueryCursor, EntityQueryPath, EntityQuerySorting, EntityStore, EntityTypeRetrieval, - EntityTypesError, EntityValidationReport, EntityValidationType, - HasPermissionForEntitiesParams, PatchEntityParams, QueryConversion, QueryEntitiesParams, - QueryEntitiesResponse, QueryEntitySubgraphParams, QueryEntitySubgraphResponse, - UpdateEntityEmbeddingsParams, 
ValidateEntityComponents, ValidateEntityParams, + CountEntitiesParams, CreateEntityParams, DeleteEntitiesParams, DeletionSummary, + EmptyEntityTypes, EntityPermissions, EntityQueryCursor, EntityQueryPath, + EntityQuerySorting, EntityStore, EntityTypeRetrieval, EntityTypesError, + EntityValidationReport, EntityValidationType, HasPermissionForEntitiesParams, + PatchEntityParams, QueryConversion, QueryEntitiesParams, QueryEntitiesResponse, + QueryEntitySubgraphParams, QueryEntitySubgraphResponse, UpdateEntityEmbeddingsParams, + ValidateEntityComponents, ValidateEntityParams, }, entity_type::{EntityTypeQueryPath, EntityTypeStore as _, IncludeEntityTypeOption}, - error::{CheckPermissionError, InsertionError, QueryError, UpdateError}, + error::{CheckPermissionError, DeletionError, InsertionError, QueryError, UpdateError}, filter::{ Filter, FilterExpression, FilterExpressionList, Parameter, ParameterList, protection::transform_filter, @@ -87,7 +89,7 @@ use uuid::Uuid; use crate::store::{ AsClient, PostgresStore, - error::{DeletionError, EntityDoesNotExist, RaceConditionOnUpdate}, + error::{EntityDoesNotExist, RaceConditionOnUpdate}, postgres::{ ResponseCountMap, TraversalContext, crud::{QueryIndices, TypedRow}, @@ -407,9 +409,9 @@ where /// /// # Errors /// - /// Returns [`DeletionError`] if the database deletion operation fails. + /// Returns [`DeletionError::Store`] if the database deletion operation fails. 
#[tracing::instrument(level = "info", skip(self))] - pub async fn delete_entities(&self) -> Result<(), Report> { + pub async fn delete_all_entities(&self) -> Result<(), Report> { tracing::debug!("Deleting all entities"); self.as_client() .client() @@ -431,7 +433,7 @@ where peer.service = "Postgres", )) .await - .change_context(DeletionError)?; + .change_context(DeletionError::Store)?; Ok(()) } @@ -1074,6 +1076,7 @@ where .draft_id .is_none() .then_some(decision_time), + deletion: None, }, edition: EntityEditionProvenance { created_by_id: actor_uuid, @@ -2338,6 +2341,29 @@ where Ok(entity) } + #[tracing::instrument(level = "info", skip(self))] + async fn delete_entities( + &mut self, + actor_id: ActorEntityUuid, + params: DeleteEntitiesParams<'_>, + ) -> Result> { + // TODO: Authorization — check delete permission via PolicyComponents + + let mut transaction = self + .transaction() + .await + .change_context(DeletionError::Store)?; + let summary = transaction + .execute_entity_deletion(actor_id, params) + .await?; + transaction + .commit() + .await + .change_context(DeletionError::Store)?; + + Ok(summary) + } + #[tracing::instrument(level = "info", skip(self, params))] #[expect(clippy::too_many_lines)] async fn update_entity_embeddings( diff --git a/libs/@local/graph/postgres-store/tests/common/mod.rs b/libs/@local/graph/postgres-store/tests/common/mod.rs new file mode 100644 index 00000000000..6bfc8480afc --- /dev/null +++ b/libs/@local/graph/postgres-store/tests/common/mod.rs @@ -0,0 +1,70 @@ +use hash_graph_postgres_store::{ + Environment, load_env, + store::{ + DatabaseConnectionInfo, DatabasePoolConfig, DatabaseType, PostgresStorePool, + PostgresStoreSettings, + }, +}; +use hash_graph_store::pool::StorePool; +use hash_telemetry::logging::env_filter; +use tokio_postgres::NoTls; + +pub(crate) fn init_logging() { + let _: Result<_, _> = tracing_subscriber::fmt() + .with_ansi(true) + .with_env_filter(env_filter(None)) + .with_file(true) + .with_line_number(true) + 
.with_test_writer() + .try_init(); +} + +pub struct DatabaseTestWrapper { + _pool: PostgresStorePool, + pub connection: ::Store<'static>, +} + +impl DatabaseTestWrapper { + pub async fn new() -> Self { + load_env(Environment::Test); + init_logging(); + + let user = std::env::var("HASH_GRAPH_PG_USER").unwrap_or_else(|_| "graph".to_owned()); + let password = + std::env::var("HASH_GRAPH_PG_PASSWORD").unwrap_or_else(|_| "graph".to_owned()); + let host = std::env::var("HASH_GRAPH_PG_HOST").unwrap_or_else(|_| "localhost".to_owned()); + let port = std::env::var("HASH_GRAPH_PG_PORT").map_or(5432, |port| { + port.parse::().expect("could not parse port") + }); + let database = + std::env::var("HASH_GRAPH_PG_DATABASE").unwrap_or_else(|_| "graph".to_owned()); + + let connection_info = DatabaseConnectionInfo::new( + DatabaseType::Postgres, + user, + password, + host, + port, + database, + ); + + let pool = PostgresStorePool::new( + &connection_info, + &DatabasePoolConfig::default(), + NoTls, + PostgresStoreSettings::default(), + ) + .await + .expect("could not connect to database"); + + let connection = pool + .acquire_owned(None) + .await + .expect("could not acquire a database connection"); + + Self { + _pool: pool, + connection, + } + } +} diff --git a/libs/@local/graph/postgres-store/tests/deletion/drafts.rs b/libs/@local/graph/postgres-store/tests/deletion/drafts.rs new file mode 100644 index 00000000000..8b6dd2876f3 --- /dev/null +++ b/libs/@local/graph/postgres-store/tests/deletion/drafts.rs @@ -0,0 +1,772 @@ +use std::collections::HashSet; + +use hash_graph_store::{ + entity::{ + DeleteEntitiesParams, DeletionScope, DeletionSummary, EntityStore as _, + LinkDeletionBehavior, PatchEntityParams, + }, + filter::Filter, +}; +use type_system::knowledge::entity::EntityId; + +use crate::{ + DatabaseTestWrapper, alice, bob, count_entity, create_person, get_deletion_provenance, + provenance, raw_count_by_draft_id, raw_count_entity_edge, seed, +}; + +/// Promotes a draft-only 
entity to a full delete when all drafts are matched. +/// +/// Entity created with `draft: true` and never published. With `include_drafts: true`, +/// `select_entities_for_deletion` puts the entity in the draft-only bucket. Then +/// `promote_draft_only_entities` queries `entity_temporal_metadata` (across ALL temporal history, +/// no time restriction) for rows where `draft_id IS NULL OR NOT (draft_id = ANY(matched_drafts))`. +/// Since there's no published version and no unmatched drafts, the query returns nothing → draft +/// vec is cleared → entity becomes a full target → `entity_ids` gets tombstone provenance via +/// `update_entity_ids_provenance`. +#[tokio::test] +async fn draft_only_entity_promoted_to_full_delete() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), true).await; + let entity_id = entity.metadata.record_id.entity_id; + let base_id = EntityId { + web_id: entity_id.web_id, + entity_uuid: entity_id.entity_uuid, + draft_id: None, + }; + + assert!(count_entity(&api, base_id, true).await >= 1); + assert_eq!(count_entity(&api, base_id, false).await, 0); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(base_id), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("delete should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, base_id, true).await, 0); + assert!( + get_deletion_provenance(&api, base_id.web_id, base_id.entity_uuid) + .await + .is_some() + ); +} + +/// Deletes only the draft of an entity that also has a published version. 
+/// +/// The published version's temporal row has `draft_id IS NULL`, which matches the first branch of +/// the promotion query (`draft_id IS NULL OR NOT (draft_id = ANY(...))`). This puts the entity in +/// `entities_with_remaining_data`, preventing promotion. The entity stays as a +/// `DraftOnlyDeletionTarget`. Only `delete_target_data(Drafts)` runs — deleting the draft's +/// temporal metadata, edition, and draft row by `draft_id`. `entity_ids` is untouched and receives +/// no deletion provenance. The published version remains fully queryable. +#[tokio::test] +async fn draft_of_published_entity_preserves_published() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Create a draft + let patched = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create draft"); + + let draft_entity_id = patched.metadata.record_id.entity_id; + let draft_id = draft_entity_id + .draft_id + .expect("patch should produce draft_id"); + + assert!(count_entity(&api, entity_id, false).await >= 1); + assert!(count_entity(&api, entity_id, true).await >= 2); + + // Delete filtering by the specific draft_id + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(draft_entity_id), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("draft deletion should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 1, + } + ); + + // Published survives + 
assert!(count_entity(&api, entity_id, false).await >= 1); + assert_eq!( + count_entity(&api, entity_id, true).await, + count_entity(&api, entity_id, false).await + ); + + // Draft temporal metadata gone + assert_eq!( + raw_count_by_draft_id(&api, "entity_temporal_metadata", draft_id).await, + 0 + ); + + // No tombstone (draft-only target) + assert!( + get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .is_none() + ); +} + +/// Skips draft entities when `include_drafts` is false. +/// +/// [`SelectCompiler`] adds `draft_id IS NULL` when `include_drafts=false`. A draft-only entity has +/// all temporal rows with `draft_id IS NOT NULL`, so none match. Note that +/// `Filter::for_entity_by_entity_id` with `draft_id: None` does NOT add a `draft_id IS NULL` +/// constraint itself — the restriction comes entirely from `include_drafts`. +#[tokio::test] +async fn include_drafts_false_skips_drafts() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), true).await; + let entity_id = entity.metadata.record_id.entity_id; + let base_id = EntityId { + web_id: entity_id.web_id, + entity_uuid: entity_id.entity_uuid, + draft_id: None, + }; + + assert!(count_entity(&api, base_id, true).await >= 1); + assert_eq!(count_entity(&api, base_id, false).await, 0); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(base_id), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("delete should succeed (noop)"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 0, + } + ); + + // Draft still exists + assert!(count_entity(&api, base_id, true).await >= 1); +} + +/// Does not promote when only some drafts of an entity are matched. 
+/// +/// A published entity has 2 drafts. The filter only matches 1 draft. +/// `promote_draft_only_entities` finds remaining data via +/// `draft_id IS NULL OR NOT (draft_id = ANY(matched_drafts))` — the published row's `draft_id IS +/// NULL` already prevents promotion, and the unmatched draft further blocks it. Only the matched +/// draft's temporal/edition/draft rows are deleted via `DraftOnlyDeletionTarget`. The published +/// version, unmatched draft, and `entity_ids` survive without deletion provenance. +#[tokio::test] +async fn partial_draft_match_not_promoted() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + // Published entity + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Create 2 drafts by patching the published entity twice + let patched_1 = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create first draft"); + + let draft_entity_id_1 = patched_1.metadata.record_id.entity_id; + let draft_id_1 = draft_entity_id_1 + .draft_id + .expect("first patch should produce draft_id"); + + let patched_2 = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create second draft"); + + let draft_id_2 = patched_2 + .metadata + .record_id + .entity_id + .draft_id + .expect("second patch should produce draft_id"); + + assert_ne!(draft_id_1, draft_id_2); + // Published + 2 drafts + assert!(count_entity(&api, entity_id, true).await >= 3); + + // Delete only 
draft_id_1 + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(draft_entity_id_1), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("partial draft deletion should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 1, + } + ); + + assert_eq!( + raw_count_by_draft_id(&api, "entity_temporal_metadata", draft_id_1).await, + 0 + ); + assert!(raw_count_by_draft_id(&api, "entity_temporal_metadata", draft_id_2).await > 0); + + // Published version + unmatched draft survive + assert!(count_entity(&api, entity_id, false).await >= 1); + assert!(count_entity(&api, entity_id, true).await >= 2); + + // Not promoted → no tombstone + assert!( + get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .is_none() + ); +} + +/// Upgrades to full delete when both published and draft rows match. +/// +/// Requires `include_drafts: true` so [`SelectCompiler`] does NOT add `draft_id IS NULL`. The +/// filter uses `Filter::for_entity_by_entity_id` with `draft_id: None`, which matches on `(web_id, +/// entity_uuid)` only — returning both published and draft rows. The `HashMap` partitioning handles +/// this: a draft row arrives as `Vacant + Some(draft_id)` → `vec![draft_id]`, then the published +/// row arrives as `Occupied + None` → `entry.clear()` → empty vec = full target. Both published and +/// draft data are removed; `entity_ids` is tombstoned. 
+#[tokio::test] +async fn published_and_draft_matched_becomes_full_delete() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Create a draft + api.store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create draft"); + + assert!(count_entity(&api, entity_id, false).await >= 1); + assert!(count_entity(&api, entity_id, true).await >= 2); + + // Delete with include_drafts=true, filter by entity UUID (no draft_id) → matches both + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("full delete should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, entity_id, true).await, 0); + assert_eq!(count_entity(&api, entity_id, false).await, 0); + assert!( + get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .is_some() + ); +} + +/// Handles full and draft-only targets simultaneously in a single call. +/// +/// Entity A (published, matched by filter) produces a full target. Entity B (published + draft, +/// only draft matched) produces a draft-only target (published version blocks promotion). Both +/// branches of `delete_target_data` execute in the same `execute_entity_deletion` call. 
A gets +/// tombstoned with provenance, B's draft is removed but B's published version and `entity_ids` +/// survive without deletion provenance. +#[tokio::test] +async fn mixed_full_and_draft_targets() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + // Entity A: published, will be fully deleted + let entity_a = create_person(&mut api, alice(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + + // Entity B: published + draft, only draft will be deleted + let entity_b = create_person(&mut api, bob(), false).await; + let id_b = entity_b.metadata.record_id.entity_id; + + let patched_b = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id: id_b, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create draft of B"); + + let b_draft_entity_id = patched_b.metadata.record_id.entity_id; + + // Filter: A (full match via entity UUID) + B's specific draft + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(b_draft_entity_id), + ]), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("mixed deletion should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 1, + } + ); + + // A: fully deleted with tombstone + assert_eq!(count_entity(&api, id_a, false).await, 0); + assert!( + get_deletion_provenance(&api, id_a.web_id, id_a.entity_uuid) + .await + .is_some() + ); + + // B: published survives, draft gone, no tombstone + assert!(count_entity(&api, id_b, false).await >= 1); + assert_eq!( + count_entity(&api, id_b, true).await, + 
count_entity(&api, id_b, false).await + ); + assert!( + get_deletion_provenance(&api, id_b.web_id, id_b.entity_uuid) + .await + .is_none() + ); +} + +/// Verifies the empty-target guards when only one target type has data. +/// +/// Create a published entity with a draft. Delete only the draft with `include_drafts: true` — the +/// published version blocks promotion, so `FullEntityDeletionTarget` is empty and +/// `DraftOnlyDeletionTarget` has the draft. The guards `!full_target.web_ids.is_empty()` and +/// `!draft_target.draft_ids.is_empty()` must correctly skip the empty branch without errors. In +/// particular, the empty full target must not trigger `delete_entity_edge`, `count_incoming_links`, +/// or `update_entity_ids_provenance`. +#[tokio::test] +async fn empty_target_guards() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + let patched = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create draft"); + + let draft_entity_id = patched.metadata.record_id.entity_id; + let draft_id = draft_entity_id.draft_id.expect("should be draft"); + + // Delete only the draft with Error behavior (full target is empty → link check skipped) + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(draft_entity_id), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect("draft-only deletion should succeed even with Error behavior"); + + assert_eq!( + summary, + DeletionSummary { + 
full_entities: 0, + draft_deletions: 1, + } + ); + + assert!(count_entity(&api, entity_id, false).await >= 1); + assert_eq!( + raw_count_by_draft_id(&api, "entity_temporal_metadata", draft_id).await, + 0 + ); + assert!( + get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .is_none() + ); +} + +/// Draft link entity's `entity_edge` row survives draft-only deletion. +/// +/// A published link entity L(A→B) has a draft created via `patch_entity(draft: Some(true))`. +/// `delete_entity_edge` only runs for full targets, so deleting just the draft leaves +/// `entity_edge` intact (the published link still exists). Deleting the published entity +/// afterwards triggers full deletion which cleans up the edge rows. +#[tokio::test] +async fn draft_link_entity_edge_survives() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + // Create published link entity L A→B + let link = crate::create_link(&mut api, id_a, id_b).await; + let link_entity_id = link.metadata.record_id.entity_id; + assert!(link_entity_id.draft_id.is_none()); + + // Create a draft of the link entity + let patched_link = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id: link_entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create draft of link"); + + let draft_link_entity_id = patched_link.metadata.record_id.entity_id; + assert!(draft_link_entity_id.draft_id.is_some()); + + assert!( + raw_count_entity_edge(&api, link_entity_id.web_id, link_entity_id.entity_uuid).await > 0 + ); + + // Step 1: Delete 
only the draft → edge survives + let summary1 = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(draft_link_entity_id), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("deleting draft should succeed"); + + assert_eq!( + summary1, + DeletionSummary { + full_entities: 0, + draft_deletions: 1, + } + ); + + // Edge survives (draft-only deletion, published link still exists) + assert!( + raw_count_entity_edge(&api, link_entity_id.web_id, link_entity_id.entity_uuid).await > 0 + ); + assert!(count_entity(&api, link_entity_id, false).await >= 1); + + // Step 2: Delete the published entity → full deletion, edge cleaned up + let summary2 = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(link_entity_id), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("deleting published link should succeed"); + + assert_eq!( + summary2, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + // Edge cleaned up (full target deletion) + assert_eq!( + raw_count_entity_edge(&api, link_entity_id.web_id, link_entity_id.entity_uuid).await, + 0 + ); + assert_eq!(count_entity(&api, link_entity_id, true).await, 0); +} + +/// `DeletionSummary.draft_deletions` counts individual draft IDs, not entities. +/// +/// A published entity has 2 drafts. With `include_drafts: true` and a filter matching both +/// drafts specifically, `DraftOnlyDeletionTarget.draft_ids` has 2 entries. The published version +/// blocks promotion, so `DeletionSummary.draft_deletions` must be 2 (the number of draft records +/// deleted), not 1 (the number of entities affected), and `full_entities` must be 0. 
+#[tokio::test] +async fn summary_counts_draft_ids_not_entities() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + // Published entity + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Create 2 drafts by patching the published entity twice + let patched_1 = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create first draft"); + + let draft_entity_id_1 = patched_1.metadata.record_id.entity_id; + let draft_id_1 = draft_entity_id_1 + .draft_id + .expect("first patch should produce draft_id"); + + let patched_2 = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create second draft"); + + let draft_entity_id_2 = patched_2.metadata.record_id.entity_id; + let draft_id_2 = draft_entity_id_2 + .draft_id + .expect("second patch should produce draft_id"); + + assert_ne!(draft_id_1, draft_id_2); + // Published + 2 drafts + assert!(count_entity(&api, entity_id, true).await >= 3); + + // Delete both drafts (but not the published version) + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::Any(vec![ + Filter::for_entity_by_entity_id(draft_entity_id_1), + Filter::for_entity_by_entity_id(draft_entity_id_2), + ]), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("deleting 2 drafts should succeed"); + + 
assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 2, + } + ); + + // Published version survives + assert!(count_entity(&api, entity_id, false).await >= 1); + // No drafts remain + assert_eq!( + count_entity(&api, entity_id, true).await, + count_entity(&api, entity_id, false).await + ); +} diff --git a/libs/@local/graph/postgres-store/tests/deletion/erase.rs b/libs/@local/graph/postgres-store/tests/deletion/erase.rs new file mode 100644 index 00000000000..5144aab75f3 --- /dev/null +++ b/libs/@local/graph/postgres-store/tests/deletion/erase.rs @@ -0,0 +1,455 @@ +use std::collections::HashSet; + +use hash_graph_store::{ + entity::{ + CreateEntityParams, DeleteEntitiesParams, DeletionScope, DeletionSummary, EntityStore as _, + PatchEntityParams, + }, + filter::Filter, +}; +use type_system::knowledge::property::{ + Property, PropertyObjectWithMetadata, PropertyPatchOperation, PropertyPath, + PropertyWithMetadata, +}; + +use crate::{ + DatabaseTestWrapper, alice, count_entity, create_person, get_deletion_provenance, + person_type_id, provenance, raw_count, raw_entity_ids_exists, seed, +}; + +/// Erases the `entity_ids` row entirely, leaving no tombstone. +/// +/// After erase, `entity_ids` has zero rows for the `(web_id, entity_uuid)` pair. Unlike purge +/// (which calls `update_entity_ids_provenance` to stamp a tombstone), erase calls +/// `delete_entity_ids` to remove the row completely. `count_incoming_links` always runs for erase +/// scope to prevent FK violations from `entity_edge.target → entity_ids`. 
+#[tokio::test] +async fn removes_entity_ids_row() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, entity_id, false).await, 0); + assert!(!raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); +} + +/// Verifies all satellite tables are cleaned after erase. +/// +/// Same `delete_target_data` FK-safe ordering as purge (collect edition IDs → `entity_is_of_type` → +/// `entity_embeddings` → `entity_temporal_metadata` → `entity_editions` → `entity_drafts`), +/// followed by `delete_entity_edge` and `delete_entity_ids`. All tables with +/// `(web_id, entity_uuid)` columns must have zero rows for the entity. 
+#[tokio::test] +async fn satellite_tables_cleaned() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + let web_id = entity_id.web_id; + let entity_uuid = entity_id.entity_uuid; + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!( + raw_count(&api, "entity_temporal_metadata", web_id, entity_uuid).await, + 0 + ); + assert_eq!( + raw_count(&api, "entity_embeddings", web_id, entity_uuid).await, + 0 + ); + assert_eq!( + raw_count(&api, "entity_drafts", web_id, entity_uuid).await, + 0 + ); + // entity_editions and entity_is_of_type have no (web_id, entity_uuid) columns — verified + // implicitly: deletion succeeded without FK violations and temporal metadata is empty. + + assert!(!raw_entity_ids_exists(&api, web_id, entity_uuid).await); +} + +/// Erases an entity with multiple temporal editions. +/// +/// `collect_entity_edition_ids` captures all historical edition IDs from `entity_temporal_metadata` +/// without temporal restriction. All editions, temporal rows, and the `entity_ids` row must be +/// completely gone. 
+#[tokio::test] +async fn entity_with_history() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Create a second edition via patch (must change properties to create new edition) + api.store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![PropertyPatchOperation::Replace { + path: PropertyPath::default(), + property: PropertyWithMetadata::from_parts( + Property::Object(crate::bob()), + None, + ) + .expect("could not create property with metadata"), + }], + draft: None, + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not patch entity"); + + assert!(count_entity(&api, entity_id, false).await >= 2); + + let web_id = entity_id.web_id; + let entity_uuid = entity_id.entity_uuid; + assert!( + raw_count(&api, "entity_temporal_metadata", web_id, entity_uuid).await >= 2, + "entity must have >= 2 temporal metadata rows before erase" + ); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, entity_id, false).await, 0); + assert_eq!( + raw_count(&api, "entity_temporal_metadata", web_id, entity_uuid).await, + 0, + "all temporal metadata rows must be erased, not just the current edition" + ); + assert!(!raw_entity_ids_exists(&api, web_id, entity_uuid).await); +} + +/// Erasing the same entity twice succeeds silently on the second call. 
+/// +/// Unlike purge (where the `entity_ids` tombstone survives), erase removes `entity_ids` too. The +/// second call's [`SelectCompiler`] queries `entity_temporal_metadata` which is also gone — no rows +/// match, empty targets, immediate `Ok(())`. There is literally no trace of the entity in any +/// table. +#[tokio::test] +async fn double_deletion_is_noop() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + let summary1 = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("first erase failed"); + + assert_eq!( + summary1, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + let summary2 = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("second erase should not fail"); + + assert_eq!( + summary2, + DeletionSummary { + full_entities: 0, + draft_deletions: 0, + } + ); + + assert!(!raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); +} + +/// Re-creates an entity with the same UUID after erase. +/// +/// After erase, `entity_ids` is gone and `delete_target_data` has removed all satellite rows. +/// Creating a new entity with the same `entity_uuid` must succeed without FK violations from +/// orphaned references in any table. Critical for the "erase = no trace" contract. The new entity +/// should be fully functional (queryable, patchable). 
+#[tokio::test] +async fn entity_reuse_after_erase() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + let reuse_uuid = entity_id.entity_uuid; + + api.store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase should succeed"); + + assert!(!raw_entity_ids_exists(&api, entity_id.web_id, reuse_uuid).await); + + // Re-create with the same UUID + let new_entity = api + .store + .create_entity( + api.account_id, + CreateEntityParams { + web_id: entity_id.web_id, + entity_uuid: Some(reuse_uuid), + decision_time: None, + entity_type_ids: HashSet::from([person_type_id()]), + properties: PropertyObjectWithMetadata::from_parts(alice(), None) + .expect("could not create property with metadata object"), + confidence: None, + link_data: None, + draft: false, + policies: Vec::new(), + provenance: provenance(), + }, + ) + .await + .expect("re-creating entity with same UUID should succeed"); + + let new_id = new_entity.metadata.record_id.entity_id; + assert!(count_entity(&api, new_id, false).await >= 1); + + // Verify it's fully functional by patching + api.store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id: new_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: None, + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("patching re-created entity should succeed"); +} + +/// Erases a draft-only entity that was promoted to a full target. 
+/// +/// Draft-only entity with all drafts matched → `promote_draft_only_entities` clears the draft vec +/// (no published version and no unmatched drafts found in `entity_temporal_metadata`) → entity +/// becomes a full target → Erase scope calls `delete_entity_ids` instead of +/// `update_entity_ids_provenance`. The `entity_ids` row must be gone, not tombstoned. +#[tokio::test] +async fn promoted_draft_only_entity() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), true).await; + let entity_id = entity.metadata.record_id.entity_id; + let base_entity_id = type_system::knowledge::entity::EntityId { + web_id: entity_id.web_id, + entity_uuid: entity_id.entity_uuid, + draft_id: None, + }; + + assert!(count_entity(&api, base_entity_id, true).await >= 1); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(base_entity_id), + include_drafts: true, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase of promoted draft should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, base_entity_id, true).await, 0); + assert!(!raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); +} + +/// Erase scope with partial draft match does NOT delete `entity_ids`. +/// +/// Entity B has a published version and a draft. Filter matches only the draft with +/// `include_drafts: true`. The published version blocks promotion → draft-only target. +/// `execute_entity_deletion` only calls `delete_entity_ids` for full targets, so `entity_ids` +/// survives despite `DeletionScope::Erase`. The draft's temporal/edition/drafts rows are removed +/// but the published entity is untouched. 
+/// +/// This is a semantic edge case: callers passing `Erase` should not assume complete removal +/// when the filter only captures drafts of an entity with published data. +#[tokio::test] +async fn erase_partial_draft_preserves_entity_ids() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Create a draft + let patched = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create draft"); + + let draft_entity_id = patched.metadata.record_id.entity_id; + assert!(draft_entity_id.draft_id.is_some()); + + assert!(count_entity(&api, entity_id, true).await >= 2); + + // Erase filtering only the draft + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(draft_entity_id), + include_drafts: true, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase of draft should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 1, + } + ); + + // Published version survives + assert!(count_entity(&api, entity_id, false).await >= 1); + assert_eq!( + count_entity(&api, entity_id, true).await, + count_entity(&api, entity_id, false).await + ); + + // entity_ids is NOT deleted despite Erase scope (draft-only target) + assert!(raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); + assert!( + get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .is_none() + ); +} diff --git a/libs/@local/graph/postgres-store/tests/deletion/links.rs 
b/libs/@local/graph/postgres-store/tests/deletion/links.rs
new file mode 100644
index 00000000000..4898bdb9d4b
--- /dev/null
+++ b/libs/@local/graph/postgres-store/tests/deletion/links.rs
@@ -0,0 +1,762 @@
+use core::assert_matches::assert_matches;
+use std::collections::HashSet;
+
+use hash_graph_store::{
+    entity::{
+        DeleteEntitiesParams, DeletionScope, DeletionSummary, EntityStore as _,
+        LinkDeletionBehavior, PatchEntityParams,
+    },
+    error::DeletionError,
+    filter::Filter,
+};
+
+use crate::{
+    DatabaseTestWrapper, alice, bob, count_entity, create_link, create_person,
+    get_deletion_provenance, provenance, raw_count_entity_edge, raw_count_entity_edge_any,
+    raw_entity_ids_exists, seed,
+};
+
+/// Rejects purge with [`LinkDeletionBehavior::Error`] when incoming links exist.
+///
+/// Creates A, B, and link entity L (A→B). L has an immutable `entity_edge` row with `source=L,
+/// target=B`. Purging B with `Error` behavior triggers `count_incoming_links`, which finds L's edge
+/// targeting B (L is outside the deletion batch) and returns [`DeletionError::IncomingLinksExist`].
+#[tokio::test] +async fn purge_error_rejects_with_incoming_links() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let _link = create_link(&mut api, id_a, id_b).await; + + let err = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_b), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect_err("purge with Error should fail when incoming links exist"); + + assert_matches!( + err.current_context(), + DeletionError::IncomingLinksExist { count } if *count >= 1 + ); + + // Entity B must be completely intact after the error (transaction rolled back) + assert!(count_entity(&api, id_b, false).await >= 1); + assert!(raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); + assert!( + get_deletion_provenance(&api, id_b.web_id, id_b.entity_uuid) + .await + .is_none() + ); +} + +/// Succeeds with [`LinkDeletionBehavior::Ignore`] despite incoming links. +/// +/// Creates A, B, and link entity L (A→B). Purges B with `Ignore` behavior — the link check is +/// skipped entirely. B's `entity_ids` row survives as a tombstone with deletion provenance. L's +/// `entity_edge` row (target=B) remains valid because the FK `entity_edge.target → entity_ids` +/// still points to B's tombstone. 
+#[tokio::test] +async fn purge_ignore_succeeds_with_incoming_links() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_b), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("purge with Ignore should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + // B is tombstoned + assert!(raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); + assert!( + get_deletion_provenance(&api, id_b.web_id, id_b.entity_uuid) + .await + .is_some() + ); + + // L's entity_edge rows still exist (L is intact, its edges point to B's tombstone) + assert!(raw_count_entity_edge_any(&api, id_link.web_id, id_link.entity_uuid).await > 0); +} + +/// Rejects erase when incoming links exist, regardless of link behavior. +/// +/// [`DeletionScope::Erase`] always runs `count_incoming_links` because `delete_entity_ids` would +/// violate the FK `entity_edge.target → entity_ids` if incoming edges exist. The explicit check +/// provides a clean [`DeletionError::IncomingLinksExist`] instead of a raw PostgreSQL FK violation. 
+#[tokio::test] +async fn erase_rejects_with_incoming_links() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let _link = create_link(&mut api, id_a, id_b).await; + + let err = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_b), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect_err("erase should fail when incoming links exist"); + + assert_matches!( + err.current_context(), + DeletionError::IncomingLinksExist { count } if *count >= 1 + ); + + // Entity B must be completely intact after the error (transaction rolled back) + assert!(count_entity(&api, id_b, false).await >= 1); + assert!(raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); + assert!( + get_deletion_provenance(&api, id_b.web_id, id_b.entity_uuid) + .await + .is_none() + ); +} + +/// Purges a link entity and verifies ALL `entity_edge` rows are removed. +/// +/// `entity_edge` stores bidirectional rows per link: +/// +/// ```text +/// Link L: A(left) → B(right) produces 4 rows: +/// source=L, target=A, kind=has-left-entity, direction=outgoing +/// source=A, target=L, kind=has-left-entity, direction=incoming ← reversed +/// source=L, target=B, kind=has-right-entity, direction=outgoing +/// source=B, target=L, kind=has-right-entity, direction=incoming ← reversed +/// ``` +/// +/// `delete_entity_edge` must clean up both the outgoing rows (source=L) AND the reversed +/// incoming-direction rows (target=L). 
Without cleaning up the reversed rows: +/// - Purge: orphaned rows with FK to tombstone (data leak) +/// - Erase: FK violation when `entity_ids` is deleted +#[tokio::test] +async fn purge_link_entity_removes_all_edges() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + // Verify edges exist before deletion + assert_eq!( + raw_count_entity_edge_any(&api, id_link.web_id, id_link.entity_uuid).await, + 4 + ); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_link), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("purge link entity should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + // All 4 entity_edge rows should be gone + assert_eq!( + raw_count_entity_edge_any(&api, id_link.web_id, id_link.entity_uuid).await, + 0 + ); + + // Endpoints are unaffected + assert!(raw_entity_ids_exists(&api, id_a.web_id, id_a.entity_uuid).await); + assert!(raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); + assert!( + get_deletion_provenance(&api, id_a.web_id, id_a.entity_uuid) + .await + .is_none() + ); + assert!( + get_deletion_provenance(&api, id_b.web_id, id_b.entity_uuid) + .await + .is_none() + ); +} + +/// Links within the deletion batch are excluded from the incoming-link count. +/// +/// Creates A, B, and link L (A→B). Purges all three together with `Error` behavior. 
+/// `count_incoming_links` uses `(target_web_id, target_entity_uuid) IN (batch) AND (source_web_id, +/// source_entity_uuid) NOT IN (batch)`. Since L (the source of the edge to B) is also in the +/// deletion batch, L's edge is excluded from the count and B's deletion is not blocked. +/// +/// **Note**: `entity_edge` stores reversed (incoming-direction) rows. For L in the batch, its +/// reversed rows have `source=A/B, target=L`. If A and B are also in the batch, those sources are +/// excluded from the count too. This test must include A+B+L in the batch to succeed. +#[tokio::test] +async fn self_referential_batch_not_counted() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(id_b), + Filter::for_entity_by_entity_id(id_link), + ]), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect("batch purge with all entities should succeed with Error behavior"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 3, + draft_deletions: 0, + } + ); +} + +/// Draft-only deletion skips the incoming-link check entirely. +/// +/// The link check in `execute_entity_deletion` is gated behind `!full_target.web_ids.is_empty()`. +/// This is safe because draft-only deletion never touches `entity_ids`, so `entity_edge` FKs to +/// `entity_ids` remain intact. 
+/// +/// Setup: published entity B with a draft, and link L (A→B) with immutable `entity_edge` row. +/// Deleting B's draft with `include_drafts=true` and `Error` behavior produces only a +/// `DraftOnlyDeletionTarget` (the published version appears in `entities_with_remaining_data`, +/// preventing promotion). The full target is empty, so the link check is bypassed and deletion +/// succeeds. Published B and L's `entity_edge` survive. +#[tokio::test] +async fn draft_deletion_skips_link_check() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + // Create a draft of B + let patched_b = api + .store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id: id_b, + decision_time: None, + entity_type_ids: HashSet::default(), + properties: vec![], + draft: Some(true), + archived: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not create draft of B"); + + let draft_entity_id = patched_b.metadata.record_id.entity_id; + assert!(draft_entity_id.draft_id.is_some()); + + // Create link L A→B (published) + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + // Delete B's draft with Error behavior — should succeed because full_target is empty + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(draft_entity_id), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect("draft-only deletion should skip link check and succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 
1, + } + ); + + // Published B still exists + assert!(count_entity(&api, id_b, false).await >= 1); + + // Link L→B still valid + assert!(raw_count_entity_edge_any(&api, id_link.web_id, id_link.entity_uuid).await > 0); +} + +/// Verifies [`DeletionError::IncomingLinksExist`] reports the correct count. +/// +/// Creates multiple distinct link entities pointing to the same target. `count_incoming_links` +/// returns `COUNT(*)` from `entity_edge` (as `i64` cast to `u64`). The `count` field in the error +/// must match the actual number of incoming `entity_edge` rows from sources outside the deletion +/// batch. +/// +/// **Note**: each link L→B produces one outgoing edge (source=L, target=B). The count reflects +/// `entity_edge` rows, not link entities. Since one link produces one outgoing row targeting B, +/// 3 links should produce count=3. +#[tokio::test] +async fn incoming_link_count_is_accurate() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a1 = create_person(&mut api, alice(), false).await; + let entity_a2 = create_person(&mut api, alice(), false).await; + let entity_a3 = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a1 = entity_a1.metadata.record_id.entity_id; + let id_a2 = entity_a2.metadata.record_id.entity_id; + let id_a3 = entity_a3.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let _l1 = create_link(&mut api, id_a1, id_b).await; + let _l2 = create_link(&mut api, id_a2, id_b).await; + let _l3 = create_link(&mut api, id_a3, id_b).await; + + let err = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_b), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect_err("should fail with 3 incoming links"); + + 
assert_matches!( + err.current_context(), + DeletionError::IncomingLinksExist { count: 3 } + ); +} + +/// Handles a self-loop: entity A links to itself via link L. +/// +/// L has `entity_edge` rows with `source=L, target=A` (outgoing) and `source=A, target=L` +/// (incoming/reversed). Deleting A alone with `Error`: L is outside the batch, L's outgoing edge +/// targets A → `count_incoming_links` finds it → blocked. Deleting A + L together with `Error`: +/// L is in the batch, so L's edge is excluded from the count → succeeds. +#[tokio::test] +async fn self_loop_link() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + + let link = create_link(&mut api, id_a, id_a).await; + let id_link = link.metadata.record_id.entity_id; + + // Purge A alone with Error → should fail (L targets A) + let err = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_a), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect_err("purge A alone should fail with self-loop"); + + assert_matches!( + err.current_context(), + DeletionError::IncomingLinksExist { .. 
} + ); + + // Purge A+L together with Error → should succeed + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(id_link), + ]), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect("purge A+L together should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 2, + draft_deletions: 0, + } + ); +} + +/// Handles a chain: A → B → C (via link entities L1 and L2). +/// +/// L1 has `entity_edge` with `source=L1, target=B` (outgoing). L2 has `source=L2, target=C`. +/// Deleting B (middle) with `Error`: L1 is outside the batch, L1's edge to B counts → blocked. +/// Deleting B with `Ignore`: B is tombstoned, L1's `entity_edge` target → B's tombstone in +/// `entity_ids` (FK satisfied, dangling but valid reference). +#[tokio::test] +async fn chain_deletion() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let entity_c = create_person(&mut api, alice(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + let id_c = entity_c.metadata.record_id.entity_id; + + let l1 = create_link(&mut api, id_a, id_b).await; + let _l2 = create_link(&mut api, id_b, id_c).await; + let id_l1 = l1.metadata.record_id.entity_id; + + // Purge B alone with Error → fails (L1 points to B) + let err = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_b), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect_err("purge B 
with Error should fail due to L1"); + + assert_matches!( + err.current_context(), + DeletionError::IncomingLinksExist { .. } + ); + + // Purge B alone with Ignore → succeeds + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_b), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("purge B with Ignore should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + // L1's entity_edge rows still intact (pointing to tombstoned B) + assert!(raw_count_entity_edge(&api, id_l1.web_id, id_l1.entity_uuid).await > 0); +} + +/// Handles bidirectional links: A → B and B → A (two separate link entities). +/// +/// L1 has `entity_edge` with `source=L1, target=B`. L2 has `source=L2, target=A`. Deleting A alone +/// with `Error`: L2 is outside the batch, L2's edge to A counts → blocked. Deleting A + B + L1 + L2 +/// together with `Error`: all sources (L1, L2) are in the batch → `source NOT IN (batch)` excludes +/// both → count is 0 → succeeds. 
+#[tokio::test] +async fn bidirectional_links() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let l1 = create_link(&mut api, id_a, id_b).await; + let l2 = create_link(&mut api, id_b, id_a).await; + let id_l1 = l1.metadata.record_id.entity_id; + let id_l2 = l2.metadata.record_id.entity_id; + + // Purge A alone with Error → fails (L2 targets A) + let err = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_a), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect_err("purge A alone should fail with bidirectional links"); + + assert_matches!( + err.current_context(), + DeletionError::IncomingLinksExist { .. } + ); + + // Purge A+B+L1+L2 together with Error → succeeds + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(id_b), + Filter::for_entity_by_entity_id(id_l1), + Filter::for_entity_by_entity_id(id_l2), + ]), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect("batch purge of all bidirectional entities should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 4, + draft_deletions: 0, + } + ); +} + +/// Erasing A+B+L together succeeds — in-batch link sources are excluded from the count. +/// +/// Same setup as [`self_referential_batch_not_counted`] but with [`DeletionScope::Erase`] instead +/// of Purge. 
Erase always runs `count_incoming_links`; the batch-exclusion logic (`source NOT IN +/// batch`) must work here too. After erase, all three `entity_ids` rows are deleted (not +/// tombstoned) and no FK violation occurs because `delete_entity_edge` removes all edge rows +/// before `delete_entity_ids`. +#[tokio::test] +async fn erase_batch_excludes_in_batch_links() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(id_b), + Filter::for_entity_by_entity_id(id_link), + ]), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase batch with all entities should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 3, + draft_deletions: 0, + } + ); + + // All entity_ids rows erased (not tombstoned) + assert!(!raw_entity_ids_exists(&api, id_a.web_id, id_a.entity_uuid).await); + assert!(!raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); + assert!(!raw_entity_ids_exists(&api, id_link.web_id, id_link.entity_uuid).await); + + // All edge rows cleaned up + assert_eq!( + raw_count_entity_edge_any(&api, id_link.web_id, id_link.entity_uuid).await, + 0 + ); +} + +/// Erasing a link entity alone succeeds — denormalized edges are not real incoming links. +/// +/// `entity_edge` stores `direction = 'incoming'` rows (source=endpoint, target=L) as denormalized +/// copies for query optimization. 
`count_incoming_links` only counts `direction = 'outgoing'` edges +/// (real link relationships from other link entities). Since no other link entity points TO L, +/// the count is 0 and erase proceeds. `delete_entity_edge` cleans up all 4 rows (both outgoing +/// and incoming-direction) before `delete_entity_ids` removes L's row. +#[tokio::test] +async fn erase_link_entity_alone_succeeds() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + // Verify 4 edges exist before erase + assert_eq!( + raw_count_entity_edge_any(&api, id_link.web_id, id_link.entity_uuid).await, + 4 + ); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_link), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase link entity alone should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + // All 4 entity_edge rows cleaned up + assert_eq!( + raw_count_entity_edge_any(&api, id_link.web_id, id_link.entity_uuid).await, + 0 + ); + + // Link entity_ids row erased (not tombstoned) + assert!(!raw_entity_ids_exists(&api, id_link.web_id, id_link.entity_uuid).await); + + // Endpoints unaffected + assert!(raw_entity_ids_exists(&api, id_a.web_id, id_a.entity_uuid).await); + assert!(raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); +} diff --git a/libs/@local/graph/postgres-store/tests/deletion/main.rs b/libs/@local/graph/postgres-store/tests/deletion/main.rs new file mode 100644 index 
00000000000..ed6b6780613 --- /dev/null +++ b/libs/@local/graph/postgres-store/tests/deletion/main.rs @@ -0,0 +1,474 @@ +#![feature(assert_matches)] +#![expect(clippy::missing_panics_doc, clippy::missing_errors_doc)] + +#[path = "../common/mod.rs"] +mod common; + +mod drafts; +mod erase; +mod links; +mod purge; +mod validation; + +use std::collections::{HashMap, HashSet}; + +use error_stack::{Report, ResultExt as _}; +use hash_graph_authorization::policies::store::{PolicyStore as _, PrincipalStore as _}; +use hash_graph_postgres_store::store::{AsClient as _, PostgresStore}; +use hash_graph_store::{ + account::{AccountStore as _, CreateUserActorParams}, + data_type::{CreateDataTypeParams, DataTypeStore as _}, + entity::{CountEntitiesParams, CreateEntityParams, EntityStore as _}, + entity_type::{CreateEntityTypeParams, EntityTypeStore as _}, + error::InsertionError, + filter::Filter, + property_type::{CreatePropertyTypeParams, PropertyTypeStore as _}, + query::ConflictBehavior, + subgraph::temporal_axes::{ + PinnedTemporalAxisUnresolved, QueryTemporalAxesUnresolved, VariableTemporalAxisUnresolved, + }, +}; +use hash_graph_temporal_versioning::TemporalBound; +use hash_graph_test_data::{data_type, entity, entity_type, property_type}; +use tokio_postgres::Transaction; +use type_system::{ + knowledge::{ + Entity, + entity::{ + EntityId, LinkData, + id::{DraftId, EntityUuid}, + provenance::{ + EntityDeletionProvenance, InferredEntityProvenance, ProvidedEntityEditionProvenance, + }, + }, + property::{PropertyObject, PropertyObjectWithMetadata, metadata::PropertyProvenance}, + }, + ontology::{ + data_type::DataType, + entity_type::EntityType, + id::{BaseUrl, OntologyTypeVersion, VersionedUrl}, + property_type::PropertyType, + provenance::{OntologyOwnership, ProvidedOntologyEditionProvenance}, + }, + principal::{ + actor::{ActorEntityUuid, ActorType}, + actor_group::WebId, + }, + provenance::{OriginProvenance, OriginType}, +}; + +pub use crate::common::DatabaseTestWrapper; + 
+pub struct DatabaseApi<'pool> { + pub store: PostgresStore>, + pub account_id: ActorEntityUuid, +} + +impl DatabaseTestWrapper { + pub async fn seed( + &mut self, + data_types: D, + property_types: P, + entity_types: E, + ) -> Result, Report> + where + D: IntoIterator + Send, + P: IntoIterator + Send, + E: IntoIterator + Send, + { + let mut store = self + .connection + .transaction() + .await + .expect("could not start test transaction"); + + store + .seed_system_policies() + .await + .expect("could not seed system policies"); + + let system_account_id = store + .get_or_create_system_machine("h") + .await + .change_context(InsertionError)?; + let user_id = store + .create_user_actor( + system_account_id.into(), + CreateUserActorParams { + user_id: None, + shortname: Some("test-user".to_owned()), + registration_complete: true, + }, + ) + .await + .change_context(InsertionError)? + .user_id; + + let ontology_provenance = ProvidedOntologyEditionProvenance { + actor_type: ActorType::User, + origin: OriginProvenance::from_empty_type(OriginType::Api), + sources: Vec::new(), + }; + + store + .create_data_types( + user_id.into(), + data_types.into_iter().map(|json| CreateDataTypeParams { + schema: serde_json::from_str::(json) + .expect("could not parse data type"), + ownership: OntologyOwnership::Local { + web_id: user_id.into(), + }, + conflict_behavior: ConflictBehavior::Skip, + provenance: ontology_provenance.clone(), + conversions: HashMap::new(), + }), + ) + .await?; + + store + .create_property_types( + user_id.into(), + property_types + .into_iter() + .map(|json| CreatePropertyTypeParams { + schema: serde_json::from_str::(json) + .expect("could not parse property type"), + ownership: OntologyOwnership::Local { + web_id: user_id.into(), + }, + conflict_behavior: ConflictBehavior::Skip, + provenance: ontology_provenance.clone(), + }), + ) + .await?; + + store + .create_entity_types( + user_id.into(), + entity_types.into_iter().map(|json| CreateEntityTypeParams { + 
schema: serde_json::from_str::(json) + .expect("could not parse entity type"), + ownership: OntologyOwnership::Local { + web_id: user_id.into(), + }, + conflict_behavior: ConflictBehavior::Skip, + provenance: ontology_provenance.clone(), + }), + ) + .await?; + + Ok(DatabaseApi { + store, + account_id: user_id.into(), + }) + } +} + +/// Counts rows in `table` for the given `(web_id, entity_uuid)` pair via raw SQL. +pub(crate) async fn raw_count( + api: &DatabaseApi<'_>, + table: &str, + web_id: WebId, + entity_uuid: EntityUuid, +) -> i64 { + api.store + .as_client() + .query_one( + &format!("SELECT COUNT(*) FROM {table} WHERE web_id = $1 AND entity_uuid = $2"), + &[&web_id, &entity_uuid], + ) + .await + .expect("raw count query failed") + .get(0) +} + +/// Returns the [`EntityDeletionProvenance`] from the `entity_ids` provenance column, or `None` +/// if the row is missing or the entity has not been deleted. +/// +/// Deserializes the full [`InferredEntityProvenance`] to validate the JSONB structure, then +/// extracts only the `deletion` field. +pub(crate) async fn get_deletion_provenance( + api: &DatabaseApi<'_>, + web_id: WebId, + entity_uuid: EntityUuid, +) -> Option { + api.store + .as_client() + .query_opt( + "SELECT provenance FROM entity_ids WHERE web_id = $1 AND entity_uuid = $2", + &[&web_id, &entity_uuid], + ) + .await + .expect("provenance query failed") + .and_then(|row| { + let prov: InferredEntityProvenance = row.get(0); + prov.deletion + }) +} + +/// Returns the full [`InferredEntityProvenance`] from `entity_ids`, or `None` if the row is +/// missing. 
+pub(crate) async fn get_inferred_provenance( + api: &DatabaseApi<'_>, + web_id: WebId, + entity_uuid: EntityUuid, +) -> Option { + api.store + .as_client() + .query_opt( + "SELECT provenance FROM entity_ids WHERE web_id = $1 AND entity_uuid = $2", + &[&web_id, &entity_uuid], + ) + .await + .expect("provenance query failed") + .map(|row| row.get(0)) +} + +// --------------------------------------------------------------------------- +// Shared test helpers — used by purge, erase, links, drafts, and validation +// --------------------------------------------------------------------------- + +pub(crate) fn person_type_id() -> VersionedUrl { + VersionedUrl { + base_url: BaseUrl::new( + "https://blockprotocol.org/@alice/types/entity-type/person/".to_owned(), + ) + .expect("couldn't construct Base URL"), + version: OntologyTypeVersion { + major: 1, + pre_release: None, + }, + } +} + +pub(crate) fn friend_of_type_id() -> VersionedUrl { + VersionedUrl { + base_url: BaseUrl::new( + "https://blockprotocol.org/@alice/types/entity-type/friend-of/".to_owned(), + ) + .expect("couldn't construct Base URL"), + version: OntologyTypeVersion { + major: 1, + pre_release: None, + }, + } +} + +pub(crate) fn alice() -> PropertyObject { + serde_json::from_str(entity::PERSON_ALICE_V1).expect("could not parse entity") +} + +pub(crate) fn bob() -> PropertyObject { + serde_json::from_str(entity::PERSON_BOB_V1).expect("could not parse entity") +} + +pub(crate) const fn provenance() -> ProvidedEntityEditionProvenance { + ProvidedEntityEditionProvenance { + actor_type: ActorType::User, + origin: OriginProvenance::from_empty_type(OriginType::Api), + sources: Vec::new(), + } +} + +pub(crate) async fn seed(database: &mut DatabaseTestWrapper) -> DatabaseApi<'_> { + database + .seed( + [ + data_type::VALUE_V1, + data_type::TEXT_V1, + data_type::NUMBER_V1, + ], + [ + property_type::NAME_V1, + property_type::AGE_V1, + property_type::FAVORITE_SONG_V1, + property_type::FAVORITE_FILM_V1, + 
property_type::HOBBY_V1, + property_type::INTERESTS_V1, + ], + [ + entity_type::LINK_V1, + entity_type::link::FRIEND_OF_V1, + entity_type::link::ACQUAINTANCE_OF_V1, + entity_type::PERSON_V1, + ], + ) + .await + .expect("could not seed database") +} + +pub(crate) async fn count_entity( + api: &DatabaseApi<'_>, + entity_id: EntityId, + include_drafts: bool, +) -> usize { + api.store + .count_entities( + api.account_id, + CountEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + temporal_axes: QueryTemporalAxesUnresolved::DecisionTime { + pinned: PinnedTemporalAxisUnresolved::new(None), + variable: VariableTemporalAxisUnresolved::new( + Some(TemporalBound::Unbounded), + None, + ), + }, + include_drafts, + }, + ) + .await + .expect("could not count entities") +} + +pub(crate) async fn create_person( + api: &mut DatabaseApi<'_>, + properties: PropertyObject, + draft: bool, +) -> Entity { + api.store + .create_entity( + api.account_id, + CreateEntityParams { + web_id: WebId::new(api.account_id), + entity_uuid: None, + decision_time: None, + entity_type_ids: HashSet::from([person_type_id()]), + properties: PropertyObjectWithMetadata::from_parts(properties, None) + .expect("could not create property with metadata object"), + confidence: None, + link_data: None, + draft, + policies: Vec::new(), + provenance: provenance(), + }, + ) + .await + .expect("could not create entity") +} + +pub(crate) async fn create_link( + api: &mut DatabaseApi<'_>, + source: EntityId, + target: EntityId, +) -> Entity { + api.store + .create_entity( + api.account_id, + CreateEntityParams { + web_id: WebId::new(api.account_id), + entity_uuid: None, + decision_time: None, + entity_type_ids: HashSet::from([friend_of_type_id()]), + properties: PropertyObjectWithMetadata::from_parts(PropertyObject::empty(), None) + .expect("could not create property with metadata object"), + confidence: None, + link_data: Some(LinkData { + left_entity_id: source, + right_entity_id: target, + 
left_entity_confidence: None, + left_entity_provenance: PropertyProvenance::default(), + right_entity_confidence: None, + right_entity_provenance: PropertyProvenance::default(), + }), + draft: false, + policies: Vec::new(), + provenance: provenance(), + }, + ) + .await + .expect("could not create link entity") +} + +pub(crate) async fn create_second_user(api: &mut DatabaseApi<'_>) -> ActorEntityUuid { + let system_account_id = api + .store + .get_or_create_system_machine("h") + .await + .expect("could not get system machine"); + let response = api + .store + .create_user_actor( + system_account_id.into(), + CreateUserActorParams { + user_id: None, + shortname: Some("test-user-2".to_owned()), + registration_complete: true, + }, + ) + .await + .expect("could not create second user"); + response.user_id.into() +} + +pub(crate) async fn raw_count_by_draft_id( + api: &DatabaseApi<'_>, + table: &str, + draft_id: DraftId, +) -> i64 { + api.store + .as_client() + .query_one( + &format!("SELECT COUNT(*) FROM {table} WHERE draft_id = $1"), + &[&draft_id], + ) + .await + .expect("raw count by draft_id query failed") + .get(0) +} + +/// Counts `entity_edge` rows where the entity is the **source**. +pub(crate) async fn raw_count_entity_edge( + api: &DatabaseApi<'_>, + source_web_id: WebId, + source_entity_uuid: EntityUuid, +) -> i64 { + api.store + .as_client() + .query_one( + "SELECT COUNT(*) FROM entity_edge WHERE source_web_id = $1 AND source_entity_uuid = $2", + &[&source_web_id, &source_entity_uuid], + ) + .await + .expect("raw count entity_edge query failed") + .get(0) +} + +/// Counts ALL `entity_edge` rows where the entity appears as **either** source or target. +/// +/// Useful for link entity tests: `entity_edge` stores bidirectional rows (outgoing + +/// incoming/reversed), so a link entity L between A and B has 4 rows total (2 as source, 2 as +/// target). 
+pub(crate) async fn raw_count_entity_edge_any( + api: &DatabaseApi<'_>, + web_id: WebId, + entity_uuid: EntityUuid, +) -> i64 { + api.store + .as_client() + .query_one( + "SELECT COUNT(*) FROM entity_edge WHERE (source_web_id = $1 AND source_entity_uuid = \ + $2) OR (target_web_id = $1 AND target_entity_uuid = $2)", + &[&web_id, &entity_uuid], + ) + .await + .expect("raw count entity_edge (any) query failed") + .get(0) +} + +pub(crate) async fn raw_entity_ids_exists( + api: &DatabaseApi<'_>, + web_id: WebId, + entity_uuid: EntityUuid, +) -> bool { + api.store + .as_client() + .query_one( + "SELECT EXISTS(SELECT 1 FROM entity_ids WHERE web_id = $1 AND entity_uuid = $2)", + &[&web_id, &entity_uuid], + ) + .await + .expect("raw entity_ids exists query failed") + .get(0) +} diff --git a/libs/@local/graph/postgres-store/tests/deletion/purge.rs b/libs/@local/graph/postgres-store/tests/deletion/purge.rs new file mode 100644 index 00000000000..f45d3533fde --- /dev/null +++ b/libs/@local/graph/postgres-store/tests/deletion/purge.rs @@ -0,0 +1,1104 @@ +use std::collections::HashSet; + +use hash_graph_store::{ + entity::{ + CreateEntityParams, DeleteEntitiesParams, DeletionScope, DeletionSummary, EntityStore as _, + LinkDeletionBehavior, PatchEntityParams, UpdateEntityEmbeddingsParams, + }, + filter::Filter, +}; +use hash_graph_temporal_versioning::{TemporalTagged as _, Timestamp, TransactionTime}; +use hash_graph_types::{Embedding, knowledge::entity::EntityEmbedding}; +use type_system::{ + knowledge::{ + entity::{EntityId, id::EntityUuid}, + property::{ + Property, PropertyObjectWithMetadata, PropertyPatchOperation, PropertyPath, + PropertyWithMetadata, + }, + }, + principal::actor_group::WebId, +}; +use uuid::Uuid; + +use crate::{ + DatabaseTestWrapper, alice, bob, count_entity, create_link, create_person, create_second_user, + get_deletion_provenance, get_inferred_provenance, person_type_id, provenance, raw_count, + raw_entity_ids_exists, seed, +}; + +/// Helper: purge 
with default settings (`include_drafts=false`, Ignore link behavior). +const fn purge_params( + filter: Filter<'static, type_system::knowledge::Entity>, +) -> DeleteEntitiesParams<'static> { + DeleteEntitiesParams { + filter, + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + } +} + +/// Purges a single published entity with no history. +/// +/// Creates one published entity, purges it with `Ignore` link behavior, and verifies +/// `count_entities` drops from 1 to 0. The filter uses `Filter::for_entity_by_entity_id` with +/// `draft_id: None`, which matches on `(web_id, entity_uuid)` only — the `draft_id IS NULL` +/// constraint comes from `include_drafts: false` via [`SelectCompiler`], not from the filter +/// itself. +#[tokio::test] +async fn published_entity() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = api + .store + .create_entity( + api.account_id, + CreateEntityParams { + web_id: WebId::new(api.account_id), + entity_uuid: None, + decision_time: None, + entity_type_ids: HashSet::from([person_type_id()]), + properties: PropertyObjectWithMetadata::from_parts(alice(), None) + .expect("could not create property with metadata object"), + confidence: None, + link_data: None, + draft: false, + policies: Vec::new(), + provenance: provenance(), + }, + ) + .await + .expect("could not create entity"); + + let entity_id = entity.metadata.record_id.entity_id; + assert_eq!(count_entity(&api, entity_id, false).await, 1); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("could not delete entity"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + 
draft_deletions: 0, + } + ); + assert_eq!(count_entity(&api, entity_id, false).await, 0); +} + +/// Purges an entity that was updated, producing 2 temporal editions. +/// +/// Verifies ALL editions are removed, not just the latest. The select phase uses a point query on +/// `transaction_time` (now) pinned at `decision_time`, which finds only the current version. But +/// `collect_entity_edition_ids` then queries `entity_temporal_metadata` without temporal +/// restriction, capturing all historical edition IDs for the DELETE operations. +#[tokio::test] +async fn published_entity_with_history() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = api + .store + .create_entity( + api.account_id, + CreateEntityParams { + web_id: WebId::new(api.account_id), + entity_uuid: None, + decision_time: None, + entity_type_ids: HashSet::from([person_type_id()]), + properties: PropertyObjectWithMetadata::from_parts(alice(), None) + .expect("could not create property with metadata object"), + confidence: None, + link_data: None, + draft: false, + policies: Vec::new(), + provenance: provenance(), + }, + ) + .await + .expect("could not create entity"); + + let entity_id = entity.metadata.record_id.entity_id; + + api.store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + properties: vec![PropertyPatchOperation::Replace { + path: PropertyPath::default(), + property: PropertyWithMetadata::from_parts(Property::Object(bob()), None) + .expect("could not create property with metadata"), + }], + entity_type_ids: HashSet::new(), + archived: None, + draft: None, + decision_time: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not update entity"); + + // Unbounded temporal query shows both editions + assert_eq!(count_entity(&api, entity_id, false).await, 2); + + let web_id = entity_id.web_id; + let entity_uuid = entity_id.entity_uuid; + assert!( + raw_count(&api, 
"entity_temporal_metadata", web_id, entity_uuid).await >= 2, + "entity must have >= 2 temporal metadata rows before purge" + ); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("could not delete entity"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + // All history gone — both via read path and raw table count + assert_eq!(count_entity(&api, entity_id, false).await, 0); + assert_eq!( + raw_count(&api, "entity_temporal_metadata", web_id, entity_uuid).await, + 0, + "all temporal metadata rows must be deleted, not just the current edition" + ); +} + +/// Succeeds silently when the filter matches nothing. +/// +/// The [`SelectCompiler`] queries `entity_temporal_metadata`, not `entity_ids`. A nonexistent +/// entity has no temporal rows, so both full and draft targets are empty and +/// `execute_entity_deletion` returns `Ok(())` immediately. +#[tokio::test] +async fn no_match_is_noop() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let nonexistent_id = EntityId { + web_id: WebId::new(api.account_id), + entity_uuid: EntityUuid::new(Uuid::new_v4()), + draft_id: None, + }; + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(nonexistent_id), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("deletion of nonexistent entity should not fail"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 0, + } + ); +} + +/// `include_drafts` has no effect on published-only entities. 
+/// +/// A published entity has `draft_id IS NULL` in all temporal rows. With `include_drafts: true`, +/// [`SelectCompiler`] does NOT add `draft_id IS NULL` — but since all rows already satisfy that +/// condition, the result is identical to `include_drafts: false`. Both calls must produce the same +/// `DeletionSummary` and leave the entity equally purged. +#[tokio::test] +async fn include_drafts_irrelevant_for_published() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let summary_a = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_a), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("could not delete entity A"); + + let summary_b = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(id_b), + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("could not delete entity B"); + + let expected = DeletionSummary { + full_entities: 1, + draft_deletions: 0, + }; + assert_eq!(summary_a, expected); + assert_eq!(summary_b, expected); + assert_eq!(count_entity(&api, id_a, false).await, 0); + assert_eq!(count_entity(&api, id_b, false).await, 0); +} + +/// `Purge` with [`LinkDeletionBehavior::Error`] succeeds when no incoming links exist. +/// +/// All other purge tests use `Ignore` link behavior, skipping `count_incoming_links` entirely. 
+/// This test exercises the `Error` path: `count_incoming_links` runs, finds 0 incoming edges from +/// sources outside the batch, and deletion proceeds normally. Confirms the happy path through the +/// link check doesn't spuriously block deletion. +#[tokio::test] +async fn purge_error_succeeds_without_incoming_links() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Error, + }, + decision_time: None, + }, + ) + .await + .expect("purge with Error behavior should succeed when no incoming links exist"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + assert_eq!(count_entity(&api, entity_id, false).await, 0); + assert!(raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); +} + +/// Verifies the tombstone carries correct deletion provenance. +/// +/// After purge, the `entity_ids` row persists with `provenance->'deletion'` containing +/// `deletedById` (matching the acting actor), `deletedAtTransactionTime`, and +/// `deletedAtDecisionTime`. The provenance is merged into the existing JSONB via +/// `update_entity_ids_provenance` using PostgreSQL's `||` operator. Verified via raw SQL. 
+#[tokio::test] +async fn tombstone_has_deletion_provenance() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("could not delete entity"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert!(raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); + + let deletion = get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .expect("deletion provenance should exist"); + + assert_eq!(deletion.deleted_by_id, api.account_id); +} + +/// Verifies all satellite tables are cleaned after purge. +/// +/// After purge, `entity_temporal_metadata`, `entity_editions`, `entity_is_of_type`, +/// `entity_embeddings`, and `entity_drafts` must have zero rows for the entity. +/// `delete_target_data` deletes these in FK-safe order: edition IDs are collected first, then +/// `entity_is_of_type` and `entity_embeddings` (no children), then `entity_temporal_metadata` (FK +/// to editions and drafts), then `entity_editions`, and finally `entity_drafts`. Verified via raw +/// SQL `COUNT(*)` per table. 
+#[tokio::test] +async fn satellite_tables_cleaned() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("could not delete entity"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + let web_id = entity_id.web_id; + let entity_uuid = entity_id.entity_uuid; + assert_eq!( + raw_count(&api, "entity_temporal_metadata", web_id, entity_uuid).await, + 0 + ); + assert_eq!( + raw_count(&api, "entity_embeddings", web_id, entity_uuid).await, + 0 + ); + assert_eq!( + raw_count(&api, "entity_drafts", web_id, entity_uuid).await, + 0 + ); + // entity_editions and entity_is_of_type have no (web_id, entity_uuid) columns — verified + // implicitly: edition IDs are collected from temporal metadata before deletion, so if + // temporal metadata is empty and the deletion succeeded without FK violations, both must be + // clean. + + // entity_ids survives as tombstone + assert!(raw_entity_ids_exists(&api, web_id, entity_uuid).await); +} + +/// Purging the same entity twice succeeds silently on the second call. +/// +/// After the first purge, `entity_temporal_metadata` is gone. Since `select_entities_for_deletion` +/// queries temporal metadata (not `entity_ids`), the second call finds no matching rows, produces +/// empty targets, and returns `Ok(())`. The surviving tombstone in `entity_ids` does not interfere. +/// Provenance must not be double-stamped. 
+#[tokio::test] +async fn double_deletion_is_noop() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + let summary1 = api + .store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("first purge failed"); + + assert_eq!( + summary1, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + let prov_after_first = get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .expect("provenance should exist after first purge"); + + let summary2 = api + .store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("second purge should not fail"); + + assert_eq!( + summary2, + DeletionSummary { + full_entities: 0, + draft_deletions: 0, + } + ); + + let prov_after_second = get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .expect("provenance should still exist after second purge"); + + assert_eq!( + prov_after_first, prov_after_second, + "provenance must not be double-stamped" + ); +} + +/// Purges multiple entities in a single call. +/// +/// Uses a filter matching 2+ entities. Exercises `UNNEST` arrays with multiple +/// elements. Both entities must be fully deleted (count = 0 for each). 
+#[tokio::test] +async fn multiple_entities_in_batch() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let filter = Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(id_b), + ]); + + let summary = api + .store + .delete_entities(api.account_id, purge_params(filter)) + .await + .expect("could not delete entities"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 2, + draft_deletions: 0, + } + ); + assert_eq!(count_entity(&api, id_a, false).await, 0); + assert_eq!(count_entity(&api, id_b, false).await, 0); + + // Both tombstoned + assert!(raw_entity_ids_exists(&api, id_a.web_id, id_a.entity_uuid).await); + assert!(raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); +} + +/// Purging one entity must not affect another entity's data. +/// +/// After purging A, entity B's satellite table rows, `entity_ids`, and temporal +/// data must be completely intact. Guards against `UNNEST` over-matching. 
+#[tokio::test] +async fn other_entity_unaffected() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let summary = api + .store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(id_a)), + ) + .await + .expect("could not delete entity A"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, id_a, false).await, 0); + assert_eq!(count_entity(&api, id_b, false).await, 1); + + // B's satellite data intact + let web_b = id_b.web_id; + let uuid_b = id_b.entity_uuid; + assert!(raw_count(&api, "entity_temporal_metadata", web_b, uuid_b).await > 0); + assert!(raw_entity_ids_exists(&api, web_b, uuid_b).await); +} + +/// Purges a batch containing entities with different states in one call. +/// +/// Batch includes: a plain entity, a link entity (has immutable `entity_edge` rows), and a +/// draft-only entity. Uses `Ignore` link behavior and `include_drafts: true`. The draft-only entity +/// gets promoted to a full target by `promote_draft_only_entities` (all drafts matched, no +/// published version). Exercises the full partition → `delete_target_data` (both full and draft +/// branches) → `delete_entity_edge` → `update_entity_ids_provenance` pipeline. 
+#[tokio::test] +async fn batch_with_mixed_entity_states() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + let draft = create_person(&mut api, alice(), true).await; + let id_draft = draft.metadata.record_id.entity_id; + + let filter = Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(id_b), + Filter::for_entity_by_entity_id(id_link), + Filter::for_entity_by_entity_id(id_draft), + ]); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter, + include_drafts: true, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("could not delete mixed batch"); + + // A, B, L are full targets; D is promoted to full (all drafts matched, no published version) + assert_eq!( + summary, + DeletionSummary { + full_entities: 4, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, id_a, false).await, 0); + assert_eq!(count_entity(&api, id_b, false).await, 0); + assert_eq!(count_entity(&api, id_link, false).await, 0); + assert_eq!(count_entity(&api, id_draft, true).await, 0); + + // All tombstoned + assert!(raw_entity_ids_exists(&api, id_a.web_id, id_a.entity_uuid).await); + assert!(raw_entity_ids_exists(&api, id_draft.web_id, id_draft.entity_uuid).await); +} + +/// Purges entities owned by different [`WebId`]s in a single batch. +/// +/// All per-table DELETE operations use `UNNEST($1::UUID[], $2::UUID[])` which pairs `web_ids[i]` +/// with `entity_uuids[i]` positionally. 
If the parallel vecs in `FullEntityDeletionTarget` get +/// misaligned, wrong entities are deleted. This test creates a second user/web via `seed()` setup, +/// creates one entity per web, purges both, and verifies correct tombstoning without +/// cross-contamination. +#[tokio::test] +async fn cross_web_batch() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + + let second_user = create_second_user(&mut api).await; + let entity_b = api + .store + .create_entity( + second_user, + CreateEntityParams { + web_id: WebId::new(second_user), + entity_uuid: None, + decision_time: None, + entity_type_ids: HashSet::from([person_type_id()]), + properties: PropertyObjectWithMetadata::from_parts(bob(), None) + .expect("could not create property with metadata object"), + confidence: None, + link_data: None, + draft: false, + policies: Vec::new(), + provenance: provenance(), + }, + ) + .await + .expect("could not create entity B"); + + let id_b = entity_b.metadata.record_id.entity_id; + assert_ne!( + id_a.web_id, id_b.web_id, + "entities must be in different webs" + ); + + let filter = Filter::Any(vec![ + Filter::for_entity_by_entity_id(id_a), + Filter::for_entity_by_entity_id(id_b), + ]); + + let summary = api + .store + .delete_entities(api.account_id, purge_params(filter)) + .await + .expect("could not delete cross-web batch"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 2, + draft_deletions: 0, + } + ); + + // Both tombstoned + assert!(raw_entity_ids_exists(&api, id_a.web_id, id_a.entity_uuid).await); + assert!(raw_entity_ids_exists(&api, id_b.web_id, id_b.entity_uuid).await); + + // Satellite data gone for both (read path AND raw table) + assert_eq!(count_entity(&api, id_a, false).await, 0); + assert_eq!(count_entity(&api, id_b, false).await, 0); + assert_eq!( + raw_count( + &api, + 
"entity_temporal_metadata", + id_a.web_id, + id_a.entity_uuid + ) + .await, + 0, + "entity A temporal metadata must be cleaned in its web" + ); + assert_eq!( + raw_count( + &api, + "entity_temporal_metadata", + id_b.web_id, + id_b.entity_uuid + ) + .await, + 0, + "entity B temporal metadata must be cleaned in its web" + ); +} + +/// Querying a purged entity returns an empty result, not an error. +/// +/// After purge, `entity_ids` has a tombstone but `entity_temporal_metadata` and `entity_editions` +/// are gone. The read path uses a recursive CTE that expects non-null `entity_edition_id` and type +/// resolution JOINs on `entity_editions`. This test validates that those JOINs produce zero rows +/// (not errors) when the tombstone exists but satellite data does not. Both `query_entities` and +/// `get_entity_by_id` must handle this gracefully. +#[tokio::test] +async fn query_after_purge_returns_empty() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + api.store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("could not delete entity"); + + // count_entity exercises the full read path (SelectCompiler → SQL) + // If the tombstone caused a read error, this would panic instead of returning 0 + assert_eq!(count_entity(&api, entity_id, false).await, 0); + assert_eq!(count_entity(&api, entity_id, true).await, 0); +} + +/// Verifies provenance records the deleting actor, not the creating actor. +/// +/// Actor A creates the entity, Actor B deletes it. `update_entity_ids_provenance` receives the +/// `actor_id` from the `delete_entities` caller and stores it as `deleted_by_id` in +/// [`EntityDeletionProvenance`]. The tombstone's `provenance->'deletion'->'deletedById'` must be +/// Actor B's ID, not A's. 
+#[tokio::test] +async fn different_actor_deleting() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + // Entity created by api.account_id (actor A) + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Delete as actor B + let actor_b = create_second_user(&mut api).await; + let summary = api + .store + .delete_entities( + actor_b, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("could not delete entity as actor B"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + let deletion = get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .expect("deletion provenance should exist"); + + assert_eq!(deletion.deleted_by_id, actor_b); + assert_ne!(deletion.deleted_by_id, api.account_id); +} + +/// Purges an entity that has embeddings in `entity_embeddings`. +/// +/// No other deletion test creates embeddings. This verifies `delete_entity_embeddings` actually +/// deletes real rows. For full targets the DELETE uses `(web_id, entity_uuid) IN (UNNEST(...))`, +/// removing all embeddings regardless of `draft_id`. 
+#[tokio::test]
+async fn entity_with_embeddings() {
+    let mut database = DatabaseTestWrapper::new().await;
+    let mut api = seed(&mut database).await;
+
+    let entity = create_person(&mut api, alice(), false).await;
+    let entity_id = entity.metadata.record_id.entity_id;
+
+    // Add embeddings
+    let now_tt = Timestamp::<TransactionTime>::now();
+    api.store
+        .update_entity_embeddings(
+            api.account_id,
+            UpdateEntityEmbeddingsParams {
+                entity_id,
+                embeddings: vec![EntityEmbedding {
+                    property: None,
+                    embedding: Embedding::from(vec![0.0_f32; Embedding::DIM]),
+                }],
+                updated_at_transaction_time: now_tt,
+                updated_at_decision_time: now_tt.cast(),
+                reset: true,
+            },
+        )
+        .await
+        .expect("could not add embeddings");
+
+    // Verify embeddings exist before deletion
+    let web_id = entity_id.web_id;
+    let entity_uuid = entity_id.entity_uuid;
+    assert!(
+        raw_count(&api, "entity_embeddings", web_id, entity_uuid).await > 0,
+        "embeddings should exist before purge"
+    );
+
+    let summary = api
+        .store
+        .delete_entities(
+            api.account_id,
+            purge_params(Filter::for_entity_by_entity_id(entity_id)),
+        )
+        .await
+        .expect("could not delete entity with embeddings");
+
+    assert_eq!(
+        summary,
+        DeletionSummary {
+            full_entities: 1,
+            draft_deletions: 0,
+        }
+    );
+
+    assert_eq!(
+        raw_count(&api, "entity_embeddings", web_id, entity_uuid).await,
+        0,
+        "embeddings should be cleaned up after purge"
+    );
+}
+
+/// Stress test with 50+ entities in a single batch.
+///
+/// Exercises PostgreSQL `UNNEST` array performance with large parameters.
+/// All entities must be fully deleted.
+#[tokio::test] +async fn large_batch() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let mut ids = Vec::new(); + for _ in 0..50 { + let entity = create_person(&mut api, alice(), false).await; + ids.push(entity.metadata.record_id.entity_id); + } + + let filter = Filter::Any( + ids.iter() + .map(|id| Filter::for_entity_by_entity_id(*id)) + .collect(), + ); + + let summary = api + .store + .delete_entities(api.account_id, purge_params(filter)) + .await + .expect("could not delete large batch"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 50, + draft_deletions: 0, + } + ); + + for id in &ids { + assert_eq!(count_entity(&api, *id, false).await, 0); + } +} + +/// Purges entities matching a type-based filter instead of entity ID. +/// +/// All other deletion tests use `Filter::for_entity_by_entity_id`. This test uses a filter on +/// entity type, exercising a different [`SelectCompiler`] code path in +/// `select_entities_for_deletion`. The select phase must correctly partition type-matched entities +/// into `FullEntityDeletionTarget` entries. 
+#[tokio::test] +async fn filter_by_entity_type() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity_a = create_person(&mut api, alice(), false).await; + let entity_b = create_person(&mut api, bob(), false).await; + let id_a = entity_a.metadata.record_id.entity_id; + let id_b = entity_b.metadata.record_id.entity_id; + + // Create a link entity (friend-of type) as the "other type" + let link = create_link(&mut api, id_a, id_b).await; + let id_link = link.metadata.record_id.entity_id; + + // Delete by person type (not entity ID) — should match A and B but not the link + let person_type = person_type_id(); + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_type_id(&person_type), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: None, + }, + ) + .await + .expect("could not delete by entity type"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 2, + draft_deletions: 0, + } + ); + + // Both persons deleted + assert_eq!(count_entity(&api, id_a, false).await, 0); + assert_eq!(count_entity(&api, id_b, false).await, 0); + + // Link entity survives (different type) + assert!(count_entity(&api, id_link, false).await >= 1); +} + +/// Purges an entity that was previously archived. +/// +/// Archived entities have an upper temporal bound set. `select_entities_for_deletion` uses +/// `QueryTemporalAxes::TransactionTime` with pinned `decision_time` and a point query on +/// `transaction_time` at now. This test verifies that archived entities (closed temporal bounds) +/// are still found by this query window and correctly purged. 
+#[tokio::test] +async fn archived_entity() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + // Archive the entity + api.store + .patch_entity( + api.account_id, + PatchEntityParams { + entity_id, + properties: vec![], + entity_type_ids: HashSet::new(), + archived: Some(true), + draft: None, + decision_time: None, + confidence: None, + provenance: provenance(), + }, + ) + .await + .expect("could not archive entity"); + + let summary = api + .store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("could not purge archived entity"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + assert_eq!(count_entity(&api, entity_id, false).await, 0); + assert!(raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); +} + +/// Verifies the JSONB `||` merge in `update_entity_ids_provenance` preserves existing provenance +/// keys. +/// +/// `entity_ids.provenance` already contains `createdById`, `createdAtTransactionTime`, +/// `createdAtDecisionTime`, and potentially `firstNonDraftCreatedAt*` from entity creation. The `|| +/// jsonb_build_object('deletion', ...)` merge adds a new top-level `deletion` key. PostgreSQL's +/// `||` on JSONB objects merges top-level keys (overwrites on collision, preserves non-colliding +/// ones). Since `deletion` is a new key, all existing keys must survive intact. 
+#[tokio::test] +async fn provenance_merge_preserves_existing_keys() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + api.store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("could not delete entity"); + + let prov = get_inferred_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .expect("provenance should exist"); + + // Deserialization as InferredEntityProvenance guarantees creation keys survived the merge + assert_eq!(prov.created_by_id, api.account_id); + + // Deletion key must be added + assert!(prov.deletion.is_some(), "deletion key must be present"); +} + +/// Attempting to erase a previously purged entity is a no-op. +/// +/// After purge, `entity_ids` exists as a tombstone but `entity_temporal_metadata` is gone. +/// `select_entities_for_deletion` queries `entity_temporal_metadata` via [`SelectCompiler`], +/// finds no rows, and produces empty targets. Erase returns successfully with zero counts. +/// The tombstone in `entity_ids` is unreachable through the deletion API — it persists +/// permanently (or until direct SQL cleanup). 
+#[tokio::test] +async fn erase_after_purge_is_noop() { + let mut database = DatabaseTestWrapper::new().await; + let mut api = seed(&mut database).await; + + let entity = create_person(&mut api, alice(), false).await; + let entity_id = entity.metadata.record_id.entity_id; + + api.store + .delete_entities( + api.account_id, + purge_params(Filter::for_entity_by_entity_id(entity_id)), + ) + .await + .expect("purge failed"); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Erase, + decision_time: None, + }, + ) + .await + .expect("erase after purge should not fail"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 0, + draft_deletions: 0, + } + ); + + // Tombstone survives — unreachable through deletion API + assert!(raw_entity_ids_exists(&api, entity_id.web_id, entity_id.entity_uuid).await); +} diff --git a/libs/@local/graph/postgres-store/tests/deletion/validation.rs b/libs/@local/graph/postgres-store/tests/deletion/validation.rs new file mode 100644 index 00000000000..fccd077cd9b --- /dev/null +++ b/libs/@local/graph/postgres-store/tests/deletion/validation.rs @@ -0,0 +1,347 @@ +use core::assert_matches; +use std::collections::HashSet; + +use hash_graph_store::{ + entity::{ + CreateEntityParams, DeleteEntitiesParams, DeletionScope, DeletionSummary, EntityStore as _, + LinkDeletionBehavior, PatchEntityParams, + }, + error::DeletionError, + filter::Filter, +}; +use hash_graph_temporal_versioning::{ + DecisionTime, TemporalTagged as _, Timestamp, TransactionTime, +}; +use type_system::knowledge::property::{ + Property, PropertyObjectWithMetadata, PropertyPatchOperation, PropertyPath, + PropertyWithMetadata, +}; + +use crate::{ + DatabaseTestWrapper, alice, bob, count_entity, get_deletion_provenance, person_type_id, + provenance, raw_count, seed, +}; + +/// Rejects deletion when `decision_time` exceeds 
`transaction_time`.
+///
+/// `execute_entity_deletion` computes `transaction_time = Timestamp::<TransactionTime>::now()` and
+/// checks `decision_time > transaction_time.cast()`. Setting `decision_time` to the future triggers
+/// this guard before any database work happens.
+#[tokio::test]
+async fn decision_time_exceeds_transaction_time() {
+    let mut database = DatabaseTestWrapper::new().await;
+    let mut api = seed(&mut database).await;
+
+    let entity = crate::create_person(&mut api, alice(), false).await;
+    let entity_id = entity.metadata.record_id.entity_id;
+
+    // 1 hour in the future
+    let future_time = Timestamp::<DecisionTime>::from_unix_timestamp(
+        time::OffsetDateTime::now_utc().unix_timestamp() + 3600,
+    );
+
+    let err = api
+        .store
+        .delete_entities(
+            api.account_id,
+            DeleteEntitiesParams {
+                filter: Filter::for_entity_by_entity_id(entity_id),
+                include_drafts: false,
+                scope: DeletionScope::Purge {
+                    link_behavior: LinkDeletionBehavior::Ignore,
+                },
+                decision_time: Some(future_time),
+            },
+        )
+        .await
+        .expect_err("future decision_time should be rejected");
+
+    assert_matches!(err.current_context(), DeletionError::InvalidDecisionTime);
+}
+
+/// Accepts a past `decision_time` and records it in the tombstone.
+///
+/// The past `decision_time` is used both as the pinned axis for `select_entities_for_deletion` (the
+/// entity must have been alive at that time) and as `deleted_at_decision_time` in
+/// [`EntityDeletionProvenance`]. The tombstone must reflect the explicit past timestamp, not the
+/// current time.
+#[tokio::test]
+async fn decision_time_in_past_succeeds() {
+    let mut database = DatabaseTestWrapper::new().await;
+    let mut api = seed(&mut database).await;
+
+    // Create entity with decision_time 2 hours ago so it's alive at 1 hour ago
+    let two_hours_ago = Timestamp::<DecisionTime>::from_unix_timestamp(
+        time::OffsetDateTime::now_utc().unix_timestamp() - 7200,
+    );
+
+    let entity = api
+        .store
+        .create_entity(
+            api.account_id,
+            CreateEntityParams {
+                web_id: type_system::principal::actor_group::WebId::new(api.account_id),
+                entity_uuid: None,
+                decision_time: Some(two_hours_ago),
+                entity_type_ids: HashSet::from([person_type_id()]),
+                properties: PropertyObjectWithMetadata::from_parts(alice(), None)
+                    .expect("could not create property with metadata object"),
+                confidence: None,
+                link_data: None,
+                draft: false,
+                policies: Vec::new(),
+                provenance: provenance(),
+            },
+        )
+        .await
+        .expect("could not create entity");
+
+    let entity_id = entity.metadata.record_id.entity_id;
+
+    // Delete with decision_time 1 hour ago (entity was alive then)
+    let one_hour_ago = Timestamp::<DecisionTime>::from_unix_timestamp(
+        time::OffsetDateTime::now_utc().unix_timestamp() - 3600,
+    );
+
+    let summary = api
+        .store
+        .delete_entities(
+            api.account_id,
+            DeleteEntitiesParams {
+                filter: Filter::for_entity_by_entity_id(entity_id),
+                include_drafts: false,
+                scope: DeletionScope::Purge {
+                    link_behavior: LinkDeletionBehavior::Ignore,
+                },
+                decision_time: Some(one_hour_ago),
+            },
+        )
+        .await
+        .expect("past decision_time should succeed");
+
+    assert_eq!(
+        summary,
+        DeletionSummary {
+            full_entities: 1,
+            draft_deletions: 0,
+        }
+    );
+
+    let deletion = get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid)
+        .await
+        .expect("deletion provenance should exist");
+
+    assert_eq!(deletion.deleted_at_decision_time, one_hour_ago);
+}
+
+/// Defaults `decision_time` to `transaction_time` when `None`.
+///
+/// When `decision_time` is `None`, `execute_entity_deletion` defaults it to
+/// `transaction_time.cast()`. Both timestamps are derived from the same
+/// `Timestamp::<TransactionTime>::now()` call, so `deleted_at_decision_time` and
+/// `deleted_at_transaction_time` in the provenance must be exactly equal (`.cast()` is a zero-cost
+/// type conversion, not a second clock read).
+#[tokio::test]
+async fn decision_time_defaults_to_transaction_time() {
+    let mut database = DatabaseTestWrapper::new().await;
+    let mut api = seed(&mut database).await;
+
+    let entity = crate::create_person(&mut api, alice(), false).await;
+    let entity_id = entity.metadata.record_id.entity_id;
+
+    let summary = api
+        .store
+        .delete_entities(
+            api.account_id,
+            DeleteEntitiesParams {
+                filter: Filter::for_entity_by_entity_id(entity_id),
+                include_drafts: false,
+                scope: DeletionScope::Purge {
+                    link_behavior: LinkDeletionBehavior::Ignore,
+                },
+                decision_time: None,
+            },
+        )
+        .await
+        .expect("delete should succeed");
+
+    assert_eq!(
+        summary,
+        DeletionSummary {
+            full_entities: 1,
+            draft_deletions: 0,
+        }
+    );
+
+    let deletion = get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid)
+        .await
+        .expect("deletion provenance should exist");
+
+    assert_eq!(
+        deletion.deleted_at_decision_time.cast::<TransactionTime>(),
+        deletion.deleted_at_transaction_time,
+        "decision_time should exactly equal transaction_time when defaulted"
+    );
+}
+
+/// Finds nothing when `decision_time` predates entity creation.
+///
+/// `select_entities_for_deletion` uses `QueryTemporalAxes::TransactionTime` with the decision axis
+/// pinned at `decision_time` and a point query on transaction time at now. An entity created after
+/// the given `decision_time` was not alive at that decision time, so its temporal rows don't match
+/// the pinned axis and the filter produces nothing. Deletion is a no-op and the entity survives.
+#[tokio::test]
+async fn decision_time_before_creation_finds_nothing() {
+    let mut database = DatabaseTestWrapper::new().await;
+    let mut api = seed(&mut database).await;
+
+    // Entity created "now" (default decision_time)
+    let entity = crate::create_person(&mut api, alice(), false).await;
+    let entity_id = entity.metadata.record_id.entity_id;
+
+    // Attempt deletion with decision_time 1 hour ago (before entity existed)
+    let past_time = Timestamp::<DecisionTime>::from_unix_timestamp(
+        time::OffsetDateTime::now_utc().unix_timestamp() - 3600,
+    );
+
+    let summary = api
+        .store
+        .delete_entities(
+            api.account_id,
+            DeleteEntitiesParams {
+                filter: Filter::for_entity_by_entity_id(entity_id),
+                include_drafts: false,
+                scope: DeletionScope::Purge {
+                    link_behavior: LinkDeletionBehavior::Ignore,
+                },
+                decision_time: Some(past_time),
+            },
+        )
+        .await
+        .expect("delete with old decision_time should succeed (noop)");
+
+    assert_eq!(
+        summary,
+        DeletionSummary {
+            full_entities: 0,
+            draft_deletions: 0,
+        }
+    );
+
+    // Entity still exists
+    assert!(count_entity(&api, entity_id, false).await >= 1);
+}
+
+/// Past `decision_time` still deletes ALL temporal editions, not just the one alive at that time.
+///
+/// `select_entities_for_deletion` pins at `decision_time` to find the entity, but
+/// `collect_entity_edition_ids` queries `entity_temporal_metadata` without temporal restriction.
+/// This means even editions created AFTER the `decision_time` are captured and deleted. If
+/// `collect_entity_edition_ids` accidentally filtered by `decision_time`, the second edition
+/// (created later) would survive and the test would fail.
+#[tokio::test]
+async fn past_decision_time_deletes_all_editions() {
+    let mut database = DatabaseTestWrapper::new().await;
+    let mut api = seed(&mut database).await;
+
+    // Create entity 2 hours ago
+    let two_hours_ago = Timestamp::<DecisionTime>::from_unix_timestamp(
+        time::OffsetDateTime::now_utc().unix_timestamp() - 7200,
+    );
+
+    let entity = api
+        .store
+        .create_entity(
+            api.account_id,
+            CreateEntityParams {
+                web_id: type_system::principal::actor_group::WebId::new(api.account_id),
+                entity_uuid: None,
+                decision_time: Some(two_hours_ago),
+                entity_type_ids: HashSet::from([person_type_id()]),
+                properties: PropertyObjectWithMetadata::from_parts(alice(), None)
+                    .expect("could not create property with metadata object"),
+                confidence: None,
+                link_data: None,
+                draft: false,
+                policies: Vec::new(),
+                provenance: provenance(),
+            },
+        )
+        .await
+        .expect("could not create entity");
+
+    let entity_id = entity.metadata.record_id.entity_id;
+    let web_id = entity_id.web_id;
+    let entity_uuid = entity_id.entity_uuid;
+
+    // Create a second edition "now" (default decision_time)
+    api.store
+        .patch_entity(
+            api.account_id,
+            PatchEntityParams {
+                entity_id,
+                decision_time: None,
+                entity_type_ids: HashSet::default(),
+                properties: vec![PropertyPatchOperation::Replace {
+                    path: PropertyPath::default(),
+                    property: PropertyWithMetadata::from_parts(Property::Object(bob()), None)
+                        .expect("could not create property with metadata"),
+                }],
+                draft: None,
+                archived: None,
+                confidence: None,
+                provenance: provenance(),
+            },
+        )
+        .await
+        .expect("could not patch entity");
+
+    assert!(
+        raw_count(&api, "entity_temporal_metadata", web_id, entity_uuid).await >= 2,
+        "entity must have >= 2 temporal rows (original + patched)"
+    );
+
+    // Delete with decision_time 1 hour ago — entity was alive then (created 2h ago)
+    // The second edition (created "now") was NOT alive at 1h ago, but must still be deleted
+    let one_hour_ago = Timestamp::<DecisionTime>::from_unix_timestamp(
time::OffsetDateTime::now_utc().unix_timestamp() - 3600, + ); + + let summary = api + .store + .delete_entities( + api.account_id, + DeleteEntitiesParams { + filter: Filter::for_entity_by_entity_id(entity_id), + include_drafts: false, + scope: DeletionScope::Purge { + link_behavior: LinkDeletionBehavior::Ignore, + }, + decision_time: Some(one_hour_ago), + }, + ) + .await + .expect("deletion with past decision_time should succeed"); + + assert_eq!( + summary, + DeletionSummary { + full_entities: 1, + draft_deletions: 0, + } + ); + + // ALL temporal rows must be gone — not just the edition alive at 1h ago + assert_eq!( + raw_count(&api, "entity_temporal_metadata", web_id, entity_uuid).await, + 0, + "all temporal editions must be deleted, not just the one alive at decision_time" + ); + assert_eq!(count_entity(&api, entity_id, false).await, 0); + + let deletion = get_deletion_provenance(&api, entity_id.web_id, entity_id.entity_uuid) + .await + .expect("deletion provenance should exist"); + + assert_eq!(deletion.deleted_at_decision_time, one_hour_ago); +} diff --git a/libs/@local/graph/postgres-store/tests/principals/main.rs b/libs/@local/graph/postgres-store/tests/principals/main.rs index 08ed79df5c3..4fc0f11ce14 100644 --- a/libs/@local/graph/postgres-store/tests/principals/main.rs +++ b/libs/@local/graph/postgres-store/tests/principals/main.rs @@ -2,10 +2,13 @@ // Library Features assert_matches, )] -#![expect(clippy::panic_in_result_fn)] +#![expect(clippy::panic_in_result_fn, clippy::missing_panics_doc)] extern crate alloc; +#[path = "../common/mod.rs"] +mod common; + mod actions; mod ai; mod machine; @@ -22,79 +25,13 @@ use hash_graph_authorization::policies::{ principal::PrincipalConstraint, store::{PolicyCreationParams, PolicyStore as _, PrincipalStore as _}, }; -use hash_graph_postgres_store::{ - Environment, load_env, - store::{ - DatabaseConnectionInfo, DatabasePoolConfig, DatabaseType, PostgresStore, PostgresStorePool, - PostgresStoreSettings, 
error::StoreError,
-    },
-};
-use hash_graph_store::pool::StorePool;
-use hash_telemetry::logging::env_filter;
-use tokio_postgres::{NoTls, Transaction};
+use hash_graph_postgres_store::store::{PostgresStore, error::StoreError};
+use tokio_postgres::Transaction;
 use type_system::principal::actor::ActorId;
 
-pub fn init_logging() {
-    // It's likely that the initialization failed due to a previous initialization attempt. In this
-    // case, we can ignore the error.
-    let _: Result<_, _> = tracing_subscriber::fmt()
-        .with_ansi(true)
-        .with_env_filter(env_filter(None))
-        .with_file(true)
-        .with_line_number(true)
-        .with_test_writer()
-        .try_init();
-}
-
-pub struct DatabaseTestWrapper {
-    _pool: PostgresStorePool,
-    connection: <PostgresStorePool as StorePool>::Store<'static>,
-}
+pub use crate::common::DatabaseTestWrapper;
 
 impl DatabaseTestWrapper {
-    pub(crate) async fn new() -> Self {
-        load_env(Environment::Test);
-        init_logging();
-
-        let user = std::env::var("HASH_GRAPH_PG_USER").unwrap_or_else(|_| "graph".to_owned());
-        let password =
-            std::env::var("HASH_GRAPH_PG_PASSWORD").unwrap_or_else(|_| "graph".to_owned());
-        let host = std::env::var("HASH_GRAPH_PG_HOST").unwrap_or_else(|_| "localhost".to_owned());
-        let port = std::env::var("HASH_GRAPH_PG_PORT").map_or(5432, |port| {
-            port.parse::<u16>().expect("could not parse port")
-        });
-        let database =
-            std::env::var("HASH_GRAPH_PG_DATABASE").unwrap_or_else(|_| "graph".to_owned());
-
-        let connection_info = DatabaseConnectionInfo::new(
-            DatabaseType::Postgres,
-            user,
-            password,
-            host,
-            port,
-            database,
-        );
-
-        let pool = PostgresStorePool::new(
-            &connection_info,
-            &DatabasePoolConfig::default(),
-            NoTls,
-            PostgresStoreSettings::default(),
-        )
-        .await
-        .expect("could not connect to database");
-
-        let connection = pool
-            .acquire_owned(None)
-            .await
-            .expect("could not acquire a database connection");
-
-        Self {
-            _pool: pool,
-            connection,
-        }
-    }
-
     pub(crate) async fn seed(
         &mut self,
     ) -> Result<(PostgresStore<Transaction<'_>>, ActorId), Report<StoreError>> {
diff --git a/libs/@local/graph/store/src/entity/mod.rs b/libs/@local/graph/store/src/entity/mod.rs index 550202edf70..886e40201f5 100644 --- a/libs/@local/graph/store/src/entity/mod.rs +++ b/libs/@local/graph/store/src/entity/mod.rs @@ -4,12 +4,12 @@ pub use self::{ EntityQuerySortingToken, EntityQueryToken, }, store::{ - ClosedMultiEntityTypeMap, CountEntitiesParams, CreateEntityParams, DiffEntityParams, - DiffEntityResult, EntityPermissions, EntityStore, EntityValidationType, - HasPermissionForEntitiesParams, PatchEntityParams, QueryConversion, QueryEntitiesParams, - QueryEntitiesResponse, QueryEntitySubgraphParams, QueryEntitySubgraphResponse, - UpdateEntityEmbeddingsParams, ValidateEntityComponents, ValidateEntityError, - ValidateEntityParams, + ClosedMultiEntityTypeMap, CountEntitiesParams, CreateEntityParams, DeleteEntitiesParams, + DeletionScope, DeletionSummary, DiffEntityParams, DiffEntityResult, EntityPermissions, + EntityStore, EntityValidationType, HasPermissionForEntitiesParams, LinkDeletionBehavior, + PatchEntityParams, QueryConversion, QueryEntitiesParams, QueryEntitiesResponse, + QueryEntitySubgraphParams, QueryEntitySubgraphResponse, UpdateEntityEmbeddingsParams, + ValidateEntityComponents, ValidateEntityError, ValidateEntityParams, }, validation_report::{ EmptyEntityTypes, EntityRetrieval, EntityTypeRetrieval, EntityTypesError, diff --git a/libs/@local/graph/store/src/entity/store.rs b/libs/@local/graph/store/src/entity/store.rs index 480c35ee2fe..c064cbce4f8 100644 --- a/libs/@local/graph/store/src/entity/store.rs +++ b/libs/@local/graph/store/src/entity/store.rs @@ -36,7 +36,7 @@ use utoipa::{ use crate::{ entity::{EntityQueryCursor, EntityQuerySorting, EntityValidationReport}, entity_type::{EntityTypeResolveDefinitions, IncludeEntityTypeOption}, - error::{CheckPermissionError, InsertionError, QueryError, UpdateError}, + error::{CheckPermissionError, DeletionError, InsertionError, QueryError, UpdateError}, filter::Filter, subgraph::{ Subgraph, 
@@ -531,6 +531,52 @@ pub struct HasPermissionForEntitiesParams<'a> { pub include_drafts: bool, } +#[derive(Debug, Copy, Clone, Deserialize)] +#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] +#[serde( + tag = "scope", + rename_all = "kebab-case", + rename_all_fields = "camelCase" +)] +pub enum DeletionScope { + // Archive, + Purge { link_behavior: LinkDeletionBehavior }, + Erase, +} + +#[derive(Debug, Copy, Clone, Deserialize)] +#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] +#[serde(rename_all = "kebab-case")] +pub enum LinkDeletionBehavior { + Ignore, + Error, + // Cascade, +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] +#[serde(rename_all = "camelCase")] +pub struct DeleteEntitiesParams<'a> { + #[serde(borrow)] + pub filter: Filter<'a, Entity>, + pub include_drafts: bool, + #[serde(flatten)] + pub scope: DeletionScope, + #[serde(default)] + pub decision_time: Option>, +} + +/// Summary of a deletion operation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] +#[serde(rename_all = "camelCase")] +pub struct DeletionSummary { + /// Number of entities fully deleted (tombstoned or erased). + pub full_entities: usize, + /// Number of draft-only deletions performed. + pub draft_deletions: usize, +} + /// Describes the API of a store implementation for [Entities]. /// /// [Entities]: Entity @@ -640,6 +686,45 @@ pub trait EntityStore { params: PatchEntityParams, ) -> impl Future>> + Send; + /// Deletes entities matching the `params` filter. + /// + /// **Purge** keeps `entity_ids` as a tombstone with deletion provenance; all edition data, + /// temporal metadata, type associations, embeddings, drafts, and outgoing edges are removed. + /// **Erase** additionally removes the `entity_ids` row, leaving no trace. 
+ /// + /// # Behavioral notes + /// + /// - **Erase + draft-only targets**: when [`DeletionScope::Erase`] is used but only draft-only + /// targets are produced (e.g., a partial draft match on an entity with a published version), + /// `entity_ids` is **not** deleted because published data still references it. Callers + /// passing `Erase` should not assume complete removal in this case. + /// + /// - **Filter interaction with `include_drafts`**: [`Filter::for_entity_by_entity_id`] with + /// `draft_id: None` matches on `(web_id, entity_uuid)` without restricting by `draft_id`. The + /// `draft_id IS NULL` constraint comes from `include_drafts: false` in the select compiler. + /// This means `include_drafts: true` + `draft_id: None` matches **all** rows (published + all + /// drafts) for the entity. + /// + /// - **Double-purge is a no-op**: after the first purge deletes temporal metadata, a second + /// call finds no matching rows and returns successfully without modifying the tombstone. 
+ /// + /// # Errors + /// + /// - [`InvalidDecisionTime`] if `decision_time` exceeds `transaction_time` + /// - [`IncomingLinksExist`] if incoming links exist and [`LinkDeletionBehavior::Error`] or + /// [`DeletionScope::Erase`] is requested + /// - [`Store`] if a database operation fails + /// + /// [`InvalidDecisionTime`]: DeletionError::InvalidDecisionTime + /// [`IncomingLinksExist`]: DeletionError::IncomingLinksExist + /// [`Store`]: DeletionError::Store + /// [`Filter::for_entity_by_entity_id`]: crate::filter::Filter::for_entity_by_entity_id + fn delete_entities( + &mut self, + actor_id: ActorEntityUuid, + params: DeleteEntitiesParams<'_>, + ) -> impl Future>> + Send; + fn diff_entity( &self, actor_id: ActorEntityUuid, diff --git a/libs/@local/graph/store/src/error.rs b/libs/@local/graph/store/src/error.rs index 0ae31989d19..d9f39229103 100644 --- a/libs/@local/graph/store/src/error.rs +++ b/libs/@local/graph/store/src/error.rs @@ -36,16 +36,22 @@ impl fmt::Display for UpdateError { impl Error for UpdateError {} -#[derive(Debug)] +#[derive(Debug, derive_more::Display)] +#[display("Could not delete from the store: {_variant}")] #[must_use] -pub struct DeletionError; - -impl fmt::Display for DeletionError { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt.write_str("Could not delete from the store") - } +pub enum DeletionError { + #[display("decision time must not exceed transaction time")] + InvalidDecisionTime, + #[display("{count} incoming links point to the target entities")] + IncomingLinksExist { count: u64 }, + #[display("expected {expected} entity_ids rows affected, got {actual}")] + InconsistentEntityIds { expected: u64, actual: u64 }, + #[display("store operation failed")] + Store, } +impl Error for DeletionError {} + #[derive(Debug, derive_more::Display)] #[display("Could not check permissions: {_variant}")] pub enum CheckPermissionError { diff --git a/libs/@local/graph/test-server/src/lib.rs 
b/libs/@local/graph/test-server/src/lib.rs index 7027374d11b..19cd2e7c9de 100644 --- a/libs/@local/graph/test-server/src/lib.rs +++ b/libs/@local/graph/test-server/src/lib.rs @@ -212,7 +212,7 @@ async fn delete_entities( pool.acquire(None) .await .map_err(store_acquisition_error)? - .delete_entities() + .delete_all_entities() .await .map_err(|report| { tracing::error!(error=?report, "Could not delete entities"); diff --git a/libs/@local/graph/type-fetcher/src/store.rs b/libs/@local/graph/type-fetcher/src/store.rs index 4bebd93562b..d804cf8d76c 100644 --- a/libs/@local/graph/type-fetcher/src/store.rs +++ b/libs/@local/graph/type-fetcher/src/store.rs @@ -35,10 +35,10 @@ use hash_graph_store::{ UnarchiveDataTypeParams, UpdateDataTypeEmbeddingParams, UpdateDataTypesParams, }, entity::{ - CountEntitiesParams, CreateEntityParams, EntityStore, EntityValidationReport, - HasPermissionForEntitiesParams, PatchEntityParams, QueryEntitiesParams, - QueryEntitiesResponse, QueryEntitySubgraphParams, QueryEntitySubgraphResponse, - UpdateEntityEmbeddingsParams, ValidateEntityParams, + CountEntitiesParams, CreateEntityParams, DeleteEntitiesParams, DeletionSummary, + EntityStore, EntityValidationReport, HasPermissionForEntitiesParams, PatchEntityParams, + QueryEntitiesParams, QueryEntitiesResponse, QueryEntitySubgraphParams, + QueryEntitySubgraphResponse, UpdateEntityEmbeddingsParams, ValidateEntityParams, }, entity_type::{ ArchiveEntityTypeParams, CommonQueryEntityTypesParams, CountEntityTypesParams, @@ -48,7 +48,7 @@ use hash_graph_store::{ QueryEntityTypesResponse, UnarchiveEntityTypeParams, UpdateEntityTypeEmbeddingParams, UpdateEntityTypesParams, }, - error::{CheckPermissionError, InsertionError, QueryError, UpdateError}, + error::{CheckPermissionError, DeletionError, InsertionError, QueryError, UpdateError}, filter::{Filter, QueryRecord}, pool::StorePool, property_type::{ @@ -1673,6 +1673,14 @@ where self.store.patch_entity(actor_id, params).await } + async fn delete_entities( + 
&mut self, + actor_id: ActorEntityUuid, + params: DeleteEntitiesParams<'_>, + ) -> Result> { + self.store.delete_entities(actor_id, params).await + } + async fn update_entity_embeddings( &mut self, actor_id: ActorEntityUuid, diff --git a/tests/graph/benches/graph/scenario/stages/reset_db.rs b/tests/graph/benches/graph/scenario/stages/reset_db.rs index 48d94d1be54..dfd78bca1e9 100644 --- a/tests/graph/benches/graph/scenario/stages/reset_db.rs +++ b/tests/graph/benches/graph/scenario/stages/reset_db.rs @@ -81,7 +81,7 @@ impl ResetDbStage { if self.entities { store - .delete_entities() + .delete_all_entities() .await .change_context(ResetDbError::DeleteEntities)?; reset_db_result.deleted_entities = true; diff --git a/tests/graph/integration/postgres/lib.rs b/tests/graph/integration/postgres/lib.rs index 745b5a4e5c5..f4c06a87244 100644 --- a/tests/graph/integration/postgres/lib.rs +++ b/tests/graph/integration/postgres/lib.rs @@ -47,10 +47,10 @@ use hash_graph_store::{ UnarchiveDataTypeParams, UpdateDataTypeEmbeddingParams, UpdateDataTypesParams, }, entity::{ - CountEntitiesParams, CreateEntityParams, EntityStore, EntityValidationReport, - HasPermissionForEntitiesParams, PatchEntityParams, QueryEntitiesParams, - QueryEntitiesResponse, QueryEntitySubgraphParams, QueryEntitySubgraphResponse, - UpdateEntityEmbeddingsParams, ValidateEntityParams, + CountEntitiesParams, CreateEntityParams, DeleteEntitiesParams, DeletionSummary, + EntityStore, EntityValidationReport, HasPermissionForEntitiesParams, PatchEntityParams, + QueryEntitiesParams, QueryEntitiesResponse, QueryEntitySubgraphParams, + QueryEntitySubgraphResponse, UpdateEntityEmbeddingsParams, ValidateEntityParams, }, entity_type::{ ArchiveEntityTypeParams, CountEntityTypesParams, CreateEntityTypeParams, EntityTypeStore, @@ -59,7 +59,7 @@ use hash_graph_store::{ QueryEntityTypeSubgraphResponse, QueryEntityTypesParams, QueryEntityTypesResponse, UnarchiveEntityTypeParams, UpdateEntityTypeEmbeddingParams, 
UpdateEntityTypesParams, }, - error::{CheckPermissionError, InsertionError, QueryError, UpdateError}, + error::{CheckPermissionError, DeletionError, InsertionError, QueryError, UpdateError}, pool::StorePool, property_type::{ ArchivePropertyTypeParams, CountPropertyTypesParams, CreatePropertyTypeParams, @@ -853,6 +853,14 @@ impl EntityStore for DatabaseApi<'_> { self.store.patch_entity(actor_id, params).await } + async fn delete_entities( + &mut self, + actor_id: ActorEntityUuid, + params: DeleteEntitiesParams<'_>, + ) -> Result> { + self.store.delete_entities(actor_id, params).await + } + async fn update_entity_embeddings( &mut self, actor_id: ActorEntityUuid, From 27948cc78f3cbc0b5771db3ece5699f2aa3736b1 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Fri, 27 Feb 2026 17:08:18 +0100 Subject: [PATCH 2/9] BE-311: Fix TOCTOU races, register EntityDeletionProvenance schema, deduplicate draft IDs - Bound deletion impl to `PostgresStore>` to enforce transactional locking at the type level - Add `FOR UPDATE` to `collect_entity_edition_ids` to serialize with concurrent `patch_entity` calls (`FOR NO KEY UPDATE NOWAIT` conflict) - Keep `lock_entity_ids_for_erase` for concurrent link creation (erase only) - Register `EntityDeletionProvenance` in OpenAPI schema components - Deduplicate draft IDs in `select_entities_for_deletion` to prevent inflated `DeletionSummary.draft_deletions` counts from join duplicates - Unify SQL string indentation --- libs/@local/graph/api/openapi/openapi.json | 20 +++++++++++++++++++ libs/@local/graph/api/src/rest/entity.rs | 5 +++-- .../store/postgres/knowledge/entity/delete.rs | 2 +- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/libs/@local/graph/api/openapi/openapi.json b/libs/@local/graph/api/openapi/openapi.json index 5288438f35d..b5404667f21 100644 --- a/libs/@local/graph/api/openapi/openapi.json +++ b/libs/@local/graph/api/openapi/openapi.json @@ -4447,6 +4447,26 @@ } } }, + "EntityDeletionProvenance": { + "type": "object", 
+ "required": [ + "deletedById", + "deletedAtTransactionTime", + "deletedAtDecisionTime" + ], + "properties": { + "deletedAtDecisionTime": { + "$ref": "#/components/schemas/Timestamp" + }, + "deletedAtTransactionTime": { + "$ref": "#/components/schemas/Timestamp" + }, + "deletedById": { + "$ref": "#/components/schemas/ActorEntityUuid" + } + }, + "additionalProperties": false + }, "EntityEditionId": { "type": "string", "format": "uuid" diff --git a/libs/@local/graph/api/src/rest/entity.rs b/libs/@local/graph/api/src/rest/entity.rs index 7bc6e6c8887..ece300f0f9a 100644 --- a/libs/@local/graph/api/src/rest/entity.rs +++ b/libs/@local/graph/api/src/rest/entity.rs @@ -51,8 +51,8 @@ use type_system::{ id::{EntityEditionId, EntityId, EntityRecordId, EntityUuid}, metadata::{EntityTemporalMetadata, EntityTypeIdDiff}, provenance::{ - EntityEditionProvenance, EntityProvenance, InferredEntityProvenance, - ProvidedEntityEditionProvenance, + EntityDeletionProvenance, EntityEditionProvenance, EntityProvenance, + InferredEntityProvenance, ProvidedEntityEditionProvenance, }, }, property::{ @@ -150,6 +150,7 @@ use crate::rest::{ EntityEditionId, EntityMetadata, EntityProvenance, + EntityDeletionProvenance, EntityEditionProvenance, InferredEntityProvenance, ProvidedEntityEditionProvenance, diff --git a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs index ccd8010bc67..2f7108366cb 100644 --- a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs +++ b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs @@ -140,7 +140,7 @@ impl PostgresStore> { } (Entry::Occupied(mut entry), Some(draft_id)) => { let tracked = entry.get_mut(); - if !tracked.is_empty() { + if !tracked.is_empty() && !tracked.contains(&draft_id) { tracked.push(draft_id); } } From 5732f5746a148f7a8aa74a451a9bea182d4cacb8 Mon Sep 17 00:00:00 2001 From: Tim 
Diekmann Date: Fri, 27 Feb 2026 17:52:50 +0100 Subject: [PATCH 3/9] BE-311: Fix OpenAPI/tsify schemas for InferredEntityProvenance - Manual `ToSchema` impl for `InferredEntityProvenance` so utoipa correctly represents flattened `Option` fields as optional instead of required. - tsify patch module to generate a clean `interface` instead of a complex type alias (`& (EntityDeletionProvenance | {})`) that breaks `extends` in `EntityProvenance`. - Use `DISTINCT ON` in `select_entities_for_deletion` via `SelectCompiler::add_distinct_selection_with_ordering` instead of manual draft ID deduplication. --- .../rust/src/knowledge/entity/provenance.rs | 102 ++++++++++++++++-- libs/@local/graph/api/openapi/openapi.json | 70 ++++++------ .../store/postgres/knowledge/entity/delete.rs | 25 ++++- 3 files changed, 143 insertions(+), 54 deletions(-) diff --git a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs index 45260b2b3ef..94c31d5e80a 100644 --- a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs +++ b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs @@ -83,28 +83,71 @@ pub struct EntityDeletionProvenance { } #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -#[cfg_attr(target_arch = "wasm32", derive(tsify::Tsify))] -#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] #[serde(deny_unknown_fields, rename_all = "camelCase")] pub struct InferredEntityProvenance { pub created_by_id: ActorEntityUuid, - #[cfg_attr(target_arch = "wasm32", tsify(type = "Timestamp"))] pub created_at_transaction_time: Timestamp, - #[cfg_attr(target_arch = "wasm32", tsify(type = "Timestamp"))] pub created_at_decision_time: Timestamp, - #[cfg_attr(feature = "utoipa", schema(nullable = false))] - #[cfg_attr(target_arch = "wasm32", tsify(type = "Timestamp"))] #[serde(default, skip_serializing_if = "Option::is_none")] pub 
first_non_draft_created_at_transaction_time: Option>, - #[cfg_attr(feature = "utoipa", schema(nullable = false))] - #[cfg_attr(target_arch = "wasm32", tsify(type = "Timestamp"))] #[serde(default, skip_serializing_if = "Option::is_none")] pub first_non_draft_created_at_decision_time: Option>, - #[cfg_attr(feature = "utoipa", schema(nullable = false))] #[serde(default, flatten, skip_serializing_if = "Option::is_none")] pub deletion: Option, } +/// Manual [`ToSchema`] implementation because utoipa's derive macro cannot correctly represent +/// `#[serde(flatten)]` on `Option`: it generates an `allOf` that makes +/// the deletion fields required. The correct schema lists them as optional properties. +#[cfg(feature = "utoipa")] +impl utoipa::ToSchema<'static> for InferredEntityProvenance { + fn schema() -> ( + &'static str, + utoipa::openapi::RefOr, + ) { + use utoipa::openapi::{ObjectBuilder, Ref, Schema}; + + ( + "InferredEntityProvenance", + Schema::Object( + ObjectBuilder::new() + .property("createdById", Ref::from_schema_name("ActorEntityUuid")) + .required("createdById") + .property( + "createdAtTransactionTime", + Ref::from_schema_name("Timestamp"), + ) + .required("createdAtTransactionTime") + .property( + "createdAtDecisionTime", + Ref::from_schema_name("Timestamp"), + ) + .required("createdAtDecisionTime") + .property( + "firstNonDraftCreatedAtTransactionTime", + Ref::from_schema_name("Timestamp"), + ) + .property( + "firstNonDraftCreatedAtDecisionTime", + Ref::from_schema_name("Timestamp"), + ) + // Flattened from `Option` — all optional. 
+ .property("deletedById", Ref::from_schema_name("ActorEntityUuid")) + .property( + "deletedAtTransactionTime", + Ref::from_schema_name("Timestamp"), + ) + .property( + "deletedAtDecisionTime", + Ref::from_schema_name("Timestamp"), + ) + .build(), + ) + .into(), + ) + } +} + #[cfg(feature = "postgres")] impl<'a> FromSql<'a> for InferredEntityProvenance { fn from_sql(ty: &Type, raw: &'a [u8]) -> Result> { @@ -142,6 +185,47 @@ pub struct EntityProvenance { pub edition: EntityEditionProvenance, } +/// Override tsify's generated type for [`InferredEntityProvenance`]. +/// +/// The main struct's `derive(tsify::Tsify)` generates +/// `type InferredEntityProvenance = { ... } & (EntityDeletionProvenance | {})` because of +/// `#[serde(flatten)]` on `Option`. That complex type alias cannot be +/// used with `extends` in [`EntityProvenance`]'s interface declaration. +/// +/// This patch generates a clean interface with the deletion fields as individually optional +/// properties, which overrides the broken declaration in the wasm output. +#[cfg(target_arch = "wasm32")] +#[expect(dead_code, reason = "Used in the generated TypeScript types")] +mod inferred_entity_provenance_patch { + use super::*; + + #[derive(tsify::Tsify)] + #[serde(rename_all = "camelCase")] + pub struct InferredEntityProvenance { + pub created_by_id: ActorEntityUuid, + #[tsify(type = "Timestamp")] + pub created_at_transaction_time: Timestamp, + #[tsify(type = "Timestamp")] + pub created_at_decision_time: Timestamp, + #[tsify(type = "Timestamp")] + #[serde(default, skip_serializing_if = "Option::is_none")] + pub first_non_draft_created_at_transaction_time: Option>, + #[tsify(type = "Timestamp")] + #[serde(default, skip_serializing_if = "Option::is_none")] + pub first_non_draft_created_at_decision_time: Option>, + // Flattened from `Option` — represented as individual optional + // fields instead of `& (EntityDeletionProvenance | {})`. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub deleted_by_id: Option, + #[tsify(type = "Timestamp")] + #[serde(default, skip_serializing_if = "Option::is_none")] + pub deleted_at_transaction_time: Option>, + #[tsify(type = "Timestamp")] + #[serde(default, skip_serializing_if = "Option::is_none")] + pub deleted_at_decision_time: Option>, + } +} + #[cfg(test)] mod tests { use uuid::Uuid; diff --git a/libs/@local/graph/api/openapi/openapi.json b/libs/@local/graph/api/openapi/openapi.json index b5404667f21..7b5b439a2d4 100644 --- a/libs/@local/graph/api/openapi/openapi.json +++ b/libs/@local/graph/api/openapi/openapi.json @@ -5612,48 +5612,38 @@ ] }, "InferredEntityProvenance": { - "allOf": [ - { - "allOf": [ - { - "$ref": "#/components/schemas/EntityDeletionProvenance" - } - ] + "type": "object", + "required": [ + "createdById", + "createdAtTransactionTime", + "createdAtDecisionTime" + ], + "properties": { + "createdAtDecisionTime": { + "$ref": "#/components/schemas/Timestamp" }, - { - "type": "object", - "required": [ - "createdById", - "createdAtTransactionTime", - "createdAtDecisionTime" - ], - "properties": { - "createdAtDecisionTime": { - "$ref": "#/components/schemas/Timestamp" - }, - "createdAtTransactionTime": { - "$ref": "#/components/schemas/Timestamp" - }, - "createdById": { - "$ref": "#/components/schemas/ActorEntityUuid" - }, - "firstNonDraftCreatedAtDecisionTime": { - "allOf": [ - { - "$ref": "#/components/schemas/Timestamp" - } - ] - }, - "firstNonDraftCreatedAtTransactionTime": { - "allOf": [ - { - "$ref": "#/components/schemas/Timestamp" - } - ] - } - } + "createdAtTransactionTime": { + "$ref": "#/components/schemas/Timestamp" + }, + "createdById": { + "$ref": "#/components/schemas/ActorEntityUuid" + }, + "deletedAtDecisionTime": { + "$ref": "#/components/schemas/Timestamp" + }, + "deletedAtTransactionTime": { + "$ref": "#/components/schemas/Timestamp" + }, + "deletedById": { + "$ref": "#/components/schemas/ActorEntityUuid" + }, + 
"firstNonDraftCreatedAtDecisionTime": { + "$ref": "#/components/schemas/Timestamp" + }, + "firstNonDraftCreatedAtTransactionTime": { + "$ref": "#/components/schemas/Timestamp" } - ] + } }, "JsonSchemaValueType": { "type": "string", diff --git a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs index 2f7108366cb..c365819a2ea 100644 --- a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs +++ b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs @@ -29,7 +29,10 @@ use type_system::{ principal::{actor::ActorEntityUuid, actor_group::WebId}, }; -use crate::store::{AsClient as _, PostgresStore, postgres::query::SelectCompiler}; +use crate::store::{ + AsClient as _, PostgresStore, + postgres::query::{Distinctness, SelectCompiler}, +}; /// Per-table row counts from [`delete_target_data`](PostgresStore::delete_target_data). #[derive(Default)] @@ -107,9 +110,21 @@ impl PostgresStore> { .add_filter(filter) .change_context(DeletionError::Store)?; - let web_id_index = compiler.add_selection_path(&EntityQueryPath::WebId); - let entity_uuid_index = compiler.add_selection_path(&EntityQueryPath::Uuid); - let draft_id_index = compiler.add_selection_path(&EntityQueryPath::DraftId); + let web_id_index = compiler.add_distinct_selection_with_ordering( + &EntityQueryPath::WebId, + Distinctness::Distinct, + None, + ); + let entity_uuid_index = compiler.add_distinct_selection_with_ordering( + &EntityQueryPath::Uuid, + Distinctness::Distinct, + None, + ); + let draft_id_index = compiler.add_distinct_selection_with_ordering( + &EntityQueryPath::DraftId, + Distinctness::Distinct, + None, + ); let (statement, parameters) = compiler.compile(); @@ -140,7 +155,7 @@ impl PostgresStore> { } (Entry::Occupied(mut entry), Some(draft_id)) => { let tracked = entry.get_mut(); - if !tracked.is_empty() && !tracked.contains(&draft_id) { + if 
!tracked.is_empty() { tracked.push(draft_id); } } From 23db01af5608eb677ccd2b4bd3bc3a62674a5917 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Fri, 27 Feb 2026 17:54:10 +0100 Subject: [PATCH 4/9] Format schema property definitions in inferred entity provenance rust code --- .../rust/src/knowledge/entity/provenance.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs index 94c31d5e80a..1dffeae5586 100644 --- a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs +++ b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs @@ -118,10 +118,7 @@ impl utoipa::ToSchema<'static> for InferredEntityProvenance { Ref::from_schema_name("Timestamp"), ) .required("createdAtTransactionTime") - .property( - "createdAtDecisionTime", - Ref::from_schema_name("Timestamp"), - ) + .property("createdAtDecisionTime", Ref::from_schema_name("Timestamp")) .required("createdAtDecisionTime") .property( "firstNonDraftCreatedAtTransactionTime", @@ -131,16 +128,12 @@ impl utoipa::ToSchema<'static> for InferredEntityProvenance { "firstNonDraftCreatedAtDecisionTime", Ref::from_schema_name("Timestamp"), ) - // Flattened from `Option` — all optional. 
.property("deletedById", Ref::from_schema_name("ActorEntityUuid")) .property( "deletedAtTransactionTime", Ref::from_schema_name("Timestamp"), ) - .property( - "deletedAtDecisionTime", - Ref::from_schema_name("Timestamp"), - ) + .property("deletedAtDecisionTime", Ref::from_schema_name("Timestamp")) .build(), ) .into(), From c8839ae14546b80971f39588315e9b9b51efd6f4 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Sat, 28 Feb 2026 13:30:12 +0100 Subject: [PATCH 5/9] BE-311: Fix doc links to private methods on impl block --- .../src/store/postgres/knowledge/entity/delete.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs index c365819a2ea..3d5bcf60173 100644 --- a/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs +++ b/libs/@local/graph/postgres-store/src/store/postgres/knowledge/entity/delete.rs @@ -74,15 +74,13 @@ enum DeletionTarget<'a> { /// /// All methods require a transaction to guarantee correctness of the locking protocol: /// -/// 1. [`collect_entity_edition_ids`] acquires `FOR UPDATE` on `entity_temporal_metadata` rows, +/// 1. `collect_entity_edition_ids` acquires `FOR UPDATE` on `entity_temporal_metadata` rows, /// serializing with concurrent [`patch_entity`] calls (which use `FOR NO KEY UPDATE NOWAIT`). -/// 2. [`lock_entity_ids_for_erase`] acquires `FOR UPDATE` on `entity_ids` rows (erase scope only), +/// 2. `lock_entity_ids_for_erase` acquires `FOR UPDATE` on `entity_ids` rows (erase scope only), /// serializing with concurrent link creation (which needs `KEY SHARE` for FK checks). /// /// Without a transaction these locks would be released immediately, defeating the purpose. 
/// -/// [`collect_entity_edition_ids`]: Self::collect_entity_edition_ids -/// [`lock_entity_ids_for_erase`]: Self::lock_entity_ids_for_erase /// [`patch_entity`]: hash_graph_store::entity::EntityStore::patch_entity impl PostgresStore> { /// Finds entities matching `filter` and partitions them into full vs draft-only deletions. From f29dda89387bf367bb5a6ceba91b54c66a99eee3 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Sat, 28 Feb 2026 13:44:35 +0100 Subject: [PATCH 6/9] BE-311: Fix broken intra-doc link to ToSchema --- .../type-system/rust/src/knowledge/entity/provenance.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs index 1dffeae5586..0553d7a8ea9 100644 --- a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs +++ b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs @@ -99,6 +99,8 @@ pub struct InferredEntityProvenance { /// Manual [`ToSchema`] implementation because utoipa's derive macro cannot correctly represent /// `#[serde(flatten)]` on `Option`: it generates an `allOf` that makes /// the deletion fields required. The correct schema lists them as optional properties. +/// +/// [`ToSchema`]: utoipa::ToSchema #[cfg(feature = "utoipa")] impl utoipa::ToSchema<'static> for InferredEntityProvenance { fn schema() -> ( From 1bc8b313a25dac26b173b93f69e8fe68b2092e25 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Sat, 28 Feb 2026 13:54:48 +0100 Subject: [PATCH 7/9] BE-311: Remove deny_unknown_fields from provenance structs with flatten MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit serde does not support deny_unknown_fields in combination with flatten. The nested flatten chain (EntityProvenance → InferredEntity Provenance → EntityDeletionProvenance) caused "unknown field createdById" during snapshot restore deserialization. 
--- .../type-system/rust/src/knowledge/entity/provenance.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs index 0553d7a8ea9..ec25fb2fe14 100644 --- a/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs +++ b/libs/@blockprotocol/type-system/rust/src/knowledge/entity/provenance.rs @@ -83,7 +83,9 @@ pub struct EntityDeletionProvenance { } #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -#[serde(deny_unknown_fields, rename_all = "camelCase")] +// `deny_unknown_fields` is intentionally absent: serde does not support it together with +// `#[serde(flatten)]` (https://serde.rs/container-attrs.html#deny_unknown_fields). +#[serde(rename_all = "camelCase")] pub struct InferredEntityProvenance { pub created_by_id: ActorEntityUuid, pub created_at_transaction_time: Timestamp, @@ -173,7 +175,9 @@ impl ToSql for InferredEntityProvenance { #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[cfg_attr(target_arch = "wasm32", derive(tsify::Tsify))] #[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields, rename_all = "camelCase")] +// `deny_unknown_fields` is intentionally absent: serde does not support it together with +// `#[serde(flatten)]` (https://serde.rs/container-attrs.html#deny_unknown_fields). 
+#[serde(rename_all = "camelCase")] pub struct EntityProvenance { #[serde(flatten)] pub inferred: InferredEntityProvenance, From 36e46b03167dcce578b6d23f928868fb4e6db414 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Sat, 28 Feb 2026 14:46:51 +0100 Subject: [PATCH 8/9] Update package.json --- libs/@local/graph/postgres-store/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/@local/graph/postgres-store/package.json b/libs/@local/graph/postgres-store/package.json index becfa0248d1..20b59a9c1a8 100644 --- a/libs/@local/graph/postgres-store/package.json +++ b/libs/@local/graph/postgres-store/package.json @@ -25,6 +25,7 @@ }, "devDependencies": { "@rust/hash-graph-migrations": "workspace:*", + "@rust/hash-graph-test-data": "workspace:*", "@rust/hash-telemetry": "workspace:*" } } From 46a101520ef7d70ac435308875aa7cf962af7f14 Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Sat, 28 Feb 2026 14:49:10 +0100 Subject: [PATCH 9/9] Update yarn.lock --- yarn.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/yarn.lock b/yarn.lock index 9c1cc94cca7..544f4771b43 100644 --- a/yarn.lock +++ b/yarn.lock @@ -14770,6 +14770,7 @@ __metadata: "@rust/hash-graph-migrations": "workspace:*" "@rust/hash-graph-store": "workspace:*" "@rust/hash-graph-temporal-versioning": "workspace:*" + "@rust/hash-graph-test-data": "workspace:*" "@rust/hash-graph-types": "workspace:*" "@rust/hash-graph-validation": "workspace:*" "@rust/hash-status": "workspace:*"