diff --git a/docs/server/ongoing-tasks/cdc-sink/_category_.json b/docs/server/ongoing-tasks/cdc-sink/_category_.json new file mode 100644 index 0000000000..d0ee4d17db --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/_category_.json @@ -0,0 +1,4 @@ +{ + "position": 5, + "label": "CDC Sink" +} diff --git a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx new file mode 100644 index 0000000000..729c650602 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx @@ -0,0 +1,176 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: API Reference" +sidebar_label: API Reference +description: "Client API operations for creating, updating, toggling, and deleting CDC Sink tasks programmatically." +sidebar_position: 11 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: API Reference + + + +* CDC Sink tasks can be created, updated, and managed programmatically using + the RavenDB Client API. + +* In this page: + * [Add a CDC Sink Task](#add-a-cdc-sink-task) + * [Update a CDC Sink Task](#update-a-cdc-sink-task) + * [Get Task Info](#get-task-info) + * [Toggle Task State](#toggle-task-state) + * [Delete a Task](#delete-a-task) + + + +--- + +## Add a CDC Sink Task + +Use `AddCdcSinkOperation` to create a new CDC Sink task: + +```csharp +var config = new CdcSinkConfiguration +{ + Name = "OrdersSync", + ConnectionStringName = "MyPostgresConnection", + Tables = new List + { + new CdcSinkTableConfig + { + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = new List { "id" }, + ColumnsMapping = new Dictionary + { + { "id", "Id" }, + { "customer_name", "CustomerName" }, + { "total", "Total" } + } + } + } +}; + +var result = await store.Maintenance.SendAsync( + new AddCdcSinkOperation(config)); + +long taskId = result.TaskId; +``` + +`AddCdcSinkOperationResult`: + +| Property | Type | Description | +|----------|------|-------------| +| `TaskId` | `long` | Assigned task ID | +| `RaftCommandIndex` | `long` | Raft index of the command | + +--- + +## Update a CDC Sink Task + +Use `UpdateCdcSinkOperation` to modify an existing task. +Pass the full updated configuration including the `TaskId`: + +```csharp +config.TaskId = taskId; // Must be set +config.Tables.Add(new CdcSinkTableConfig +{ + Name = "Customers", + SourceTableName = "customers", + PrimaryKeyColumns = new List { "id" }, + ColumnsMapping = new Dictionary + { + { "id", "Id" }, + { "name", "Name" }, + { "email", "Email" } + } +}); + +await store.Maintenance.SendAsync( + new UpdateCdcSinkOperation(taskId, config)); +``` + + + +**PostgreSQL — table changes affect the replication slot:** + +* **Adding tables** to the task requires updating the publication to include the + new tables. If the slot and publication were auto-named (hash-based), this causes + a new slot/publication to be created under a new hash, and the old ones become + orphaned. +* **Removing tables** from the task means the old publication includes tables that + are no longer needed. The slot keeps receiving updates for those tables + unnecessarily. + +If you specified explicit `SlotName` and `PublicationName` in `CdcSinkPostgresSettings`, +you can update the publication manually to add/remove tables without affecting the slot. +See [Initial Setup](./postgres/initial-setup.mdx) for +guidance on manual slot management. + +Orphaned slots and publications must be dropped by the database administrator. +See [Cleanup and Maintenance](./postgres/cleanup-and-maintenance.mdx). + + + +--- + +## Get Task Info + +Use `GetOngoingTaskInfoOperation` to retrieve the current state of a CDC Sink task: + +```csharp +var taskInfo = await store.Maintenance.SendAsync( + new GetOngoingTaskInfoOperation(taskId, OngoingTaskType.CdcSink)); +``` + +--- + +## Toggle Task State + +Pause or resume a CDC Sink task using `ToggleOngoingTaskStateOperation`: + +```csharp +// Pause the task +await store.Maintenance.SendAsync( + new ToggleOngoingTaskStateOperation(taskId, OngoingTaskType.CdcSink, disable: true)); + +// Resume the task +await store.Maintenance.SendAsync( + new ToggleOngoingTaskStateOperation(taskId, OngoingTaskType.CdcSink, disable: false)); +``` + + + +**PostgreSQL:** Pausing a CDC Sink task stops the replication slot from being consumed. +PostgreSQL retains WAL segments for unconsumed slots, so pausing for an extended period +causes WAL to accumulate on disk. Monitor disk usage if a task is paused for more than +a short time. +See [Monitoring PostgreSQL](./postgres/monitoring-postgres.mdx). + + + +--- + +## Delete a Task + +Use `DeleteOngoingTaskOperation` to delete a CDC Sink task: + +```csharp +await store.Maintenance.SendAsync( + new DeleteOngoingTaskOperation(taskId, OngoingTaskType.CdcSink)); +``` + + + +Deleting the task in RavenDB does **not** drop the replication slot or publication +in the source database. These must be cleaned up manually by the database administrator. +See [Cleanup and Maintenance](./postgres/cleanup-and-maintenance.mdx). + + + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx new file mode 100644 index 0000000000..5035dfd99d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -0,0 +1,131 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Attachment Handling" +sidebar_label: Attachment Handling +description: "How to store binary SQL columns as RavenDB attachments using AttachmentNameMapping on root and embedded tables." +sidebar_position: 9 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Attachment Handling + + + +* Binary SQL columns can be stored as RavenDB **attachments** instead of document + properties using `AttachmentNameMapping`. + +* This applies to both root tables and embedded tables. + +* In this page: + * [Root Table Attachments](#root-table-attachments) + * [Embedded Table Attachments](#embedded-table-attachments) + * [Attachment Naming](#attachment-naming) + * [Attachment Lifecycle](#attachment-lifecycle) + + + +--- + +## Root Table Attachments + +Use `AttachmentNameMapping` to map a binary SQL column to a RavenDB attachment: + +```csharp +new CdcSinkTableConfig +{ + Name = "Files", + SourceTableName = "files", + PrimaryKeyColumns = ["id"], + ColumnsMapping = new Dictionary + { + { "id", "Id" }, + { "filename", "Filename" }, + { "mime_type","MimeType" } + }, + AttachmentNameMapping = new Dictionary + { + { "content", "file" } // SQL column "content" → attachment named "file" + } +} +``` + +The binary `content` column is stored as an attachment named `"file"` on the document. +The attachment is stored with content type `application/octet-stream`. + +--- + +## Embedded Table Attachments + +Binary columns on embedded tables are stored as attachments on the **parent** document. +The attachment name is automatically prefixed to ensure uniqueness: + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "photos", + PropertyName = "Photos", + PrimaryKeyColumns = ["photo_num"], + JoinColumns = ["product_id"], + ColumnsMapping = new Dictionary + { + { "photo_num", "PhotoNum" }, + { "caption", "Caption" } + }, + AttachmentNameMapping = new Dictionary + { + { "thumbnail", "thumb" } + } +} +``` + +A photo with `photo_num = 1` creates an attachment named `"Photos/1/thumb"` on the +parent document. The prefix `"Photos/1/"` is generated from the `PropertyName` and +the primary key value. + +--- + +## Attachment Naming + +**Root table attachments:** + +The attachment name is exactly the value you specify in `AttachmentNameMapping`. + +``` +AttachmentNameMapping = { ["content"] = "file" } +→ Attachment name: "file" +``` + +**Embedded table attachments:** + +The attachment name is prefixed with `{PropertyName}/{pkValue}/`: + +``` +PropertyName = "Photos" +PrimaryKeyColumns = ["photo_num"] → photo_num = 1 +AttachmentNameMapping = { { "thumbnail", "thumb" } } +→ Attachment name: "Photos/1/thumb" +``` + +For composite primary keys, all key values are joined: + +``` +PrimaryKeyColumns = ["date", "seq"] → date='2024-01', seq=3 +→ Attachment name: "Photos/2024-01/3/thumb" +``` + +--- + +## Attachment Lifecycle + +* **INSERT** — Attachment is created on the document +* **UPDATE** — Attachment is replaced with the new binary data +* **DELETE (embedded item)** — All attachments for that item are automatically removed + from the parent document +* **DELETE (root document)** — Document and all its attachments are deleted + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx new file mode 100644 index 0000000000..d0b3d5b2b4 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -0,0 +1,162 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Column Mapping" +sidebar_label: Column Mapping +description: "How to map SQL columns to RavenDB document properties, handle unmapped columns, store binary columns as attachments, and specify source table schemas." +sidebar_position: 5 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Column Mapping + + + +* Column mapping controls which SQL columns appear in the RavenDB document and + under what property names. + +* In this page: + * [Mapping Columns to Properties](#mapping-columns-to-properties) + * [Unmapped Columns](#unmapped-columns) + * [Attachment Mapping](#attachment-mapping) + * [Schema (Source Table Schema)](#schema-source-table-schema) + + + +--- + +## Mapping Columns to Properties + +`ColumnsMapping` is a `Dictionary` where each entry maps a SQL column +name to a RavenDB document property name: + +```csharp +ColumnsMapping = new Dictionary +{ + { "id", "Id" }, + { "customer_name", "CustomerName" }, + { "order_date", "OrderDate" }, + { "total_amount", "TotalAmount" } +} +``` + +**Key:** SQL column name (case-insensitive match against the column names in CDC events) +**Value:** Property name in the RavenDB document + + + +The primary key column(s) do not need to be mapped. When included in `ColumnsMapping`, +they become a regular document property. When omitted, the PK values are still used +to build the document ID — they just won't appear as a named property. + +Including the PK in the mapping is generally useful so the document carries its own +identifier, but it is not required. + + + +**Type conversions:** SQL numeric, boolean, and date types are converted to their +JSON equivalents. SQL `NULL` becomes JSON `null`. If you need custom type handling +or derived values, use a `Patch` script. + +**At least one mapping is required.** An empty `ColumnsMapping` is a validation error. + +The same rules apply to embedded table column mappings. + +--- + +## Unmapped Columns + +Columns not listed in `ColumnsMapping` are **not stored** in the document, but they +are available in patch scripts via `$row`. + +This allows you to use data for computations without permanently storing raw SQL values: + +```csharp +ColumnsMapping = new Dictionary +{ + { "id", "Id" }, + { "name", "Name" } + // base_price and tax_rate are NOT mapped — won't appear in document +}, +Patch = "this.FinalPrice = $row.base_price * (1 + $row.tax_rate);" +``` + +In this example, `base_price` and `tax_rate` are available during the patch but +not stored as document properties. Only the computed `FinalPrice` is stored. + + + +**Naming context:** +Property names (the values in `ColumnsMapping`) become properties on the RavenDB +document — accessible as `this.FinalPrice` inside a patch script. + +Column names (the keys in `ColumnsMapping`, plus any unmapped columns) are accessible +in patch scripts via `$row.base_price` (for the current row's values) and +`$old?.base_price` (for the previous row's values on UPDATE events). + + + +--- + +## Attachment Mapping + +Binary SQL columns (e.g., PostgreSQL `BYTEA`) can be stored as RavenDB attachments +instead of document properties using `AttachmentNameMapping`: + +```csharp +new CdcSinkTableConfig +{ + Name = "Files", + SourceTableName = "files", + PrimaryKeyColumns = ["id"], + ColumnsMapping = { { "id", "Id" }, { "filename", "Filename" } }, + AttachmentNameMapping = new Dictionary + { + { "content", "file" } // SQL column "content" → attachment named "file" + } +} +``` + +The binary column `content` becomes an attachment named `"file"` on the document. + +**Embedded table attachments:** + +Binary columns on embedded tables are also supported. The attachment name is prefixed +with the embedded property path and primary key to ensure uniqueness: + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "photos", + PropertyName = "Photos", + PrimaryKeyColumns = ["photo_num"], + AttachmentNameMapping = { { "thumbnail", "thumb" } } +} +``` + +A photo with `photo_num = 1` creates attachment `"Photos/1/thumb"` on the parent document. +When the embedded item is deleted, its attachments are automatically removed. + +--- + +## Schema (Source Table Schema) + +`SourceTableSchema` specifies the SQL schema containing the table. It defaults to +`"public"` for PostgreSQL if omitted. + +```csharp +new CdcSinkTableConfig +{ + SourceTableSchema = "sales", // Table is in the "sales" schema + SourceTableName = "orders", + // ... +} +``` + +For most PostgreSQL setups using the default `public` schema, this can be omitted. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx new file mode 100644 index 0000000000..3aba831f7b --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx @@ -0,0 +1,172 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Configuration Reference" +sidebar_label: Configuration Reference +description: "Complete reference for all CDC Sink configuration classes — CdcSinkConfiguration, CdcSinkTableConfig, CdcSinkEmbeddedTableConfig, CdcSinkLinkedTableConfig, and related types." +sidebar_position: 10 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Configuration Reference + + + +* This page documents all configuration classes used to define a CDC Sink task. + +* In this page: + * [CdcSinkConfiguration](#cdcsinkconfiguration) + * [CdcSinkPostgresSettings](#cdcsinkpostgressettings) + * [CdcSinkTableConfig](#cdcsinktableconfig) + * [CdcSinkEmbeddedTableConfig](#cdcsinkembeddedtableconfig) + * [CdcSinkLinkedTableConfig](#cdcsinklinkedtableconfig) + * [CdcSinkOnDeleteConfig](#cdcsinkondeleteconfig) + * [CdcSinkRelationType](#cdcsinkrelationtype) + + + +--- + +## CdcSinkConfiguration + +The top-level configuration object for a CDC Sink task. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `Name` | `string` | ✓ | Unique task name | +| `ConnectionStringName` | `string` | ✓ | Name of the SQL connection string | +| `Tables` | `List` | ✓ | Root table configurations (at least one required) | +| `Postgres` | `CdcSinkPostgresSettings` | | PostgreSQL-specific settings (slot and publication names) | +| `SkipInitialLoad` | `bool` | | When `true`, skip the initial full-table scan and start streaming CDC changes immediately. Only applies on first startup — has no effect once the initial load has completed. Default: `false` | +| `Disabled` | `bool` | | Pause the task without deleting it. Default: `false` | +| `MentorNode` | `string` | | Preferred cluster node for execution | +| `PinToMentorNode` | `bool` | | Pin the task to the mentor node. Default: `false` | +| `TaskId` | `long` | | Set by the server on creation | + +--- + +## CdcSinkPostgresSettings + +PostgreSQL-specific settings. Assigned to `CdcSinkConfiguration.Postgres`. +Leave `null` for non-PostgreSQL connections. + +| Property | Type | Description | +|----------|------|-------------| +| `SlotName` | `string` | Name of the PostgreSQL logical replication slot. If omitted on creation, a deterministic hash-based name is used. Immutable once set. Max 63 characters, alphanumeric and underscores only. | +| `PublicationName` | `string` | Name of the PostgreSQL publication. Same auto-fill and immutability rules as `SlotName`. | + +Setting these explicitly is useful when: +- A database administrator pre-creates the slot and publication with human-readable names +- Migrating from a previous CDC Sink task and reusing an existing slot +- Running multiple environments (dev/staging/prod) with predictable names + +See [Initial Setup](./postgres/initial-setup.mdx) for details. + + + +For embedded tables where the join columns are not part of the primary key, the +PostgreSQL table must have `REPLICA IDENTITY` configured so that DELETE events include +the join column values. See [REPLICA IDENTITY](./postgres/replica-identity.mdx). + + + +--- + +## CdcSinkTableConfig + +Configures a root table — one SQL table mapped to one RavenDB collection. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `Name` | `string` | ✓ | RavenDB collection name (e.g., `"Orders"`) | +| `SourceTableName` | `string` | ✓ | SQL table name (e.g., `"orders"`) | +| `SourceTableSchema` | `string` | | SQL schema name. Default: `"public"` | +| `PrimaryKeyColumns` | `List` | ✓ | SQL columns used for document ID generation | +| `ColumnsMapping` | `Dictionary` | ✓ | SQL column → document property | +| `AttachmentNameMapping` | `Dictionary` | | Binary SQL column → attachment name | +| `Patch` | `string` | | JavaScript patch for INSERT and UPDATE | +| `OnDelete` | `CdcSinkOnDeleteConfig` | | Delete behavior. Default: delete document | +| `EmbeddedTables` | `List` | | Nested table configurations | +| `LinkedTables` | `List` | | Foreign key reference configurations | +| `Disabled` | `bool` | | Skip this table. Default: `false` | + +--- + +## CdcSinkEmbeddedTableConfig + +Configures a table whose rows are embedded as nested objects within a parent document. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `SourceTableName` | `string` | ✓ | SQL table name | +| `SourceTableSchema` | `string` | | SQL schema name. Default: `"public"` | +| `PropertyName` | `string` | ✓ | Property name in the parent document (e.g., `"Lines"`) | +| `Type` | `CdcSinkRelationType` | ✓ | `Array`, `Map`, or `Value` | +| `JoinColumns` | `List` | ✓ | FK columns referencing parent's `PrimaryKeyColumns` | +| `PrimaryKeyColumns` | `List` | ✓ | PK columns for matching items on UPDATE/DELETE | +| `ColumnsMapping` | `Dictionary` | ✓ | SQL column → embedded property | +| `AttachmentNameMapping` | `Dictionary` | | Binary SQL column → attachment name | +| `Patch` | `string` | | JavaScript patch on **parent** document for INSERT/UPDATE | +| `OnDelete` | `CdcSinkOnDeleteConfig` | | Delete behavior for embedded items | +| `CaseSensitiveKeys` | `bool` | | Case-sensitive PK matching. Default: `false` | +| `EmbeddedTables` | `List` | | Nested embedded tables | +| `Disabled` | `bool` | | Skip this table. Default: `false` | + +--- + +## CdcSinkLinkedTableConfig + +Configures a foreign key reference that becomes a document ID in the parent document. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `SourceTableName` | `string` | ✓ | SQL table name of the referenced table | +| `SourceTableSchema` | `string` | | SQL schema name. Default: `"public"` | +| `PropertyName` | `string` | ✓ | Property name in the parent document (e.g., `"Customer"`) | +| `LinkedCollectionName` | `string` | ✓ | Target RavenDB collection for ID generation (e.g., `"Customers"`) | +| `Type` | `CdcSinkRelationType` | ✓ | `Value` (single reference) or `Array` (multiple references) | +| `JoinColumns` | `List` | ✓ | FK columns used to build the referenced document ID | + +--- + +## CdcSinkOnDeleteConfig + +Controls how DELETE events are handled for a table or embedded table. + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `Patch` | `string` | null | JavaScript patch that runs when a DELETE event arrives | +| `IgnoreDeletes` | `bool` | `false` | When `true`, skip the delete — document/item is kept | + +**Available patch variables for OnDelete:** + +* `this` — the document (root or parent for embedded) +* `$row` — all SQL columns from the DELETE event +* `$old` — the embedded item's last known state (for embedded tables) + +**Behavior matrix:** + +| IgnoreDeletes | Patch | Result | +|---------------|-------|--------| +| `false` | null | Normal delete (default) | +| `false` | set | Patch runs, then delete proceeds | +| `true` | null | DELETE discarded silently | +| `true` | set | Patch runs, delete skipped | + +--- + +## CdcSinkRelationType + +Specifies the structure of embedded or linked data in the document. + +| Value | Description | +|-------|-------------| +| `Array` | One-to-many: stored as a JSON array. Items matched by PK for UPDATE/DELETE | +| `Map` | One-to-many: stored as a JSON object keyed by PK value(s) | +| `Value` | Many-to-one: stored as a single embedded object or document reference | + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/delete-strategies.mdx b/docs/server/ongoing-tasks/cdc-sink/delete-strategies.mdx new file mode 100644 index 0000000000..ab2ea379f0 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/delete-strategies.mdx @@ -0,0 +1,252 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Delete Strategies" +sidebar_label: Delete Strategies +description: "Configurable behavior for DELETE events in CDC Sink — archive, audit trail, silent ignore, and embedded table delete patterns." +sidebar_position: 7 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Delete Strategies + + + +* CDC Sink provides configurable behavior for DELETE events through the + `CdcSinkOnDeleteConfig` object on both root and embedded table configurations. + +* By default, a DELETE event deletes the corresponding RavenDB document or removes + the embedded item. The `OnDelete` configuration changes this. + +* In this page: + * [Default Behavior](#default-behavior) + * [OnDelete Configuration](#ondelete-configuration) + * [Pattern: Archive](#pattern-archive) + * [Pattern: Audit Trail](#pattern-audit-trail) + * [Pattern: Silent Ignore](#pattern-silent-ignore) + * [OnDelete for Embedded Tables](#ondelete-for-embedded-tables) + * [Behavior Summary](#behavior-summary) + * [DELETE Routing and REPLICA IDENTITY](#delete-routing-and-replica-identity) + + + +--- + +## Default Behavior + +When `OnDelete` is `null` (or not set): + +* **Root table DELETE** — The corresponding RavenDB document is deleted +* **Embedded table DELETE (Array/Map)** — The item is removed from the parent document's array or map +* **Embedded table DELETE (Value)** — The `Value`-type property on the parent document is set to `null` + +--- + +## OnDelete Configuration + +`CdcSinkOnDeleteConfig` has two properties: + +```csharp +public class CdcSinkOnDeleteConfig +{ + // JavaScript patch that runs when a DELETE event arrives + // Available variables: this, $row, $old + public string Patch { get; set; } + + // When true, the delete is not applied — document/item is kept + public bool IgnoreDeletes { get; set; } +} +``` + +* If `Patch` is set, it runs **before** the delete decision is made +* If `IgnoreDeletes = true`, the deletion is skipped after the patch runs +* If `IgnoreDeletes = false` (default), the deletion proceeds after the patch runs + + + +When `IgnoreDeletes = true`, CDC Sink skips the delete but still **saves any changes +made to `this`** in the patch. This means a patch can modify the document (e.g., +set an `Archived` flag) and those changes are written to RavenDB even though the +document is not deleted. + + + +--- + +## Pattern: Archive + +Keep the document in RavenDB but mark it as deleted. The patch runs to mark it, +and `IgnoreDeletes = true` prevents the actual deletion: + +```csharp +OnDelete = new CdcSinkOnDeleteConfig +{ + IgnoreDeletes = true, + Patch = @" + this.Archived = true; + this.ArchivedAt = new Date().toISOString(); + " +} +``` + +The document remains in RavenDB with `Archived = true`. Queries can filter on this +field to exclude archived records. + +--- + +## Pattern: Audit Trail (Root Document) + +When a root document is deleted, write an audit record to a separate RavenDB +collection using `put()` before the deletion proceeds. + +The document is deleted after the patch runs, so any writes to `this` are lost. +Use `put()` to create a document that survives the deletion: + +```csharp +OnDelete = new CdcSinkOnDeleteConfig +{ + // IgnoreDeletes defaults to false — delete proceeds after patch + Patch = @" + // Create a permanent audit record in a separate collection + put('DeletedOrders/' + this.Id, { + OriginalId: this.Id, + Customer: this.Customer, + Total: this.Total, + DeletedAt: new Date().toISOString() + }); + " +} +``` + +The patch creates a document in `DeletedOrders/` before `this` is deleted. +The audit record persists permanently. + +--- + +## Pattern: Silent Ignore + +Discard DELETE events without running any patch. Use this for append-only data +where deletes should never result in document removal: + +```csharp +OnDelete = new CdcSinkOnDeleteConfig +{ + IgnoreDeletes = true + // No patch — DELETE events are silently discarded +} +``` + +--- + +## OnDelete for Embedded Tables + +The same `OnDelete` configuration works on embedded tables. For embedded tables: + +* `Patch` runs on the **parent document** (not the embedded item) +* `$old` contains the embedded item's last known state before deletion +* `IgnoreDeletes = true` prevents the item from being removed from the array/map + +**Example: Keep deleted items in an audit array** + +Rather than removing a deleted line item from the array, move it to a separate +`DeletedLines` property: + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "order_lines", + PropertyName = "Lines", + // ... + OnDelete = new CdcSinkOnDeleteConfig + { + IgnoreDeletes = true, + Patch = @" + // Remove from active Lines + this.Lines = (this.Lines || []) + .filter(l => l.LineId !== $old?.LineId); + + // Add to DeletedLines audit array + this.DeletedLines = this.DeletedLines || []; + this.DeletedLines.push({ + LineId: $old?.LineId, + Product: $old?.Product, + Quantity: $old?.Quantity, + DeletedAt: new Date().toISOString() + }); + " + } +} +``` + +With `IgnoreDeletes = true`, CDC Sink does not automatically remove the item — +the patch takes full control of both the `Lines` array and the `DeletedLines` audit trail. + +**Example: Run a patch but still remove the item** + +To run some logic on DELETE while still removing the item from the array, +use `IgnoreDeletes = false` (default) — CDC Sink handles the removal, and the patch +runs before it: + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "order_lines", + PropertyName = "Lines", + // ... + OnDelete = new CdcSinkOnDeleteConfig + { + // IgnoreDeletes = false (default) — CDC Sink removes the item after patch + Patch = @" + // Log total for the line being removed + this.RemovedTotal = (this.RemovedTotal || 0) + + ($old?.UnitPrice || 0) * ($old?.Qty || 0); + " + } +} +``` + +The `Lines` item is removed by CDC Sink. The patch only needs to handle the +side-effect logic. + +--- + +## Behavior Summary + +| IgnoreDeletes | Patch | Behavior | +|---------------|-------|----------| +| `false` | null | Normal delete (default) | +| `false` | set | Patch runs, then delete proceeds | +| `true` | null | DELETE event discarded silently | +| `true` | set | Patch runs, then delete is skipped | + +--- + +## DELETE Routing and REPLICA IDENTITY + +For CDC Sink to route a DELETE event to the correct document or embedded item, +the source database must include the necessary column values in the DELETE event. + +For embedded tables where the join column is not in the primary key, the source +database may need additional configuration. + + + +The REPLICA IDENTITY requirement described below is **PostgreSQL-specific**. +Other databases may have different requirements or may not need any extra +configuration for DELETE routing. + + + +See: + +* [REPLICA IDENTITY](./postgres/replica-identity.mdx) (PostgreSQL) + +**Skipping REPLICA IDENTITY requirements:** Set `OnDelete.IgnoreDeletes = true` +to discard DELETE events for an embedded table entirely. This skips the REPLICA +IDENTITY check, since delete routing is no longer needed. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx new file mode 100644 index 0000000000..6f4c5be7ff --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx @@ -0,0 +1,235 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Embedded Tables" +sidebar_label: Embedded Tables +description: "Configuration options, nesting constraints, and how embedded items are updated and deleted in CDC Sink." +sidebar_position: 3 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Embedded Tables + + + +* Embedded tables allow CDC Sink to nest SQL table data inside a parent document as + arrays, maps, or single objects. + +* This page covers configuration options, nesting constraints, and how embedded items + are updated and deleted. + +* In this page: + * [Basic Configuration](#basic-configuration) + * [Join Columns and Primary Key Interaction](#join-columns-and-primary-key-interaction) + * [Matching Items on Update and Delete](#matching-items-on-update-and-delete) + * [Deep Nesting](#deep-nesting) + * [Attachments on Embedded Items](#attachments-on-embedded-items) + * [Disabling an Embedded Table](#disabling-an-embedded-table) + + + +--- + +## Basic Configuration + +`CdcSinkEmbeddedTableConfig` is placed inside a root table's `EmbeddedTables` list: + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableSchema = "public", // SQL schema (optional) + SourceTableName = "order_lines", // SQL table name + PropertyName = "Lines", // Property name in parent document + Type = CdcSinkRelationType.Array, // Array, Map, or Value + JoinColumns = ["order_id"], // FK to parent's PrimaryKeyColumns + PrimaryKeyColumns = ["line_id"], // Used to match items on UPDATE/DELETE + ColumnsMapping = new Dictionary + { + { "line_id", "LineId" }, + { "product", "Product" }, + { "quantity", "Quantity" } + } +} +``` + +--- + +## Join Columns and Primary Key Interaction + +### Purpose of JoinColumns + +`JoinColumns` specifies the foreign key columns in the embedded table that reference +the parent's primary key. CDC Sink uses these columns to route each row to the correct +parent document. + +The `JoinColumns` values must exactly match the parent's `PrimaryKeyColumns`: + +```csharp +// Parent root table: +PrimaryKeyColumns = ["order_id"] + +// Embedded table - correct: +JoinColumns = ["order_id"] // References parent's PK column + +// Embedded table - INCORRECT: +JoinColumns = ["customer_id"] // Does not reference the parent's PK +``` + +### DELETE Events + +For DELETE events, the source database must include the join column values so CDC Sink +can route the delete to the correct parent document. + +By default, many databases only include primary key columns in DELETE events. If the +join column is _not_ in the embedded table's primary key, additional source database +configuration is required. + +**PostgreSQL:** See [REPLICA IDENTITY](./postgres/replica-identity.mdx) +for how CDC Sink handles this automatically when it has sufficient permissions, or how +a DBA can configure it manually. + +### Avoiding REPLICA IDENTITY Requirements + +The cleanest solution is to include the join column in the embedded table's primary key: + +```sql +-- SQL schema: +CREATE TABLE order_lines ( + order_id INT NOT NULL REFERENCES orders(id), + line_id INT NOT NULL, + product VARCHAR(200), + PRIMARY KEY (order_id, line_id) -- order_id in PK means DELETE events include it +); +``` + +```csharp +// Configuration: +PrimaryKeyColumns = ["order_id", "line_id"] +JoinColumns = ["order_id"] +``` + +With `order_id` in the primary key, DELETE events include it by default and no additional +source database configuration is needed. + +Alternatively, set `OnDelete.IgnoreDeletes = true` to skip delete routing entirely +if deletes on that embedded table don't need to be processed. +See [Delete Strategies](./delete-strategies.mdx). + +--- + +## Matching Items on Update and Delete + +When an UPDATE or DELETE arrives for an embedded row, CDC Sink must find the correct +item within the parent document's array or map. + +Items are matched by their full `PrimaryKeyColumns` composite key: + +* **INSERT** — New item appended to array / added to map +* **UPDATE** — Item found by PK match; mapped columns overwritten +* **DELETE** — Item found by PK match; removed from array/map (or OnDelete behavior applied) + +**Composite PKs** work the same way — all PK columns must match: + +```csharp +PrimaryKeyColumns = ["invoice_date", "invoice_seq"] + +// UPDATE event for (invoice_date='2024-01-15', invoice_seq=3) +// → Finds and updates the item where both columns match +``` + +**Case sensitivity:** By default, PK matching is case-insensitive. Set +`CaseSensitiveKeys = true` on `CdcSinkEmbeddedTableConfig` if your keys are +case-sensitive. + +--- + +## Deep Nesting + +Embedded tables can contain their own `EmbeddedTables`, creating hierarchies +with multiple levels. + +**Key constraint:** Every descendant table must carry the **root table's primary key** +as a denormalized column. This is required because CDC Sink routes each row to its +root document in a single pass. + +**Example: Company → Departments → Employees** + +The `employees` table must have `company_id` (the root PK) even though it only +directly joins to `departments`: + +```sql +CREATE TABLE employees ( + company_id INT NOT NULL, -- Denormalized root PK + dept_id INT NOT NULL, + emp_id INT NOT NULL, + PRIMARY KEY (company_id, dept_id, emp_id) +); +``` + +```csharp +// Configuration for the employees embedded table: +JoinColumns = ["company_id", "dept_id"] // Root PK + parent PK +PrimaryKeyColumns = ["emp_id"] +``` + +**Why is the root PK required?** + +When a CDC Sink event arrives for an `employees` row, CDC Sink needs to: + +1. Find the root document: `Companies/{company_id}` +2. Navigate to the correct `Departments` array item: `Departments.find(d => d.dept_id == dept_id)` +3. Add or update the `Employees` array item: `Employees.find(e => e.emp_id == emp_id)` + +Without `company_id` in the event, CDC Sink cannot identify the root document +without an additional lookup, which is not supported. + +--- + +## Attachments on Embedded Items + +Binary columns from embedded tables can be stored as RavenDB attachments using +`AttachmentNameMapping`. + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "photos", + PropertyName = "Photos", + PrimaryKeyColumns = ["photo_num"], + JoinColumns = ["product_id"], + ColumnsMapping = { { "photo_num", "PhotoNum" }, { "caption", "Caption" } }, + AttachmentNameMapping = { { "thumbnail", "thumb" } } +} +``` + +A photo with `photo_num = 1` creates an attachment named `"Photos/1/thumb"` on the +parent document. The attachment name is prefixed with the embedded path and PK to +ensure uniqueness within the document. + +When the embedded item is deleted, its attachments are automatically removed. + +--- + +## Disabling an Embedded Table + +Set `Disabled = true` to pause processing for a specific embedded table without +removing it from the configuration: + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "audit_log", + PropertyName = "AuditLog", + Disabled = true, + // ... other config +} +``` + +Changes from the source table are ignored while `Disabled = true`. When re-enabled, +CDC Sink resumes from the current position — it does not backfill missed events. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/failover-and-consistency.mdx b/docs/server/ongoing-tasks/cdc-sink/failover-and-consistency.mdx new file mode 100644 index 0000000000..7619f96a7d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/failover-and-consistency.mdx @@ -0,0 +1,146 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Failover and Consistency" +sidebar_label: Failover and Consistency +description: "How CDC Sink handles node failover, what consistency guarantees it provides, and why patches must be idempotent." +sidebar_position: 14 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Failover and Consistency + + + +* This page explains how CDC Sink handles node failover, what consistency guarantees + it provides, and why patches must be designed to be idempotent. + +* In this page: + * [State Storage](#state-storage) + * [Failover Behavior](#failover-behavior) + * [Re-Reading Changes](#re-reading-changes) + * [Consistency Guarantee](#consistency-guarantee) + * [Idempotency Requirements](#idempotency-requirements) + + + +--- + +## State Storage + +CDC Sink persists its progress as a **document in RavenDB** alongside your data. + +This state document records: + +* The last acknowledged position in the source database's change log +* Per-table initial load progress (which tables have completed, and the last key scanned) + +Like any RavenDB document, this state document is subject to **normal replication behavior**. +Different nodes in a cluster may have different versions of this document at any given moment. + +--- + +## Failover Behavior + +When the cluster elects a new mentor node for a CDC Sink task, the new node reads +the **replicated state** — which may be older than the work the previous mentor +completed but had not yet replicated. + +**Example:** + +``` +Before failure: +├─ Node A (Mentor) — Processed up to position X, state document not yet replicated +├─ Node B (Replica) — State document shows position X-100 (older, replicated state) +└─ Node C (Replica) — State document shows position X-100 + +Node A crashes. +Node B is elected new mentor. +Node B reads its state document: position X-100. +Node B resumes CDC Sink from position X-100. +``` + +Changes between positions X-100 and X may already be in RavenDB (written by Node A), +or they may not be (if Node A crashed before writing). Node B will re-read and re-apply +them either way. + +--- + +## Re-Reading Changes + +**CDC Sink guarantees that no changes are lost, but some changes may be re-read.** + +When the new mentor resumes from an older position: + +* Changes that were already processed and fully replicated — re-applied (idempotent merge) +* Changes that were processed but not yet replicated — applied for the "first time" on new node +* New changes not yet seen — applied normally + +Re-reading is normal and expected. The document merge strategy ensures that re-applying +an INSERT or UPDATE with the same values is safe — properties are simply overwritten +with the same values they already have. + + + +Patches that are not idempotent can produce incorrect results when a change is +re-read after a failover. See [Idempotency Requirements](#idempotency-requirements) below. + + + +--- + +## Consistency Guarantee + +CDC Sink provides **at-least-once delivery** with **eventual consistency**: + +* No changes are lost +* Changes are applied in source transaction order +* After failover, progress resumes from the last replicated state +* Idempotent patches ensure re-reads are safe +* No "exactly once" guarantee — the same change may be applied more than once + +This is the same model used by other ongoing tasks in RavenDB. + +--- + +## Idempotency Requirements + +A patch is **idempotent** if applying it multiple times produces the same result as +applying it once. + +**Column mapping is always idempotent** — overwriting a property with the same value +is a no-op. + +**Patches that use absolute values are idempotent:** + +```javascript +// Idempotent — always sets to the current SQL value +this.Status = $row.is_active ? 'Active' : 'Inactive'; +this.ViewCount = $row.view_count; +``` + +**Patches that increment are NOT idempotent:** + +```javascript +// NOT idempotent — increments again on re-read +this.ViewCount = (this.ViewCount || 0) + 1; +``` + +**Delta patches using $old are idempotent** because `$old` reflects the previous +embedded item state, not the document's accumulated value: + +```javascript +// Idempotent — delta is recomputed correctly on re-read +const oldAmount = $old?.Amount || 0; +const newAmount = $row.amount || 0; +this.RunningTotal = (this.RunningTotal || 0) + (newAmount - oldAmount); +``` + +On re-read, `$old` still reflects the state before the update, and `$row` still +reflects the new state — so the delta is the same as on the first read. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx new file mode 100644 index 0000000000..ae3f4b3a7a --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx @@ -0,0 +1,206 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: How It Works" +sidebar_label: How It Works +description: "Describes the internal operation of CDC Sink — connection, initial load, change streaming, transaction ordering, state persistence, and failover." +sidebar_position: 1 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: How It Works + + + +* This page describes the internal operation of CDC Sink — how it connects to the + source database, loads initial data, streams changes, and handles failover. + +* Understanding these mechanics is important when designing patches, planning for + failover, and setting up monitoring. + +* In this page: + * [Startup and Verification](#startup-and-verification) + * [Initial Load](#initial-load) + * [Change Streaming](#change-streaming) + * [Transaction Ordering](#transaction-ordering) + * [State Persistence and Failover](#state-persistence-and-failover) + * [Child Before Parent](#child-before-parent) + + + +--- + +## Startup and Verification + +When a CDC Sink task starts, it first verifies that the source database is properly +configured. For PostgreSQL, this includes checking: + +* WAL level is set to `logical` +* The connecting user has sufficient privileges +* REPLICA IDENTITY is configured correctly for embedded tables that need delete routing + +If any check fails, CDC Sink reports the exact issue and the SQL an administrator +needs to run to fix it. The task does not start until all checks pass. + +After verification, CDC Sink creates the necessary replication infrastructure +in the source database (publication and replication slot), then begins the initial load. + +See the [PostgreSQL Prerequisites Checklist](./postgres/prerequisites-checklist.mdx) +for the full list of requirements. + +--- + +## Initial Load + +Before streaming live changes, CDC Sink performs a full scan of every configured table +(root, embedded, and linked) using keyset pagination ordered by primary key. + +**Progress tracking:** Initial load progress is persisted per-table as a document in +RavenDB. If the task is restarted, it resumes from the last processed key rather than +re-scanning the entire table. + +**Batch pipelining:** While one batch is being written to RavenDB, the next batch is +being read from the source database, keeping both systems busy. + +**Ordering:** Tables are scanned in dependency order. Root tables are loaded first, +then embedded tables. This minimises the number of stub documents created (see +[Child Before Parent](#child-before-parent) below). + + + +**CDC is initialized before the initial load begins.** +The replication slot is created and CDC Sink starts capturing changes *before* +the first row is read. This guarantees that no change made during the (potentially +long) initial load phase is lost — all such changes are queued in the replication +stream and applied once the initial load completes. + +For very large databases this means the source database must retain sufficient +change history for the duration of the initial load (e.g., enough disk for WAL +accumulation on PostgreSQL, or a long enough retention window on SQL Server's CDC +tables). Plan accordingly before starting the initial load on large tables. + + + + + +**New rows added mid-load become stub documents.** +If a new row is inserted into the source during the initial load — and CDC Sink has +not yet reached that key via keyset pagination — it will appear in the replication +stream before the initial load scans past it. CDC Sink applies the CDC event first, +creating a full document. When the initial load later reaches that key, it merges +the scanned row onto the document. No data is lost or duplicated. + +If the CDC event arrives *after* the initial load has scanned past the key, the +document is written by the initial load and then the CDC event is applied on top. + + + +--- + +## Change Streaming + +After the initial load completes, CDC Sink opens a streaming connection to the +source database and begins receiving changes in real time. + + + +The starting position for CDC streaming is the position captured **before** the +initial load began — not after. This is what ensures changes made during the +initial load are not missed. + + + +Changes arrive grouped by source database transaction, preserving the exact order +of operations. A transaction is only applied to RavenDB after it is fully committed +in the source database — partial transactions are never written. + +**Document merging:** When an UPDATE arrives, CDC Sink merges the new column values +onto the existing RavenDB document. Properties that are not part of the column mapping +are preserved. This allows RavenDB-side annotations and computed fields to coexist with +CDC-managed properties. + +See [Property Retention](./property-retention.mdx) for details. + +--- + +## Transaction Ordering + +CDC Sink preserves the full order of operations within a source database transaction. +If a single transaction performs multiple operations on the same row, all operations +are applied in order. + +**Example:** A source transaction that does: + +```sql +BEGIN; +INSERT INTO items (id, name) VALUES (1, 'Alpha'); +UPDATE items SET name = 'Beta' WHERE id = 1; +DELETE FROM items WHERE id = 1; +INSERT INTO items (id, name) VALUES (1, 'Gamma'); +UPDATE items SET name = 'Delta' WHERE id = 1; +COMMIT; +``` + +CDC Sink applies all five operations in order. The final document state is `name = 'Delta'`. + +Multiple documents modified in the same transaction are also applied atomically within +a single RavenDB batch. + +--- + +## State Persistence and Failover + +### State Storage + +CDC Sink persists its progress as a **document in RavenDB**, alongside your data. +This document records: + +* The last acknowledged position in the source database's change log (LSN for PostgreSQL) +* Per-table initial load progress (which tables completed, and the last key scanned) + +Like any RavenDB document, this state document is subject to normal replication behavior. +Different nodes in a cluster may have different versions of this document at any point in time. + +### Failover Behavior + +When the cluster elects a new mentor node for the CDC Sink task, the new node reads +the **replicated** state document — which may be older than the work the previous +mentor had completed but not yet replicated. + +The new mentor resumes from that replicated state. This means: + +* **No data is lost** — CDC Sink resumes from a known position and the source database + retains all changes from that position onward +* **Some changes may be re-read** — Changes between the replicated state and the + previous mentor's actual progress will be processed again + +Re-reading is normal and expected. The document merge strategy means that re-applying +the same INSERT or UPDATE is safe — column values are simply overwritten with the same values. + + + +Patches that are not idempotent can produce incorrect results if the same change is +re-read after a failover. Design patches to handle re-processing safely. +See [Patching](./patching.mdx) for guidance. + + + +--- + +## Child Before Parent + +If an embedded row arrives before its parent row exists in RavenDB — which can happen +during initial load when tables are scanned in parallel, or due to relaxed foreign key +constraints in the source database — CDC Sink creates a **stub document** containing +only the embedded data. + +When the parent row arrives later, its columns are merged onto the stub document. +The final document contains both the parent fields and all embedded items that arrived earlier. + +This ensures no data is lost regardless of the order in which rows are processed. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx new file mode 100644 index 0000000000..1fa55647b5 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx @@ -0,0 +1,141 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Linked Tables" +sidebar_label: Linked Tables +description: "How to create document ID references from foreign keys using linked table configuration in CDC Sink." +sidebar_position: 4 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Linked Tables + + + +* A **linked table** creates a document ID reference in the parent document rather + than embedding data. A foreign key value becomes a RavenDB document ID pointing + to a related collection. + +* In this page: + * [Basic Configuration](#basic-configuration) + * [Composite Foreign Keys](#composite-foreign-keys) + * [Array References](#array-references) + * [Linked vs Embedded](#linked-vs-embedded) + + + +--- + +## Basic Configuration + +`CdcSinkLinkedTableConfig` is placed inside a root table's `LinkedTables` list: + +```csharp +new CdcSinkTableConfig +{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = ["id"], + ColumnsMapping = { { "id", "Id" }, { "customer_id", "CustomerId" } }, + LinkedTables = + [ + new CdcSinkLinkedTableConfig + { + SourceTableName = "customers", + PropertyName = "Customer", // Property name in document + LinkedCollectionName = "Customers", // Target collection for ID + Type = CdcSinkRelationType.Value, // Single reference + JoinColumns = ["customer_id"] // FK used to build the document ID + } + ] +} +``` + +With `customer_id = 42`, the Orders document gets: + +```json +{ + "Id": 1, + "CustomerId": 42, + "Customer": "Customers/42" +} +``` + +The `Customer` property is a RavenDB document ID. Load the referenced document using +[includes](../../../client-api/session/loading-entities#load-with-includes) to avoid +a second network call. + +--- + +## Composite Foreign Keys + +When the target table has a composite primary key, the linked reference includes +all parts of that key: + +```csharp +new CdcSinkLinkedTableConfig +{ + SourceTableName = "customers", + PropertyName = "Customer", + LinkedCollectionName = "Customers", + Type = CdcSinkRelationType.Value, + JoinColumns = ["customer_region", "customer_id"] // Must match Customers PK order +} +``` + +With `customer_region = 'US'` and `customer_id = 42`, the document gets: + +```json +"Customer": "Customers/US/42" +``` + +--- + +## Array References + +Use `Type = CdcSinkRelationType.Array` for one-to-many references, where a parent +row has multiple foreign keys pointing to the same collection: + +```csharp +new CdcSinkLinkedTableConfig +{ + SourceTableName = "tags", + PropertyName = "Tags", + LinkedCollectionName = "Tags", + Type = CdcSinkRelationType.Array, + JoinColumns = ["tag_id"] +} +``` + +This creates an array of document references: + +```json +{ + "Tags": ["Tags/primary", "Tags/urgent", "Tags/follow-up"] +} +``` + +--- + +## Linked vs Embedded + +| Consideration | Embedded | Linked | +|--------------|----------|--------| +| Data stored | Full nested object/array inside document | Document ID reference only | +| Load pattern | Single document load | Load parent + include references | +| Document size | Grows with embedded items | Parent document stays small | +| Updates to referenced data | Reflected via CDC | Reflected via CDC on the referenced collection | +| Independence | Child has no meaning without parent | Referenced entity exists independently | +| Typical use | Orders own LineItems | Orders reference Customers | + +**Choose embedded** when the child data belongs to the parent and is always read +together with it. + +**Choose linked** when the referenced entity is independently meaningful and shared +across many parents, and you want to avoid duplicating data. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx new file mode 100644 index 0000000000..e43d6a4fea --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx @@ -0,0 +1,99 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Monitoring" +sidebar_label: Monitoring +description: "How to monitor CDC Sink task state, fallback mode, runtime statistics, and notifications in RavenDB Management Studio and the Client API." +sidebar_position: 13 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Monitoring + + + +* This page explains how to monitor a CDC Sink task — its running state, progress, + fallback behavior, and statistics available through the Management Studio and API. + +* In this page: + * [Task State](#task-state) + * [Fallback Mode](#fallback-mode) + * [Statistics](#statistics) + * [Notifications](#notifications) + + + +--- + +## Task State + +A CDC Sink task can be in one of the following states: + +| State | Description | +|-------|-------------| +| `Active` | Running normally — streaming changes or waiting for new changes | +| `ActiveByAnotherNode` | Another cluster node is the mentor; this node is a replica | +| `Disabled` | Manually disabled via Studio or API | +| `Error` | The task encountered an error and stopped | +| `FallbackMode` | Connection to the source database was lost; retrying | +| `NotOnThisNode` | This node does not hold the task | + +The current state is visible in the **Ongoing Tasks** view in the Management Studio. + +--- + +## Fallback Mode + +When CDC Sink cannot reach the source database, it enters **fallback mode** rather +than failing immediately. + +In fallback mode: +* The task continues retrying the connection at regular intervals +* No changes are applied while the connection is down +* The task automatically resumes streaming once the source is reachable again + +The maximum time the task will remain in fallback mode before reporting an error +is controlled by the `CdcSink.MaxFallbackTimeInSec` configuration key. +See [Server Configuration](./server-configuration.mdx). + +--- + +## Statistics + +CDC Sink exposes runtime statistics through the `GetOngoingTaskInfoOperation`: + +```csharp +var taskInfo = await store.Maintenance.SendAsync( + new GetOngoingTaskInfoOperation(taskId, OngoingTaskType.CdcSink)); +``` + +The returned object includes: + +| Field | Description | +|-------|-------------| +| `TaskState` | Current state of the task | +| `MentorNode` | Configured preferred node | +| `ResponsibleNode` | Node currently running the task | +| `Error` | Error message if the task is in error state | + +Detailed per-table statistics — including row counts and last processed position — +are available through the Management Studio's ongoing tasks detail view. + +--- + +## Notifications + +CDC Sink participates in RavenDB's standard alert system. If the task enters an +error state or fallback mode, an alert is raised and visible in: + +* The **Notification Center** in the Management Studio (bell icon) +* The cluster's alert log + +Alerts include the error message and which table or operation triggered the failure, +making it straightforward to diagnose the root cause. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/overview.mdx new file mode 100644 index 0000000000..54c47bc41e --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/overview.mdx @@ -0,0 +1,133 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Overview" +sidebar_label: Overview +description: "An introduction to CDC Sink, a RavenDB ongoing task that reads Change Data Capture streams from relational databases and writes documents into RavenDB." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Overview + + + +* **CDC Sink** is a RavenDB ongoing task that reads **Change Data Capture (CDC)** + streams from a relational database and writes the resulting documents into RavenDB. + +* CDC Sink is the reverse of ETL: instead of pushing data _from_ RavenDB _to_ SQL, + CDC Sink _pulls_ data _from_ SQL _into_ RavenDB. + The relational database is the source of truth; RavenDB receives a continuously-updated + document model derived from it. + +* CDC Sink maps normalized relational tables into rich, nested RavenDB documents — + automatically and in real time. + +* Supported source databases: + * **PostgreSQL** — via logical replication + * Additional source databases planned for future versions + +* In this page: + * [Why Use CDC Sink](#why-use-cdc-sink) + * [How It Works](#how-it-works) + * [Task Lifecycle](#task-lifecycle) + * [Licensing](#licensing) + + + +--- + +## Why Use CDC Sink + +CDC Sink solves the problem of moving data from a relational database into RavenDB +without requiring changes to the source system. + +* **Migrate from SQL to RavenDB** + Transform normalized SQL tables (orders, order_lines, customers) into rich RavenDB + documents where an Order contains embedded LineItems and a reference to the Customer — + automatically and continuously, without changing your SQL application. + +* **Build a read-optimized view** + Your transactional system uses a relational database, but your API layer needs + denormalized documents. CDC Sink creates and maintains those documents without + touching your existing application. + +* **Gradual migration** + Keep your SQL application running while RavenDB documents are built in the background. + Applications can start reading from RavenDB while writes still go to the relational database. + +* **Event-driven side effects** + Using JavaScript patches, every INSERT, UPDATE, or DELETE in the source database can + trigger custom logic in RavenDB — computing derived fields, maintaining running totals, + or writing custom transformations. + +--- + +## How It Works + +A CDC Sink task continuously reads changes from the source relational database and +applies them to RavenDB documents. + +### Initial Load + +When a CDC Sink task starts for the first time, it performs a full scan of every +configured table using keyset pagination. This populates RavenDB with the current +state of the data before streaming begins. + +Initial load progress is persisted per-table. If the task is restarted, it resumes +from where it left off rather than re-scanning. + +### Change Streaming + +After the initial load, CDC Sink switches to streaming changes in real time. +Changes are grouped into transactions, preserving the exact order of operations +from the source database. Partial transactions are never written to RavenDB — +all changes within a source database transaction are applied together. + +### Document Model + +The relational model is mapped to RavenDB documents through configuration: + +* **Root tables** map to RavenDB collections (one document per row) +* **Embedded tables** become nested arrays or objects within parent documents +* **Linked tables** become document ID references + +See [Schema Design](./schema-design.mdx) for details. + +--- + +## Task Lifecycle + +1. **Create** — Define the task in Studio or via the Client API + Specify the connection string, table mappings, and transformation options + +2. **Verify** — CDC Sink verifies the source database is properly configured + Checks permissions, replication prerequisites, and table configuration + +3. **Initial Load** — Full table scan populates RavenDB with current data + Progress is tracked per-table and persists across restarts + +4. **Stream** — Real-time change streaming begins + All INSERTs, UPDATEs, and DELETEs are applied to RavenDB documents as they occur + +5. **Monitor** — View statistics, errors, and progress in Studio + +6. **Retire** — Delete the task in RavenDB when no longer needed + PostgreSQL artifacts (replication slot, publication) must be cleaned up by + the database administrator separately + +--- + +## Licensing + + +CDC Sink is available on an **Enterprise** license. + + +Learn more about licensing [here](../../../start/licensing/licensing-overview). + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx new file mode 100644 index 0000000000..a046f43a94 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -0,0 +1,307 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Patching" +sidebar_label: Patching +description: "JavaScript patches in CDC Sink — available variables, patch scope, aggregation, delta computations, metadata, loading related documents, and idempotency." +sidebar_position: 6 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Patching + + + +* A **patch** is a JavaScript snippet that runs on a document after column mapping is applied. + Patches let you transform data, compute derived fields, maintain aggregates, load related + documents, and control document metadata. + +* Patches are defined per-table (root or embedded) and per-operation (INSERT/UPDATE + or DELETE via `OnDelete.Patch`). + +* In this page: + * [When Patches Run](#when-patches-run) + * [Available Variables](#available-variables) + * [Patch Scope: Root vs Embedded](#patch-scope-root-vs-embedded) + * [Scenario: Column Transformation](#scenario-column-transformation) + * [Scenario: Aggregation with Embedded Tables](#scenario-aggregation-with-embedded-tables) + * [Scenario: Delta Computations](#scenario-delta-computations) + * [Scenario: Computed Derived Fields](#scenario-computed-derived-fields) + * [Scenario: Document Metadata](#scenario-document-metadata) + * [Scenario: Loading Related Documents](#scenario-loading-related-documents) + * [Idempotency and Failover](#idempotency-and-failover) + * [Step Limit](#step-limit) + * [Capabilities and Limitations](#capabilities-and-limitations) + + + +--- + +## When Patches Run + +| Event | Patch property | Runs? | +|-------|---------------|-------| +| INSERT | `Patch` | ✓ After column mapping | +| UPDATE | `Patch` | ✓ After column mapping, on merged document | +| DELETE (root table) | `OnDelete.Patch` | ✓ Before document is deleted | +| DELETE (embedded table) | `OnDelete.Patch` | ✓ On parent document before item is removed | + +The `Patch` property on a table handles INSERT and UPDATE. +The `OnDelete.Patch` on `CdcSinkOnDeleteConfig` handles DELETE separately. + +--- + +## Available Variables + +| Variable | Available | Type | Description | +|----------|-----------|------|-------------| +| `this` | Always | object | The document being modified (root or parent for embedded) | +| `$row` | Always | object | All SQL columns from the CDC event (mapped and unmapped) | +| `$old` | UPDATE only | object \| null | Previous state of the item; null for INSERT | +| `get(id)` | Always | function | Load a RavenDB document by ID | + +--- + +## Patch Scope: Root vs Embedded + +**Root table patches** operate on the root document: + +```javascript +Patch = "this.Status = $row.is_active ? 'Active' : 'Inactive';" +``` + +`this` is the Orders (or whatever root collection) document. + +**Embedded table patches** operate on the **parent** document, not the embedded item: + +```javascript +// Patch on order_lines embedded table +Patch = "this.TotalQuantity = (this.Lines || []).reduce((s, l) => s + l.Quantity, 0);" +``` + +`this` is the parent Orders document. This lets you recompute parent-level aggregates +whenever an embedded item changes. + +--- + +## Scenario: Column Transformation + +Transform or combine SQL columns into RavenDB properties: + +```javascript +this.FullName = ($row.first_name + ' ' + $row.last_name).trim(); +this.Status = $row.is_active ? 'Active' : 'Inactive'; +this.CreatedAt = new Date($row.created_unix * 1000).toISOString(); +``` + +--- + +## Scenario: Aggregation with Embedded Tables + +Recompute parent-level totals whenever an embedded item is added or updated. + + + +If you use a `Patch` to maintain an aggregate, you **must** also provide an +`OnDelete.Patch` that reverses the aggregate when an item is deleted. +Without it, deletes will leave the aggregate in an incorrect state. + + + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "order_lines", + PropertyName = "Lines", + PrimaryKeyColumns = ["line_id"], + JoinColumns = ["order_id"], + ColumnsMapping = { { "line_id", "LineId" }, { "quantity", "Quantity" } }, + + // Runs on INSERT and UPDATE — recomputes total from current Lines array + Patch = @" + this.TotalQuantity = (this.Lines || []) + .reduce((sum, line) => sum + (line.Quantity || 0), 0); + ", + + // REQUIRED: Runs on DELETE — recomputes after item is removed from array + OnDelete = new CdcSinkOnDeleteConfig + { + Patch = @" + this.TotalQuantity = (this.Lines || []) + .reduce((sum, line) => sum + (line.Quantity || 0), 0); + " + } +} +``` + +The OnDelete patch runs after the item has already been removed from the array, +so re-summing `this.Lines` gives the correct post-deletion total. + +--- + +## Scenario: Delta Computations + +Use `$old` to compute a delta when an embedded item changes, keeping a running total +without recomputing from scratch. + + + +Running total patches using `$old` must have a matching `OnDelete.Patch` that subtracts +the deleted item's value. Without it, deletes leave the running total incorrect. + + + +```csharp +new CdcSinkEmbeddedTableConfig +{ + SourceTableName = "invoice_lines", + PropertyName = "Lines", + PrimaryKeyColumns = ["line_id"], + JoinColumns = ["invoice_id"], + ColumnsMapping = { { "line_id", "LineId" }, { "amount", "Amount" } }, + + // INSERT: $old is null, so delta = new amount (0 → new) + // UPDATE: $old has previous Amount, delta = new - old + Patch = @" + const oldAmount = $old?.Amount || 0; + const newAmount = $row.amount || 0; + this.RunningTotal = (this.RunningTotal || 0) + (newAmount - oldAmount); + ", + + // REQUIRED: Subtract the deleted item's amount using $old + OnDelete = new CdcSinkOnDeleteConfig + { + Patch = @" + const deletedAmount = $old?.Amount || 0; + this.RunningTotal = (this.RunningTotal || 0) - deletedAmount; + " + } +} +``` + +The `$old` variable in the OnDelete patch contains the item's last known state before deletion. + +--- + +## Scenario: Computed Derived Fields + +Compute fields from unmapped columns that you don't want to store directly: + +```javascript +// base_price and tax_rate are NOT in ColumnsMapping +this.FinalPrice = $row.base_price * (1 + $row.tax_rate); +this.Discount = $row.is_vip ? $row.base_price * 0.1 : 0; +``` + +--- + +## Scenario: Document Metadata + +Patches can set RavenDB document metadata, including expiration: + +```javascript +this['@metadata'] = this['@metadata'] || {}; + +if ($row.expires_at) { + this['@metadata']['@expires'] = new Date($row.expires_at).toISOString(); +} + +this['@metadata']['SourceTable'] = 'orders'; +``` + +--- + +## Scenario: Loading Related Documents + +Use `get()` to load a related RavenDB document and denormalize its data: + +```javascript +const customer = get('Customers/' + $row.customer_id); + +if (customer) { + this.CustomerName = customer.Name; + this.CustomerEmail = customer.Email; + this.CustomerTier = customer.Tier; +} else { + // Document doesn't exist yet — race condition or not yet synced + this.CustomerName = null; + this.CustomerEmail = null; +} +``` + +`get()` returns `null` if the document does not exist or has not yet been created +by CDC Sink (race condition when multiple tables are loading in parallel). +Always check for null before accessing properties. + +**When to use `get()`:** + +* Denormalizing slowly-changing reference data (customer name, category, region) +* Capturing a snapshot of related data at insert time + +**Prefer linked tables** for simple foreign key references — they are cleaner and +do not have the null-handling complexity of `get()`. + +--- + +## Idempotency and Failover + +After a failover, CDC Sink resumes from the replicated state, which may be older +than the last work the previous mentor node completed. Some changes may be re-read +and re-applied. + +**Patches that are not idempotent can produce incorrect results when re-applied.** + +```javascript +// NOT idempotent — increments again on re-read +this.ViewCount = (this.ViewCount || 0) + 1; + +// Idempotent — absolute value from SQL source +this.ViewCount = $row.view_count; + +// Idempotent — delta via $old (both first-read and re-read produce same result) +const oldVal = $old?.Amount || 0; +const newVal = $row.amount || 0; +this.RunningTotal = (this.RunningTotal || 0) + (newVal - oldVal); +``` + +Column mapping itself is idempotent — overwriting a property with the same value +is always safe. + +--- + +## Step Limit + +Patch scripts are bounded by a **step quota** rather than a time limit. Each operation +in the script (assignment, loop iteration, function call) consumes steps. + +The limit is controlled by the `Patching.MaxStepsForScript` configuration setting. +See [Patching Configuration](../../../server/configuration/patching-configuration). + +If a patch exceeds the limit, the CDC operation fails and is retried. + +Keep patches focused and efficient — prefer `.filter()` + `.reduce()` over nested loops. + +--- + +## Capabilities and Limitations + +**What patches can do:** + +* Access `this`, `$row`, `$old` +* Load related documents with `get()` +* Compute and transform property values +* Set and modify document metadata +* Conditional logic, loops, array methods +* Use built-in JavaScript: `Date`, `Math`, `JSON`, `Array` + +**What patches cannot do:** + +* Make HTTP calls or API requests +* Access the file system +* Load documents from other RavenDB databases + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/_category_.json b/docs/server/ongoing-tasks/cdc-sink/postgres/_category_.json new file mode 100644 index 0000000000..eb53b0ec2d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/_category_.json @@ -0,0 +1,4 @@ +{ + "position": 16, + "label": "PostgreSQL" +} diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx new file mode 100644 index 0000000000..8b4d3d1879 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx @@ -0,0 +1,179 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Cleanup and Maintenance" +sidebar_label: Cleanup and Maintenance +description: "How to find and drop orphaned PostgreSQL replication slots and publications after deleting a CDC Sink task or changing task configuration." +sidebar_position: 6 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Cleanup and Maintenance + + + +* When a CDC Sink task is deleted from RavenDB, the associated PostgreSQL objects + — the replication slot and publication — are **not** automatically removed. + +* These must be cleaned up manually by a database administrator. + +* In this page: + * [Why Manual Cleanup Is Required](#why-manual-cleanup-is-required) + * [Finding Orphaned Slots and Publications](#finding-orphaned-slots-and-publications) + * [Dropping a Replication Slot](#dropping-a-replication-slot) + * [Dropping a Publication](#dropping-a-publication) + * [Too Many Replication Slots](#too-many-replication-slots) + * [Configuration Changes That Rename Slots](#configuration-changes-that-rename-slots) + + + +--- + +## Why Manual Cleanup Is Required + +An active replication slot prevents PostgreSQL from discarding WAL segments that +have not yet been consumed. If a slot is not being consumed (because the CDC Sink +task was deleted), PostgreSQL will accumulate WAL on disk indefinitely. + +This can lead to: + +* Disk space exhaustion on the PostgreSQL server +* Degraded performance as old WAL segments pile up + +A database administrator must drop unused replication slots. + + + +**Why RavenDB does not drop slots automatically:** + +There are valid reasons to keep a slot after a task is deleted. For example, you +may want to create a new CDC Sink task that resumes from the same position (by +reusing the existing slot), or you may want to review what changes are pending +before cleaning up. The CDC Sink user may also not have the permissions required +to drop replication slots, even if it had the permissions to create them. + +For these reasons, slot and publication lifecycle management is the responsibility +of the database administrator. + + + +--- + +## Finding Orphaned Slots and Publications + +List all CDC Sink replication slots: + +```sql +SELECT slot_name, active, confirmed_flush_lsn +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +``` + +An `active = false` slot is not being consumed. Compare the list to your active CDC +Sink tasks in RavenDB — any slot whose corresponding task no longer exists is orphaned. + +List all CDC Sink publications: + +```sql +SELECT p.pubname, c.relname AS table_name +FROM pg_publication p +JOIN pg_publication_rel pr ON pr.prpubid = p.oid +JOIN pg_class c ON c.oid = pr.prrelid +WHERE p.pubname LIKE 'rvn_cdc_p_%' +ORDER BY p.pubname, c.relname; +``` + +--- + +## Dropping a Replication Slot + +```sql +SELECT pg_drop_replication_slot('rvn_cdc_s_'); +``` + + + +You cannot drop an active replication slot (one with `active = true`). The CDC Sink +task must be stopped or deleted in RavenDB before the slot can be dropped. + + + +--- + +## Dropping a Publication + +```sql +DROP PUBLICATION IF EXISTS "rvn_cdc_p_"; +``` + +Publications are not consumed like slots — they do not accumulate data or hold WAL +segments. However, they should still be dropped to keep the database clean. + +--- + +## Too Many Replication Slots + +PostgreSQL limits the total number of replication slots to `max_replication_slots`. +If you exceed this limit, no new CDC Sink tasks can start (they will fail with a +connection error). + +Check how many slots are in use: + +```sql +SELECT count(*) FROM pg_replication_slots; +SHOW max_replication_slots; +``` + +To resolve this: + +1. Identify inactive slots: + + ```sql + SELECT slot_name, active + FROM pg_replication_slots + WHERE active = false; + ``` + +2. Drop slots for tasks that no longer exist: + + ```sql + SELECT pg_drop_replication_slot('rvn_cdc_s_'); + ``` + +3. If needed, increase `max_replication_slots` in `postgresql.conf` and restart + PostgreSQL. + +--- + +## Configuration Changes That Rename Slots + +The replication slot name is derived from the task name, database name, and table +names. If you update a CDC Sink task in a way that changes any of these — such as +adding a table, removing a table, or renaming the task — the expected slot and +publication names change. + +What happens: + +* CDC Sink will look for a slot/publication with the new name +* If it has permissions, it will create them +* The old slot and publication are **not deleted** — they become orphaned + +After updating a task configuration that changes table membership: + +1. Let the task restart and create the new slot/publication +2. Identify the old slot (it will be inactive): + + ```sql + SELECT slot_name, active + FROM pg_replication_slots + WHERE slot_name LIKE 'rvn_cdc_s_%' + AND active = false; + ``` + +3. Drop the old slot and publication + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json new file mode 100644 index 0000000000..bef17e433d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json @@ -0,0 +1,4 @@ +{ + "position": 0, + "label": "Examples" +} diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx new file mode 100644 index 0000000000..c93744819c --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx @@ -0,0 +1,205 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Complex Nesting with Linked Tables" +sidebar_label: Complex Nesting +description: "Multi-level embedded table structure combined with linked table references for a product catalog with variants, attributes, and a category reference." +sidebar_position: 3 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Complex Nesting with Linked Tables + + + +* This example shows a multi-level embedded table structure combined with linked + table references, representing a product catalog with variants, attributes, and + a category reference. + +* In this page: + * [Source Schema](#source-schema) + * [REPLICA IDENTITY Setup](#replica-identity-setup) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + + + +--- + +## Source Schema + +```sql +CREATE TABLE categories ( + category_id SERIAL PRIMARY KEY, + name TEXT NOT NULL +); + +CREATE TABLE products ( + product_id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + category_id INT REFERENCES categories(category_id) +); + +CREATE TABLE product_variants ( + variant_id SERIAL PRIMARY KEY, + product_id INT NOT NULL REFERENCES products(product_id), + sku TEXT NOT NULL, + price NUMERIC(10,2) +); + +CREATE TABLE variant_attributes ( + attr_id SERIAL PRIMARY KEY, + product_id INT NOT NULL REFERENCES products(product_id), -- denormalized root PK (required for deep nesting) + variant_id INT NOT NULL REFERENCES product_variants(variant_id), + attr_name TEXT NOT NULL, + attr_value TEXT NOT NULL +); +``` + +--- + +## REPLICA IDENTITY Setup + +Both `product_variants` and `variant_attributes` have surrogate PKs with join +columns that are not part of their primary keys. Rather than `REPLICA IDENTITY FULL` +(which includes all columns), we use targeted unique indexes covering just the join +and PK columns: + +```sql +-- product_variants: join column is product_id, PK is variant_id +CREATE UNIQUE INDEX product_variants_replica_idx + ON product_variants (product_id, variant_id); +ALTER TABLE product_variants + REPLICA IDENTITY USING INDEX product_variants_replica_idx; + +-- variant_attributes: join columns are product_id + variant_id, PK is attr_id +CREATE UNIQUE INDEX variant_attributes_replica_idx + ON variant_attributes (product_id, variant_id, attr_id); +ALTER TABLE variant_attributes + REPLICA IDENTITY USING INDEX variant_attributes_replica_idx; +``` + +See [REPLICA IDENTITY](../replica-identity.mdx) for more details. + +--- + +## Task Configuration + +```csharp +var config = new CdcSinkConfiguration +{ + Name = "ProductCatalogSync", + ConnectionStringName = "MyPostgresConnection", + Tables = + [ + new CdcSinkTableConfig + { + Name = "Products", + SourceTableName = "products", + PrimaryKeyColumns = ["product_id"], + ColumnsMapping = new Dictionary + { + { "product_id", "ProductId" }, + { "name", "Name" } + }, + // Linked table: category_id FK → document ID in Categories collection + LinkedTables = + [ + new CdcSinkLinkedTableConfig + { + SourceTableName = "categories", + PropertyName = "Category", + LinkedCollectionName = "Categories", + Type = CdcSinkRelationType.Value, + JoinColumns = ["category_id"] + } + ], + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + { + SourceTableName = "product_variants", + PropertyName = "Variants", + Type = CdcSinkRelationType.Array, + JoinColumns = ["product_id"], + PrimaryKeyColumns = ["variant_id"], + ColumnsMapping = new Dictionary + { + { "variant_id", "VariantId" }, + { "sku", "Sku" }, + { "price", "Price" } + }, + // Deep-nested: attributes within each variant + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + { + SourceTableName = "variant_attributes", + PropertyName = "Attributes", + Type = CdcSinkRelationType.Array, + // JoinColumns must include the ROOT PK for deep nesting + JoinColumns = ["product_id", "variant_id"], + PrimaryKeyColumns = ["attr_id"], + ColumnsMapping = new Dictionary + { + { "attr_id", "AttrId" }, + { "attr_name", "Name" }, + { "attr_value", "Value" } + } + } + ] + } + ] + } + ] +}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +``` + +--- + +## Resulting Documents + +```json +{ + "ProductId": 42, + "Name": "Hiking Boot", + "Category": "categories/3", + "Variants": [ + { + "VariantId": 101, + "Sku": "HB-BLK-10", + "Price": 89.99, + "Attributes": [ + { "AttrId": 1, "Name": "Color", "Value": "Black" }, + { "AttrId": 2, "Name": "Size", "Value": "10" } + ] + }, + { + "VariantId": 102, + "Sku": "HB-BRN-11", + "Price": 89.99, + "Attributes": [ + { "AttrId": 3, "Name": "Color", "Value": "Brown" }, + { "AttrId": 4, "Name": "Size", "Value": "11" } + ] + } + ], + "@metadata": { "@collection": "Products" } +} +``` + + + +The `Categories` collection is also synced by CDC Sink (it would be a separate +root table configuration). `"categories/3"` is a standard RavenDB document ID +that enables the use of RavenDB includes when querying products. + + + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx new file mode 100644 index 0000000000..3c87e65335 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx @@ -0,0 +1,174 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Denormalization with Embedded Tables" +sidebar_label: Denormalization +description: "How to use CDC Sink embedded tables to merge normalized SQL tables (orders + order_lines) into denormalized RavenDB documents." +sidebar_position: 1 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Denormalization with Embedded Tables + + + +* This example shows how to merge a normalized SQL schema (orders + order_lines) + into denormalized RavenDB documents with embedded arrays. + +* In this page: + * [Source Schema](#source-schema) + * [REPLICA IDENTITY Setup](#replica-identity-setup) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + * [What Happens on Change Events](#what-happens-on-change-events) + + + +--- + +## Source Schema + +```sql +CREATE TABLE orders ( + order_id SERIAL PRIMARY KEY, + customer TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + created_at TIMESTAMPTZ DEFAULT now() +); + +CREATE TABLE order_lines ( + line_id SERIAL PRIMARY KEY, + order_id INT NOT NULL REFERENCES orders(order_id), + product TEXT NOT NULL, + qty INT NOT NULL, + unit_price NUMERIC(10,2) NOT NULL +); +``` + +--- + +## REPLICA IDENTITY Setup + +`order_lines` has a surrogate PK (`line_id`). The join column `order_id` is not +part of the primary key. Without `REPLICA IDENTITY FULL`, DELETE events for +`order_lines` rows would not include `order_id`, and CDC Sink could not find +the parent document. + +```sql +ALTER TABLE order_lines REPLICA IDENTITY FULL; +``` + +See [REPLICA IDENTITY](../replica-identity.mdx). + +--- + +## Task Configuration + +```csharp +var config = new CdcSinkConfiguration +{ + Name = "OrdersSync", + ConnectionStringName = "MyPostgresConnection", + Tables = new List + { + new CdcSinkTableConfig + { + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = new List { "order_id" }, + ColumnsMapping = new Dictionary + { + { "order_id", "OrderId" }, + { "customer", "Customer" }, + { "status", "Status" }, + { "created_at", "CreatedAt" } + }, + EmbeddedTables = new List + { + new CdcSinkEmbeddedTableConfig + { + SourceTableName = "order_lines", + PropertyName = "Lines", + Type = CdcSinkRelationType.Array, + JoinColumns = new List { "order_id" }, + PrimaryKeyColumns = new List { "line_id" }, + ColumnsMapping = new Dictionary + { + { "line_id", "LineId" }, + { "product", "Product" }, + { "qty", "Qty" }, + { "unit_price", "UnitPrice" } + } + } + } + } + } +}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +``` + +--- + +## Resulting Documents + +SQL rows: + +``` +orders: order_id=1, customer='Acme Corp', status='pending' +order_lines: line_id=1, order_id=1, product='Widget A', qty=3, unit_price=9.99 +order_lines: line_id=2, order_id=1, product='Widget B', qty=1, unit_price=24.99 +``` + +RavenDB document `orders/1`: + +```json +{ + "OrderId": 1, + "Customer": "Acme Corp", + "Status": "pending", + "CreatedAt": "2024-06-01T09:00:00+00:00", + "Lines": [ + { + "LineId": 1, + "Product": "Widget A", + "Qty": 3, + "UnitPrice": 9.99 + }, + { + "LineId": 2, + "Product": "Widget B", + "Qty": 1, + "UnitPrice": 24.99 + } + ], + "@metadata": { "@collection": "Orders" } +} +``` + +--- + +## What Happens on Change Events + +**INSERT into `order_lines`:** +A new item is appended to the `Lines` array. + +**UPDATE to `order_lines`:** +CDC Sink finds the item by `line_id` within the `Lines` array and updates its properties. + +**DELETE from `order_lines`:** +CDC Sink finds the item by `line_id` and removes it from the array. +(Requires `REPLICA IDENTITY FULL` as configured above.) + +**UPDATE to `orders`:** +Only the root document properties (`Status`, etc.) are updated. The `Lines` array +is not affected. + +**DELETE from `orders`:** +The entire `orders/1` document is deleted, including all embedded lines. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx new file mode 100644 index 0000000000..d0a3a26ffb --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx @@ -0,0 +1,168 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Event Sourcing with Aggregation" +sidebar_label: Event Sourcing +description: "Using CDC Sink patches to maintain a computed aggregate (running balance) on a RavenDB document as individual event rows arrive from PostgreSQL." +sidebar_position: 2 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Event Sourcing with Aggregation + + + +* This example shows how to use CDC Sink patches to maintain a computed aggregate + on a RavenDB document as individual event rows arrive from PostgreSQL. + +* In this page: + * [Source Schema](#source-schema) + * [Goal](#goal) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + * [Handling Deletes](#handling-deletes) + + + +--- + +## Source Schema + +An accounts table and a transactions table: + +```sql +CREATE TABLE accounts ( + account_id SERIAL PRIMARY KEY, + owner TEXT NOT NULL, + currency TEXT NOT NULL DEFAULT 'USD' +); + +CREATE TABLE transactions ( + txn_id SERIAL PRIMARY KEY, + account_id INT NOT NULL REFERENCES accounts(account_id), + amount NUMERIC(12,2) NOT NULL, + type TEXT NOT NULL, -- 'credit' or 'debit' + created_at TIMESTAMPTZ DEFAULT now() +); +``` + +--- + +## Goal + +Store each account as a RavenDB document with a `Balance` field that reflects the +running total of all transactions. Transaction rows are embedded as an array for +history, and `Balance` is maintained using patch logic. + +--- + +## Task Configuration + +```csharp +var config = new CdcSinkConfiguration +{ + Name = "AccountsSync", + ConnectionStringName = "MyPostgresConnection", + Tables = + [ + new CdcSinkTableConfig + { + Name = "Accounts", + SourceTableName = "accounts", + PrimaryKeyColumns = ["account_id"], + ColumnsMapping = new Dictionary + { + { "account_id", "AccountId" }, + { "owner", "Owner" }, + { "currency", "Currency" } + }, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + { + SourceTableName = "transactions", + PropertyName = "Transactions", + Type = CdcSinkRelationType.Array, + JoinColumns = ["account_id"], + PrimaryKeyColumns = ["txn_id"], + ColumnsMapping = new Dictionary + { + { "txn_id", "TxnId" }, + { "amount", "Amount" }, + { "type", "Type" }, + { "created_at", "CreatedAt" } + }, + // Patch runs on the parent document for INSERT/UPDATE + Patch = """ + const oldAmount = $old?.Amount || 0; + const newAmount = $row.amount || 0; + const sign = $row.type === 'credit' ? 1 : -1; + const oldSign = $old?.Type === 'credit' ? 1 : -1; + this.Balance = (this.Balance || 0) + - (oldSign * oldAmount) + + (sign * newAmount); + """, + OnDelete = new CdcSinkOnDeleteConfig + { + Patch = """ + const deletedAmount = $old?.Amount || 0; + const sign = $old?.Type === 'credit' ? 1 : -1; + this.Balance = (this.Balance || 0) - (sign * deletedAmount); + """ + } + } + ] + } + ] +}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +``` + +--- + +## Resulting Documents + +After three transactions (credit 100, debit 30, credit 50): + +```json +{ + "AccountId": 1, + "Owner": "Alice", + "Currency": "USD", + "Balance": 120.00, + "Transactions": [ + { "TxnId": 1, "Amount": 100.00, "Type": "credit", "CreatedAt": "..." }, + { "TxnId": 2, "Amount": 30.00, "Type": "debit", "CreatedAt": "..." }, + { "TxnId": 3, "Amount": 50.00, "Type": "credit", "CreatedAt": "..." } + ], + "@metadata": { "@collection": "Accounts" } +} +``` + +--- + +## Handling Deletes + +The `OnDelete.Patch` reverses the contribution of the deleted transaction to +`Balance`. This uses `$old` (the embedded item's last known state) rather than +`$row`, because for a DELETE event the embedded item's mapped values are in `$old`. + +Without the `OnDelete.Patch`, deleting a transaction row from SQL would remove +it from the `Transactions` array but leave `Balance` stale. The delete patch +keeps `Balance` consistent. + + + +The patch uses delta logic (`$old` → `$row`) for idempotency. If a change is +re-applied after a failover, `$old` still reflects the state before the original +update, so the delta produces the same result. +See [Failover and Consistency](../../failover-and-consistency.mdx). + + + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx new file mode 100644 index 0000000000..2f87b31d74 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx @@ -0,0 +1,101 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Simple Table Migration" +sidebar_label: Simple Table Migration +description: "Minimal CDC Sink configuration to replicate a single PostgreSQL table into a RavenDB collection." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Example: Simple Table Migration + + + +* This example shows the minimal setup to replicate a single SQL table into a + RavenDB collection. + +* In this page: + * [Source Schema](#source-schema) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + + + +--- + +## Source Schema + +A simple customers table: + +```sql +CREATE TABLE customers ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + email TEXT NOT NULL, + created_at TIMESTAMPTZ DEFAULT now() +); +``` + +--- + +## Task Configuration + +```csharp +var config = new CdcSinkConfiguration +{ + Name = "CustomersSync", + ConnectionStringName = "MyPostgresConnection", + Tables = new List + { + new CdcSinkTableConfig + { + Name = "Customers", + SourceTableName = "customers", + PrimaryKeyColumns = new List { "id" }, + ColumnsMapping = new Dictionary + { + { "id", "Id" }, + { "name", "Name" }, + { "email", "Email" }, + { "created_at", "CreatedAt" } + } + } + } +}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +``` + +--- + +## Resulting Documents + +SQL row: + +``` +id=1, name='Alice', email='alice@example.com', created_at='2024-01-15 10:30:00+00' +``` + +RavenDB document in collection `Customers` with ID `customers/1`: + +```json +{ + "Id": 1, + "Name": "Alice", + "Email": "alice@example.com", + "CreatedAt": "2024-01-15T10:30:00+00:00", + "@metadata": { + "@collection": "Customers" + } +} +``` + +Document IDs are generated as `{collection}/{pk}` — for example, `customers/1` +for a row with `id = 1`. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx new file mode 100644 index 0000000000..c47cd3710d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx @@ -0,0 +1,201 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Initial Setup" +sidebar_label: Initial Setup +description: "How CDC Sink creates the PostgreSQL replication slot and publication — automatic setup, manual setup steps, custom slot names, and verification queries." +sidebar_position: 3 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Initial Setup + + + +* When CDC Sink starts, it verifies and (if it has the necessary permissions) creates + the **replication slot** and **publication** required for logical replication. + +* If the CDC Sink user does not have permission to create these objects, a database + administrator must create them manually before the task can start. + +* In this page: + * [Automatic Setup](#automatic-setup) + * [Manual Setup](#manual-setup) + * [Specifying Custom Slot and Publication Names](#specifying-custom-slot-and-publication-names) + * [Slot and Publication Naming (Auto-generated)](#slot-and-publication-naming-auto-generated) + * [Verifying Setup](#verifying-setup) + + + +--- + +## Automatic Setup + +If the CDC Sink user has the required permissions (see +[Permissions and Roles](./permissions-and-roles.mdx)), +CDC Sink will: + +1. Compute the replication slot and publication names from the task configuration +2. Check whether they already exist +3. Create them if they do not exist +4. Begin the initial load + +No manual database administration is needed in this case. + +--- + +## Manual Setup + +If the CDC Sink user does not have permission to create replication slots or +publications, a database administrator must create them before the task is started. + +**Step 1: Determine the slot and publication names** + +The simplest approach is to specify the names explicitly in `CdcSinkPostgresSettings` +so both you and the database administrator know what names to use. +See [Specifying Custom Slot and Publication Names](#specifying-custom-slot-and-publication-names). + +If using auto-generated names (no `Postgres` settings), CDC Sink derives names +deterministically from the task configuration. +See [Slot and Publication Naming](#slot-and-publication-naming-auto-generated) +for the naming scheme. + +You can also find the names CDC Sink expects by creating the task (it will fail to start) +and reading the error message, which includes the expected names. + +**Step 2: Create the publication** + +Create a publication that includes all the tables CDC Sink will replicate: + +```sql +CREATE PUBLICATION rvn_cdc_p_ +FOR TABLE orders, order_lines, customers; +``` + +The publication must include all tables from the task's root and embedded table +configurations. + +**Step 3: Create the replication slot** + +```sql +SELECT pg_create_logical_replication_slot( + 'rvn_cdc_s_', + 'pgoutput' +); +``` + +**Step 4: Start the CDC Sink task** + +Once the slot and publication exist, CDC Sink will detect them on startup and +proceed with the initial load. + + + +If the task configuration changes (tables added, removed, or renamed), the expected +slot and publication names change. The old slot and publication become orphaned and +must be dropped manually. The new names must be created before the task can restart. +See [Cleanup and Maintenance](./cleanup-and-maintenance.mdx). + + + +--- + +## Specifying Custom Slot and Publication Names + +You can explicitly specify the replication slot and publication names by setting +`CdcSinkPostgresSettings` on the task configuration: + +```csharp +var config = new CdcSinkConfiguration +{ + Name = "OrdersSync", + ConnectionStringName = "MyPostgresConnection", + Postgres = new CdcSinkPostgresSettings + { + SlotName = "orders_sync_slot", + PublicationName = "orders_sync_pub" + }, + Tables = [ ... ] +}; +``` + +Custom names are useful when: + +* A database administrator pre-creates the slot and publication with human-readable + names before starting the task +* You are migrating from a previous CDC Sink task and want to reuse an existing slot + to avoid re-reading history +* You need predictable names across environments (dev/staging/prod) + +**Constraints:** Names must be valid PostgreSQL identifiers — alphanumeric characters +and underscores only, maximum 63 characters. + +**Immutability:** Once set, `SlotName` and `PublicationName` cannot be changed. The +slot and publication names are fixed for the lifetime of the task. If you need to +rename them, delete the task and create a new one. + +--- + +## Slot and Publication Naming (Auto-generated) + +When `CdcSinkPostgresSettings` is not set (or `SlotName`/`PublicationName` are null), +CDC Sink generates deterministic names: + +* **Slot name**: `rvn_cdc_s_` +* **Publication name**: `rvn_cdc_p_` + +The `` is a base32-encoded SHA-256 hash of: + +* The RavenDB database name +* The CDC Sink task name +* The names of all tables in the task configuration + +**Example:** + +For a task named `OrdersSync` on database `NorthWind` with tables `orders` and +`order_lines`, the generated names look like: + +``` +rvn_cdc_s_jl0okb2prit591rfdfjt1g4kebi3879vgl3avve7gunadllk4re0 +rvn_cdc_p_jl0okb2prit591rfdfjt1g4kebi3879vgl3avve7gunadllk4re0 +``` + +The slot and publication share the same hash, making it easy to match them to +a specific task. + + + +Because the hash is derived from the task name, database name, and table names, +changing any of these values produces a different hash and therefore a different +slot name. This prevents naming conflicts between multiple CDC Sink tasks on the +same PostgreSQL instance, but means that renaming a task or adding/removing tables +produces a new slot — the old one becomes orphaned. + + + +--- + +## Verifying Setup + +To verify the slot and publication were created: + +```sql +-- View CDC Sink replication slots +SELECT slot_name, plugin, slot_type, active +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; + +-- View CDC Sink publications +SELECT p.pubname, c.relname AS table_name +FROM pg_publication p +JOIN pg_publication_rel pr ON pr.prpubid = p.oid +JOIN pg_class c ON c.oid = pr.prrelid +WHERE p.pubname LIKE 'rvn_cdc_p_%' +ORDER BY p.pubname, c.relname; +``` + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/monitoring-postgres.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/monitoring-postgres.mdx new file mode 100644 index 0000000000..6ec0860e90 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/monitoring-postgres.mdx @@ -0,0 +1,94 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Monitoring PostgreSQL" +sidebar_label: Monitoring PostgreSQL +description: "PostgreSQL-side monitoring for CDC Sink — replication slot health, replication lag, and WAL disk usage queries." +sidebar_position: 7 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Monitoring PostgreSQL + + + +* This page covers PostgreSQL-side monitoring for CDC Sink — replication slot health, + lag, and WAL usage. + +* For RavenDB-side monitoring, see [Monitoring](../monitoring.mdx). + +* In this page: + * [Replication Slot Health](#replication-slot-health) + * [Replication Lag](#replication-lag) + * [WAL Disk Usage](#wal-disk-usage) + + + +--- + +## Replication Slot Health + +Check that CDC Sink replication slots are active and being consumed: + +```sql +SELECT slot_name, active, confirmed_flush_lsn +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +``` + +| Column | Meaning | +|--------|---------| +| `active` | `true` if CDC Sink is connected and consuming; `false` if the connection is down | +| `confirmed_flush_lsn` | The LSN up to which changes have been confirmed as consumed | + +A slot with `active = false` means CDC Sink is not currently connected. This is +expected during failover or when the task is paused. If the slot remains inactive +for an extended period, investigate the task state in RavenDB Studio. + +--- + +## Replication Lag + +Replication lag measures how far behind the slot is relative to the current WAL position: + +```sql +SELECT slot_name, + pg_current_wal_lsn() - confirmed_flush_lsn AS lag_bytes +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +``` + +A consistently growing `lag_bytes` means CDC Sink is not keeping up with the rate +of changes in the source database. Consider: + +* Increasing `CdcSink.MaxBatchSize` to process more changes per batch +* Reducing load on the source database +* Checking the per-table processing statistics in the Management Studio for slow scripts — + complex patch scripts are a common cause of processing slowdowns + +--- + +## WAL Disk Usage + +PostgreSQL retains WAL segments until all replication slots have consumed them. +An inactive or slow slot can cause WAL to accumulate on disk. + +Check approximate WAL retained per slot: + +```sql +SELECT slot_name, active, + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS retained_wal_bytes +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +``` + +High `retained_wal_bytes` on an inactive slot indicates the slot is not being +consumed and is holding WAL. If the slot corresponds to a deleted or abandoned +CDC Sink task, drop it. +See [Cleanup and Maintenance](./cleanup-and-maintenance.mdx). + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/permissions-and-roles.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/permissions-and-roles.mdx new file mode 100644 index 0000000000..a5eb3b98de --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/permissions-and-roles.mdx @@ -0,0 +1,125 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Permissions and Roles" +sidebar_label: Permissions and Roles +description: "PostgreSQL permissions required by the CDC Sink user — REPLICATION privilege, SELECT grants, and optional permissions for automatic slot and publication management." +sidebar_position: 2 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Permissions and Roles + + + +* This page documents the PostgreSQL permissions required by the CDC Sink user, + and how to grant them. + +* In this page: + * [Minimum Permissions](#minimum-permissions) + * [Permissions for Automatic Setup](#permissions-for-automatic-setup) + * [Creating a Dedicated CDC User](#creating-a-dedicated-cdc-user) + + + +--- + +## Minimum Permissions + +At a minimum, the CDC Sink user needs: + +* **`REPLICATION`** attribute — allows the user to initiate logical replication +* **`SELECT`** on each table being replicated — required for the initial load phase + +Example: + +```sql +-- Grant replication privilege +ALTER USER cdc_user REPLICATION; + +-- Grant SELECT on each table +GRANT SELECT ON TABLE orders TO cdc_user; +GRANT SELECT ON TABLE order_lines TO cdc_user; +GRANT SELECT ON TABLE customers TO cdc_user; +``` + +With only these permissions, a database administrator must manually create the +replication slot and publication before starting the CDC Sink task. +See [Initial Setup](./initial-setup.mdx). + +--- + +## Permissions for Automatic Setup + +If you want CDC Sink to create and manage the replication slot and publication +automatically, the user needs additional permissions: + +**Create/drop replication slots:** + +PostgreSQL 14+: + +```sql +GRANT pg_replication_slot_admin TO cdc_user; +``` + +PostgreSQL 10–13: Replication slot management requires `SUPERUSER`. + +**Create/drop publications:** + +The user must either own the tables being published, or have `SUPERUSER`. + +Alternatively, you can grant `CREATE` on the database: + +```sql +GRANT CREATE ON DATABASE mydb TO cdc_user; +``` + + + +Granting `SUPERUSER` gives the user unrestricted access to the database server. +For production environments, prefer granting only the specific privileges listed +above rather than `SUPERUSER`. + + + + + +For added security in production, consider having your database administrator create +the replication slot and publication manually with the minimal permissions shown above, +rather than granting CDC Sink the ability to manage them automatically. The CDC Sink +user then only needs `REPLICATION` privilege and `SELECT` on the relevant tables. +See [Initial Setup](./initial-setup.mdx). + + + +--- + +## Creating a Dedicated CDC User + +It is recommended to use a dedicated database user for CDC Sink rather than an +application or admin user. + +Example setup: + +```sql +-- Create the user +CREATE USER cdc_user WITH PASSWORD 'secure_password' REPLICATION; + +-- Grant SELECT on tables to replicate +GRANT SELECT ON TABLE orders TO cdc_user; +GRANT SELECT ON TABLE order_lines TO cdc_user; + +-- If using PostgreSQL 14+ and want automatic slot management: +GRANT pg_replication_slot_admin TO cdc_user; + +-- If user needs to create publications (requires table ownership or superuser): +-- Option A: Grant ownership of tables +ALTER TABLE orders OWNER TO cdc_user; +-- Option B: Create publications as a superuser before starting the task +``` + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx new file mode 100644 index 0000000000..154440b267 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx @@ -0,0 +1,123 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Prerequisites Checklist" +sidebar_label: Prerequisites Checklist +description: "Requirements to verify before creating a CDC Sink task for PostgreSQL — WAL level, replication settings, user permissions, table requirements, and network access." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Prerequisites Checklist + + + +* Before creating a CDC Sink task for PostgreSQL, verify that each of these requirements + is in place. + +* In this page: + * [Source Database Requirements](#source-database-requirements) + * [User Permissions](#user-permissions) + * [Table Requirements](#table-requirements) + * [Network Access](#network-access) + + + +--- + +## Source Database Requirements + +* **PostgreSQL version**: 10 or later (logical replication introduced in version 10) + +* **WAL level**: `wal_level` must be set to `logical` + + Verify with: + + ```sql + SHOW wal_level; + ``` + + If the result is not `logical`, see [WAL Configuration](./wal-configuration.mdx). + +* **`max_replication_slots`**: Must be at least 1 (one slot per CDC Sink task) + + Verify with: + + ```sql + SHOW max_replication_slots; + ``` + +* **`max_wal_senders`**: Must be at least 1 + + Verify with: + + ```sql + SHOW max_wal_senders; + ``` + +--- + +## User Permissions + +The database user in the connection string must have sufficient permissions. + +**Minimum required permissions:** + +```sql +-- Replication privilege (required for logical replication) +ALTER USER cdc_user REPLICATION; + +-- SELECT on each table CDC Sink will read +GRANT SELECT ON TABLE orders TO cdc_user; +GRANT SELECT ON TABLE order_lines TO cdc_user; +``` + +**Optional (allows CDC Sink to configure replication automatically):** + +```sql +-- Create/drop replication slots +-- This requires SUPERUSER or membership in pg_replication_slot_admin (PG 14+) + +-- Create/drop publications +-- This requires ownership of the tables being published, or SUPERUSER +``` + +If the CDC Sink user does not have permission to create publications and replication +slots, a database administrator must set them up manually. +See [Initial Setup](./initial-setup.mdx). + +Full details: [Permissions and Roles](./permissions-and-roles.mdx). + +--- + +## Table Requirements + +* **Primary key**: Each root table and each embedded table must have a primary key + (or a unique index used as a replica identity). + +* **REPLICA IDENTITY**: For embedded tables where the join columns are not part of + the primary key, the table must have `REPLICA IDENTITY FULL` or a replica identity + index that includes the join columns. Without this, DELETE events will not include + the old row values, and CDC Sink cannot identify which embedded item to remove. + + See [REPLICA IDENTITY](./replica-identity.mdx). + +* **Published columns**: All columns referenced in `PrimaryKeyColumns`, `JoinColumns`, + and `ColumnsMapping` must exist in the SQL table. + +--- + +## Network Access + +* The RavenDB server must be able to open a TCP connection to the PostgreSQL host + on the configured port (default: 5432). + +* The connection must remain open for the duration of the replication stream. + Ensure that firewalls, proxies, and load balancers do not terminate idle or + long-lived connections. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity-manual-setup.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity-manual-setup.mdx new file mode 100644 index 0000000000..083d268e7e --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity-manual-setup.mdx @@ -0,0 +1,116 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: REPLICA IDENTITY Manual Setup" +sidebar_label: REPLICA IDENTITY Manual Setup +description: "How to manually configure REPLICA IDENTITY FULL or INDEX on PostgreSQL tables when CDC Sink does not have permission to alter them." +sidebar_position: 5 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: REPLICA IDENTITY Manual Setup + + + +* When CDC Sink does not have permission to alter tables, a database administrator + must configure `REPLICA IDENTITY` manually before the task starts. + +* In this page: + * [When Manual Setup Is Required](#when-manual-setup-is-required) + * [Setting REPLICA IDENTITY FULL](#setting-replica-identity-full) + * [Using an Index Instead of FULL](#using-an-index-instead-of-full) + * [Verifying the Configuration](#verifying-the-configuration) + + + +--- + +## When Manual Setup Is Required + +CDC Sink attempts to set `REPLICA IDENTITY FULL` automatically on any embedded +table whose join columns are not part of the primary key. This requires: + +* The CDC Sink user owns the table, **or** +* The CDC Sink user has `SUPERUSER` + +If neither condition is met, CDC Sink will start but embedded table deletes may +not work correctly. A database administrator must configure `REPLICA IDENTITY` +manually. + +--- + +## Setting REPLICA IDENTITY FULL + +The simplest approach is to set `REPLICA IDENTITY FULL` on all embedded tables +that CDC Sink will replicate: + +```sql +ALTER TABLE order_lines REPLICA IDENTITY FULL; +ALTER TABLE line_attributes REPLICA IDENTITY FULL; +``` + +`REPLICA IDENTITY FULL` includes all column values in DELETE and UPDATE events. +This is the most compatible option but increases WAL volume for tables with many +or large columns. + +--- + +## Using an Index Instead of FULL + +If WAL size is a concern, you can use a specific unique index that covers both +the join columns and PK columns of the embedded table. + +**Step 1: Create a unique index covering the required columns** + +```sql +-- For order_lines: join column is order_id, PK is line_id +CREATE UNIQUE INDEX order_lines_replica_idx + ON order_lines (order_id, line_id); +``` + +**Step 2: Set REPLICA IDENTITY to use this index** + +```sql +ALTER TABLE order_lines + REPLICA IDENTITY USING INDEX order_lines_replica_idx; +``` + +This instructs PostgreSQL to include only those indexed columns in DELETE and +UPDATE events, rather than all columns. + + + +The index must be `UNIQUE` and `NOT DEFERRABLE`. It cannot include expressions +or partial predicates. All columns in the index must be `NOT NULL`. + + + +--- + +## Verifying the Configuration + +Confirm that the desired `REPLICA IDENTITY` mode is set: + +```sql +SELECT c.relname, c.relreplident, + CASE c.relreplident + WHEN 'd' THEN 'DEFAULT' + WHEN 'f' THEN 'FULL' + WHEN 'i' THEN 'INDEX' + WHEN 'n' THEN 'NOTHING' + END AS mode +FROM pg_class c +JOIN pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'public' +ORDER BY c.relname; +``` + +Tables configured with `FULL` or `INDEX` are ready for CDC Sink embedded table +delete processing. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity.mdx new file mode 100644 index 0000000000..f87e05f96f --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity.mdx @@ -0,0 +1,144 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: REPLICA IDENTITY" +sidebar_label: REPLICA IDENTITY +description: "Why PostgreSQL REPLICA IDENTITY matters for CDC Sink embedded table deletes, when it is required, how to check the current setting, and automatic vs manual configuration." +sidebar_position: 4 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: REPLICA IDENTITY + + + +* **REPLICA IDENTITY** controls what column values PostgreSQL includes in DELETE + (and UPDATE) events in the logical replication stream. + +* CDC Sink requires old row values in DELETE events to identify which embedded + item to remove from a parent document. Without them, embedded table deletes + cannot be routed correctly. + +* In this page: + * [Why REPLICA IDENTITY Matters](#why-replica-identity-matters) + * [REPLICA IDENTITY Options](#replica-identity-options) + * [When Is REPLICA IDENTITY Required?](#when-is-replica-identity-required) + * [Checking Current Setting](#checking-current-setting) + * [Automatic vs Manual Configuration](#automatic-vs-manual-configuration) + + + +--- + +## Why REPLICA IDENTITY Matters + +When CDC Sink receives a DELETE event for an embedded table, it needs to: + +1. Find the parent document (using the join column values from the old row) +2. Find the matching item within the embedded array or map (using the PK column values) +3. Remove it + +If the DELETE event does not include old row values for the join columns and PK +columns, CDC Sink cannot perform steps 1 or 2. + +By default, PostgreSQL only includes the primary key columns in DELETE events. +If the embedded table's join columns are part of the primary key, this is sufficient. +If they are not — which is the common case when a table has a surrogate PK — the +DELETE event will be missing the columns needed to route it. + +--- + +## REPLICA IDENTITY Options + +PostgreSQL supports four REPLICA IDENTITY modes: + +| Mode | What's included in DELETE/UPDATE events | Notes | +|------|----------------------------------------|-------| +| `DEFAULT` | Primary key columns only | Default for tables with a PK | +| `FULL` | All columns | Works for any table structure; increases WAL size | +| `INDEX` | Columns covered by a specific unique index | More targeted than FULL | +| `NOTHING` | No old values | Insufficient for CDC Sink embedded table deletes | + +For CDC Sink, `FULL` is the simplest and most compatible choice. +`INDEX` works if the index covers both the join columns and PK columns. + +--- + +## When Is REPLICA IDENTITY Required? + +| Scenario | REPLICA IDENTITY needed? | +|----------|------------------------| +| Root table deletes | No — root documents are deleted by document ID | +| Embedded table deletes where join columns ARE part of the PK | No — `DEFAULT` is sufficient | +| Embedded table deletes where join columns are NOT part of the PK | **Yes** — `DEFAULT` is insufficient | +| Deep-nested embedded tables | Depends on the join column placement | + +**Example where REPLICA IDENTITY is needed:** + +```sql +-- order_lines has a surrogate PK (line_id), with order_id as foreign key +-- order_id is NOT part of the primary key +CREATE TABLE order_lines ( + line_id SERIAL PRIMARY KEY, -- PK + order_id INT NOT NULL, -- FK to orders (not part of PK) + product TEXT, + qty INT +); + +-- Without REPLICA IDENTITY FULL, a DELETE event only contains line_id. +-- CDC Sink cannot determine which parent document (order_id) to update. +ALTER TABLE order_lines REPLICA IDENTITY FULL; +``` + +**Example where REPLICA IDENTITY is NOT needed:** + +```sql +-- If order_id is part of a composite PK, DEFAULT is sufficient +CREATE TABLE order_lines ( + order_id INT NOT NULL, + line_num INT NOT NULL, + product TEXT, + qty INT, + PRIMARY KEY (order_id, line_num) +); +``` + +--- + +## Checking Current Setting + +```sql +SELECT c.relname, c.relreplident +FROM pg_class c +JOIN pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'public' +ORDER BY c.relname; +``` + +The `relreplident` column: + +| Value | Meaning | +|-------|---------| +| `d` | DEFAULT (primary key only) | +| `f` | FULL (all columns) | +| `i` | INDEX (specific unique index) | +| `n` | NOTHING | + +--- + +## Automatic vs Manual Configuration + +If the CDC Sink user has sufficient permissions (table ownership or `SUPERUSER`), +CDC Sink will automatically set `REPLICA IDENTITY FULL` on embedded tables that +require it. + +If the CDC Sink user does not have permission to alter tables, a database +administrator must configure `REPLICA IDENTITY` before starting the task. +See [Manual Setup](./replica-identity-manual-setup.mdx). + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/studio-ui.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/studio-ui.mdx new file mode 100644 index 0000000000..abeb6f3892 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/studio-ui.mdx @@ -0,0 +1,104 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: Studio UI" +sidebar_label: Studio UI +description: "How to create and manage CDC Sink tasks for PostgreSQL through the RavenDB Management Studio." +sidebar_position: 8 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: Studio UI + + + +* CDC Sink tasks are created and managed through the **Ongoing Tasks** section of the + Management Studio. + +* In this page: + * [Creating a CDC Sink Task](#creating-a-cdc-sink-task) + * [Monitoring Task State](#monitoring-task-state) + * [Editing a Task](#editing-a-task) + + + +--- + +## Creating a CDC Sink Task + + + +Screenshot needed — "Ongoing Tasks" list view showing the CDC Sink task type in the "Add Task" dropdown. +Reference ETL documentation screenshots for style (e.g., server/ongoing-tasks/etl/raven/defining-raven-etl-task.markdown). + + + +To create a CDC Sink task in the Studio: + +1. Navigate to **Databases** → your database → **Ongoing Tasks** +2. Click **Add Task** and select **CDC Sink** +3. Configure the connection string pointing to the PostgreSQL source +4. Add one or more root tables with their column mappings +5. (Optional) Add embedded tables and linked tables for each root table +6. (Optional) Configure patches for INSERT/UPDATE/DELETE events +7. Click **Save** + + + +Screenshot needed — CDC Sink task creation form, showing the connection string field, table configuration panel, and column mapping editor. + + + +--- + +## Monitoring Task State + + + +Screenshot needed — Ongoing Tasks list view with a running CDC Sink task, showing state badge ("Active"), responsible node, and the expand button for details. + + + +The task list shows: + +* **Task name** and **connection string name** +* **State** — Active, Disabled, Error, or FallbackMode +* **Responsible node** — the cluster node currently running the task +* **Progress** — during initial load, shows which tables have been scanned + +Clicking on a task opens the detail view with per-table statistics. + + + +Screenshot needed — CDC Sink task detail view, showing per-table row counts, last processed position, and the initial load progress per table. + + + +--- + +## Editing a Task + +To edit a task, click its name in the Ongoing Tasks list. The same configuration +form used for creation opens in edit mode. + + + +If the task uses **auto-generated** slot and publication names (no custom +`CdcSinkPostgresSettings`), adding or removing tables changes the hash and therefore +the names. A new slot and publication are created under the new names, and the old +ones become orphaned. + +If you specified **custom names** via `CdcSinkPostgresSettings.SlotName` and +`PublicationName`, the slot name stays the same but the publication may need to be +updated by a database administrator to include the new tables. + +Orphaned slots and publications must be dropped manually by the database administrator. +See [Cleanup and Maintenance](./cleanup-and-maintenance.mdx). + + + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/wal-configuration.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/wal-configuration.mdx new file mode 100644 index 0000000000..29361cc2a2 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/wal-configuration.mdx @@ -0,0 +1,97 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: PostgreSQL: WAL Configuration" +sidebar_label: WAL Configuration +description: "How to verify and enable PostgreSQL logical replication (wal_level = logical) and configure max_replication_slots and max_wal_senders." +sidebar_position: 1 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: PostgreSQL: WAL Configuration + + + +* CDC Sink uses PostgreSQL **logical replication**, which requires the Write-Ahead Log + (WAL) to be configured at the `logical` level. + +* This page explains how to verify the current setting and change it if needed. + +* In this page: + * [Check Current WAL Level](#check-current-wal-level) + * [Enable Logical Replication](#enable-logical-replication) + * [Other Required Settings](#other-required-settings) + + + +--- + +## Check Current WAL Level + +Connect to your PostgreSQL instance and run: + +```sql +SHOW wal_level; +``` + +If the result is `logical`, no changes are needed. + +If the result is `replica` or `minimal`, logical replication is not enabled and +must be configured before CDC Sink can run. + +--- + +## Enable Logical Replication + +Edit `postgresql.conf` and set the following: + +``` +wal_level = logical +``` + +This change requires a **PostgreSQL restart**. + +After restarting, verify the change took effect: + +```sql +SHOW wal_level; +-- Should return: logical +``` + + + +Changing `wal_level` requires a full server restart, not just a configuration reload. +Plan for a brief maintenance window. + + + +--- + +## Other Required Settings + +CDC Sink uses one replication slot per task. Ensure the following settings are +sufficient for the number of CDC Sink tasks you plan to run: + +``` +max_replication_slots = 10 -- at least 1 per CDC Sink task +max_wal_senders = 10 -- at least 1 per active replication connection +``` + +The defaults in a standard PostgreSQL installation are typically sufficient for +a small number of tasks, but you may need to increase them if you have many +concurrent CDC Sink tasks or other replication consumers. + +Check current values: + +```sql +SHOW max_replication_slots; +SHOW max_wal_senders; +``` + +These settings also require a **server restart** if changed. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx new file mode 100644 index 0000000000..4075d746cf --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx @@ -0,0 +1,132 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Property Retention" +sidebar_label: Property Retention +description: "How CDC Sink merges CDC updates onto existing RavenDB documents, preserving properties not managed by column mapping." +sidebar_position: 8 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Property Retention + + + +* When CDC Sink applies an UPDATE to an existing RavenDB document, it **merges** + the new values onto the existing document rather than replacing it entirely. + +* Properties that are not part of the column mapping are preserved across updates. + This allows RavenDB-side data to coexist safely with CDC-managed properties. + +* In this page: + * [How Merging Works](#how-merging-works) + * [What Is and Isn't Preserved](#what-is-and-isnt-preserved) + * [Editing Documents Directly in RavenDB](#editing-documents-directly-in-ravendb) + * [Implications for Patches](#implications-for-patches) + + + +--- + +## How Merging Works + +When a CDC UPDATE arrives for a document that already exists: + +1. The existing document is loaded +2. Mapped column values from the CDC event overwrite the corresponding properties +3. All other properties on the document are left unchanged +4. The merged document is written back + +**Example:** + +Initial SQL row → initial document: + +```json +{ + "Id": 1, + "Name": "Alice", + "Email": "alice@example.com", + "InternalNotes": "VIP customer", + "@metadata": { "@collection": "Customers" } +} +``` + +SQL UPDATE: `UPDATE customers SET email = 'alice.new@example.com' WHERE id = 1` + +Document after CDC UPDATE: + +```json +{ + "Id": 1, + "Name": "Alice", + "Email": "alice.new@example.com", + "InternalNotes": "VIP customer", + "@metadata": { "@collection": "Customers" } +} +``` + +`InternalNotes` was added directly in RavenDB and is not in `ColumnsMapping` — it is preserved. + +--- + +## What Is and Isn't Preserved + +**Preserved across CDC updates:** + +* Properties not listed in `ColumnsMapping` +* Properties set in RavenDB directly (annotations, computed values, flags) +* Document metadata (unless the patch explicitly modifies it) + +**Overwritten on CDC update:** + +* Any property mapped via `ColumnsMapping` — always updated to match the current SQL value + +If you manually edit a property that is part of `ColumnsMapping`, the next CDC UPDATE +for that row will overwrite your edit with the SQL value. + +--- + +## Editing Documents Directly in RavenDB + +You can safely add properties to CDC-managed documents: + +```json +{ + "Id": 1, + "Name": "Alice", + "Email": "alice@ex.com", + "InternalNotes": "...", + "ReviewedAt": "...", + "Tags": ["vip"] +} +``` + +Properties managed by CDC (those in `ColumnsMapping`) will be overwritten on +the next UPDATE from the source database. Do not rely on manual edits to mapped +properties surviving future CDC updates. + +**CDC Sink does not detect or protect manual edits to mapped properties.** +If you need to preserve a value that comes from SQL, consider adding a separate +RavenDB-only property for your annotation and leaving the SQL-mapped property as-is. + +--- + +## Implications for Patches + +Patches run after column mapping and can set additional properties that are not +from `ColumnsMapping`. These patch-computed properties follow the same merge rules: + +* If a patch sets `this.ComputedField = ...`, that value persists across future events + where the patch doesn't explicitly change it +* If a patch sets a property that is also in `ColumnsMapping`, the column mapping + value takes precedence (mapping is applied before patching) + +For aggregates maintained via patches (e.g., `RunningTotal`), the patch itself +is responsible for keeping the value correct across INSERT, UPDATE, and DELETE events. +See [Patching](./patching.mdx). + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx new file mode 100644 index 0000000000..289a077180 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx @@ -0,0 +1,285 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Schema Design" +sidebar_label: Schema Design +description: "Explains root tables, embedded tables, linked tables, multi-level nesting, relation types, and how to choose between embedded and linked tables." +sidebar_position: 2 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Schema Design + + + +* CDC Sink maps a relational schema to a RavenDB document model through configuration. + This page explains the three building blocks — root tables, embedded tables, and + linked tables — and how to combine them. + +* In this page: + * [Root Tables](#root-tables) + * [Embedded Tables](#embedded-tables) + * [Linked Tables](#linked-tables) + * [Primary Key and Join Column Requirements](#primary-key-and-join-column-requirements) + * [Multi-Level Nesting](#multi-level-nesting) + * [Relation Types](#relation-types) + * [Choosing Between Embedded and Linked](#choosing-between-embedded-and-linked) + + + +--- + +## Root Tables + +A **root table** maps a SQL table to a RavenDB collection. Each row in the SQL table +becomes one document. + +```csharp +new CdcSinkTableConfig +{ + Name = "Orders", // RavenDB collection name + SourceTableSchema = "public", // SQL schema (optional, default: "public") + SourceTableName = "orders", // SQL table name + PrimaryKeyColumns = ["id"], // Used for document ID generation + ColumnsMapping = new Dictionary + { + { "id", "Id" }, + { "customer_name", "CustomerName" }, + { "total", "Total" } + } +} +``` + +**Document ID generation:** `{CollectionName}/{pk1}/{pk2}/...` +A row with `id = 42` and collection `Orders` becomes document `Orders/42`. +A composite PK `(region, id)` with values `(US, 42)` becomes `Orders/US/42`. + +**Column mapping:** Only mapped columns appear in the document. Unmapped columns are +still available in patch scripts via `$row` but are not stored in the document. + +--- + +## Embedded Tables + +An **embedded table** creates nested data within a parent document. For example, a +SQL `order_lines` table becomes an array inside each `Orders` document. + +```csharp +new CdcSinkTableConfig +{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = ["id"], + ColumnsMapping = { { "id", "Id" }, { "customer_name", "CustomerName" } }, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + { + SourceTableName = "order_lines", + PropertyName = "Lines", // Property in parent document + Type = CdcSinkRelationType.Array, // Array of items + JoinColumns = ["order_id"], // FK referencing parent's PK + PrimaryKeyColumns = ["line_id"], // Used to match items on update/delete + ColumnsMapping = + { + { "line_id", "LineId" }, + { "product", "Product" }, + { "quantity", "Quantity" } + } + } + ] +} +``` + +This produces documents like: + +```json +{ + "Id": 1, + "CustomerName": "Alice", + "Lines": [ + { "LineId": 1, "Product": "Apples", "Quantity": 5 }, + { "LineId": 2, "Product": "Bananas", "Quantity": 3 } + ], + "@metadata": { "@collection": "Orders" } +} +``` + +--- + +## Linked Tables + +A **linked table** creates a document ID reference rather than embedding data. +A foreign key in the source row becomes a RavenDB document ID. + +```csharp +new CdcSinkTableConfig +{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = ["id"], + ColumnsMapping = { { "id", "Id" }, { "customer_id", "CustomerId" } }, + LinkedTables = + [ + new CdcSinkLinkedTableConfig + { + SourceTableName = "customers", + PropertyName = "Customer", // Property in parent document + LinkedCollectionName = "Customers", // Target collection for ID + Type = CdcSinkRelationType.Value, // Single reference + JoinColumns = ["customer_id"] // FK used to build the ID + } + ] +} +``` + +With `customer_id = 42`, the document gets `"Customer": "Customers/42"`. + +--- + +## Primary Key and Join Column Requirements + +### Root Tables + +The `PrimaryKeyColumns` list defines which SQL columns are used to generate the document ID. +All PK columns must be present in every INSERT, UPDATE, and DELETE event. + +### Embedded Tables (One Level) + +An embedded table needs: + +* **PrimaryKeyColumns** — Used to match items within the parent's array for UPDATE and DELETE +* **JoinColumns** — Foreign key referencing the parent's `PrimaryKeyColumns` + +The `JoinColumns` must exactly match the parent's `PrimaryKeyColumns`: + +| Parent PK | Required JoinColumns | Valid? | +|-----------|---------------------|--------| +| `[id]` | `[order_id]` where `order_id` = parent's `id` | ✓ | +| `[id, year]` | `[order_id, order_year]` | ✓ | +| `[id]` | `[customer_id, order_id]` | ✗ Extra column not from parent PK | +| `[id, year]` | `[order_id]` | ✗ Missing `order_year` | + +### DELETE Events and REPLICA IDENTITY + +For DELETE events, the source database must include the join column values so CDC Sink +can route the delete to the correct parent document. + +If the join column is not part of the SQL table's primary key, the source database may +need additional configuration to include it in DELETE events. + +See [REPLICA IDENTITY](./postgres/replica-identity.mdx) for +the PostgreSQL-specific requirement and how CDC Sink handles it automatically. + +--- + +## Multi-Level Nesting + +Embedded tables can themselves have embedded tables, creating arbitrarily deep hierarchies. + +**Example: Company → Departments → Employees** + +```csharp +new CdcSinkTableConfig +{ + Name = "Companies", + SourceTableName = "companies", + PrimaryKeyColumns = ["company_id"], + ColumnsMapping = { { "company_id", "CompanyId" }, { "name", "Name" } }, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + { + SourceTableName = "departments", + PropertyName = "Departments", + Type = CdcSinkRelationType.Array, + JoinColumns = ["company_id"], // Root FK + PrimaryKeyColumns = ["dept_id"], + ColumnsMapping = { { "dept_id", "DeptId" }, { "dept_name", "DeptName" } }, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + { + SourceTableName = "employees", + PropertyName = "Employees", + Type = CdcSinkRelationType.Array, + JoinColumns = ["company_id", "dept_id"], // Root FK + parent FK + PrimaryKeyColumns = ["emp_id"], + ColumnsMapping = { { "emp_id", "EmpId" }, { "emp_name", "EmpName" } } + } + ] + } + ] +} +``` + +**Critical requirement for deep nesting:** All descendant tables must carry the root +table's primary key as a denormalized column. The `employees` table must have +`company_id` even though it joins directly to `departments` via `dept_id`. + +This is required because CDC Sink needs to route every row to the correct root document +in a single pass, without additional lookups. + +**SQL schema to support this:** + +```sql +CREATE TABLE employees ( + company_id INT NOT NULL, -- Denormalized root FK + dept_id INT NOT NULL, -- Parent FK + emp_id INT NOT NULL, -- Local PK + emp_name VARCHAR(200), + PRIMARY KEY (company_id, dept_id, emp_id) +); +``` + +Including all routing columns in the primary key also avoids the need for REPLICA IDENTITY +configuration — the default DELETE events include all PK columns. + +--- + +## Relation Types + +The `Type` property on embedded and linked tables controls the document structure: + +| Type | Use Case | Document Structure | +|------|----------|--------------------| +| `Array` | One-to-many: parent has many children | `"Lines": [{ ... }, { ... }]` | +| `Map` | One-to-many with direct key lookup | `"Lines": { "1": { ... }, "2": { ... } }` | +| `Value` | Many-to-one: parent has one child/reference | `"Customer": { ... }` or `"Customer": "Customers/42"` | + +**Array** — Items are matched by `PrimaryKeyColumns` for UPDATE and DELETE. +Use when you need to iterate over all items. + +**Map** — Items are stored as a JSON object keyed by the primary key value(s). +Use when you need fast direct-key access within the document. + +**Value** — Stores a single embedded object or document reference. +Use for many-to-one relationships (many orders share one customer). + +--- + +## Choosing Between Embedded and Linked + +| Consideration | Embedded | Linked | +|--------------|----------|--------| +| Data location | Stored inside parent document | Stored in a separate document | +| Access pattern | Read parent to get all data | Load parent, then load referenced doc | +| Updates | Automatic via CDC | Automatic via CDC for each table | +| Document size | Grows with embedded items | Parent stays small | +| Use case | Parent owns child (orders own lines) | Independent entities (orders reference customers) | + +**Use embedded tables** when: +* The child entity has no meaning outside the parent (order lines without an order) +* You always read the parent and child together +* You want a single-document read + +**Use linked tables** when: +* The referenced entity is independently meaningful (customers exist without orders) +* The referenced entity is shared by many parents +* You want RavenDB's include loading to handle the join + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx new file mode 100644 index 0000000000..0a7cefc357 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx @@ -0,0 +1,65 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Server Configuration" +sidebar_label: Server Configuration +description: "RavenDB server configuration keys that control CDC Sink task behavior — batch size, fallback timeout, and poll interval." +sidebar_position: 12 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Server Configuration + + + +* This page documents the RavenDB server configuration keys that control CDC Sink + task behavior. + +* In this page: + * [Configuration Keys](#configuration-keys) + + + +--- + +## Configuration Keys + +These keys can be set in `settings.json` or passed as environment variables. +See [Configuration Overview](../../../server/configuration/configuration-options). + +--- + +#### `CdcSink.MaxBatchSize` + +**Default:** `1024` + +The maximum number of change events processed in a single batch. Larger values +increase throughput but also increase memory usage per batch. + +--- + +#### `CdcSink.MaxFallbackTimeInSec` + +**Default:** `900` (15 minutes) + +How long the task will remain in fallback mode (continuously retrying) after losing +connection to the source database before reporting an error. + +Set to `0` to disable fallback mode entirely — the task will move to error state +immediately on connection failure. + +--- + +#### `CdcSink.PollIntervalInSec` + +**Default:** `1` + +How frequently CDC Sink polls the source database for new change events when the +stream is idle. A shorter interval reduces latency but increases polling load on +the source. + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/sql-server/_category_.json b/docs/server/ongoing-tasks/cdc-sink/sql-server/_category_.json new file mode 100644 index 0000000000..cf141b1eb7 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/sql-server/_category_.json @@ -0,0 +1,4 @@ +{ + "position": 17, + "label": "SQL Server" +} diff --git a/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx new file mode 100644 index 0000000000..9dfe73c8f9 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx @@ -0,0 +1,33 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: SQL Server: Overview" +sidebar_label: Overview +description: "CDC Sink support for SQL Server — planned for a future release." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: SQL Server: Overview + + + +* CDC Sink support for SQL Server is planned for a future release. + + + + + +This section will cover the prerequisites, setup, and SQL Server-specific +configuration for CDC Sink when SQL Server support is available. + +In the meantime, see the [PostgreSQL documentation](../postgres/prerequisites-checklist.mdx) +for the full CDC Sink feature documentation. + + + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx new file mode 100644 index 0000000000..5bc2236bce --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx @@ -0,0 +1,174 @@ +--- +title: "Server: Ongoing Tasks: CDC Sink: Troubleshooting" +sidebar_label: Troubleshooting +description: "Common problems with CDC Sink tasks and how to resolve them — startup failures, error states, missing documents, embedded item issues, and patch errors." +sidebar_position: 15 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# Server: Ongoing Tasks: CDC Sink: Troubleshooting + + + +* This page covers common problems encountered when running CDC Sink tasks and how + to resolve them. + +* In this page: + * [Task Won't Start](#task-wont-start) + * [Task Enters Error State](#task-enters-error-state) + * [Documents Not Appearing](#documents-not-appearing) + * [Embedded Items Missing or Incorrect](#embedded-items-missing-or-incorrect) + * [DELETE Not Applied](#delete-not-applied) + * [Patch Errors](#patch-errors) + + + +--- + +## Task Won't Start + +**Symptom:** Task remains in `Error` state immediately after creation. + +**Common causes:** + +* **Invalid connection string** — verify the connection string name matches one defined + in the SQL connection strings section of the Management Studio. + +* **Connection refused** — the source database is unreachable from the RavenDB server. + Verify host, port, and firewall rules. + +* **Authentication failure** — the credentials in the connection string are incorrect + or the user does not have the required permissions. + See the PostgreSQL [Permissions and Roles](./postgres/permissions-and-roles.mdx) page. + +* **WAL level not set to `logical`** — CDC Sink requires logical replication enabled + on the source. + See [WAL Configuration](./postgres/wal-configuration.mdx). + +Check the error message in the Notification Center (bell icon in Studio) for the +specific failure reason. + +--- + +## Task Enters Error State + +**Symptom:** Task was running, then stopped with an error. + +**Common causes:** + +* **Replication slot dropped** — if the PostgreSQL replication slot was dropped + externally, CDC Sink cannot resume. The task must be deleted and recreated (a new + slot will be created on next start). + +* **Source table schema changed** — adding or removing columns from the SQL table + may cause mapping errors. Update the task configuration to match the new schema. + +* **Replication lag too large** — if CDC Sink falls behind significantly, PostgreSQL + may retain WAL segments that fill the disk. See your PostgreSQL documentation on + `wal_keep_size` and replication slot lag. + +* **Exceeded fallback timeout** — the source was unreachable for longer than + `CdcSink.MaxFallbackTimeInSec`. The task moves to error state after this timeout. + Restore connectivity and re-enable the task. + +--- + +## Documents Not Appearing + +**Symptom:** SQL rows exist but corresponding RavenDB documents are not created. + +**Check:** + +1. **Initial load in progress** — CDC Sink performs a full table scan before streaming. + For large tables this can take time. Check the task progress in Studio. + +2. **Table is disabled** — verify `Disabled` is not set to `true` on the `CdcSinkTableConfig`. + +3. **Primary key mismatch** — the columns listed in `PrimaryKeyColumns` must match the + actual SQL primary key. If they don't match, CDC Sink cannot generate a document ID. + +4. **ColumnsMapping is empty** — at least one column must be mapped. A table with no + column mappings will produce empty documents. + +5. **Task is paused** — check that the task state is `Active`, not `Disabled`. + +--- + +## Embedded Items Missing or Incorrect + +**Symptom:** Parent document exists but embedded array/map is empty or items are missing. + +**Check:** + +1. **JoinColumns mismatch** — the `JoinColumns` in `CdcSinkEmbeddedTableConfig` must + reference the same columns (by name) that the SQL foreign key uses. Verify these + match the parent table's `PrimaryKeyColumns`. + +2. **PrimaryKeyColumns wrong** — the embedded table's `PrimaryKeyColumns` are used to + match items on UPDATE and DELETE. If they're wrong, new items may be created as + duplicates or existing items may not be found. + +3. **DELETE not reflected** — if embedded item rows are being deleted in SQL but the + items remain in RavenDB, check whether `REPLICA IDENTITY` is configured for the + embedded table. Without it, PostgreSQL does not include the old row values in the + DELETE event, and CDC Sink cannot identify which item to remove. + See [REPLICA IDENTITY](./postgres/replica-identity.mdx). + +4. **Type mismatch (Array vs Map)** — if the `Type` was changed from `Array` to `Map` + or vice versa after documents were already created, existing documents retain the + old structure. Re-process from scratch or migrate existing documents. + +--- + +## DELETE Not Applied + +**Symptom:** A row was deleted from SQL but the RavenDB document still exists. + +**Check:** + +1. **`IgnoreDeletes = true`** — if `OnDelete.IgnoreDeletes` is set to `true`, the + delete is intentionally discarded. Verify this is the intended behavior. + +2. **Missing REPLICA IDENTITY (embedded tables)** — for embedded table items, + PostgreSQL must include old row values in DELETE events so CDC Sink knows which + parent document to update. If `REPLICA IDENTITY` is not set to `FULL` (or to + an index that includes join columns), the DELETE event may be missing the columns + needed to route it. + See [REPLICA IDENTITY](./postgres/replica-identity.mdx). + +3. **Custom patch returns without deleting** — if an `OnDelete.Patch` is configured + that does not allow the delete to proceed (combined with `IgnoreDeletes = true`), + the document is kept. Review the patch logic. + +--- + +## Patch Errors + +**Symptom:** Task enters error state with a message referencing a JavaScript patch failure. + +**Check:** + +1. **Script syntax error** — test the patch script in the Management Studio's patch + editor before using it in CDC Sink. + +2. **Null reference** — `$row` properties and `$old` may be `null` for certain event + types. Use optional chaining: `$old?.Amount || 0`. + +3. **`get()` returns null** — a document loaded with `get()` may not exist yet if CDC + Sink processes tables out of dependency order. Guard with a null check: + ```javascript + const related = get("Collection/123"); + if (related) { ... } + ``` + +4. **Step limit exceeded** — patch scripts have a step quota. If the script is very + long-running or loops over large arrays, it may hit `Patching.MaxStepsForScript`. + See the [configuration reference](../../../server/configuration/patching-configuration) for this setting. + +---