From 31edbae5b02af6b628d957cc1647a31a524e2c52 Mon Sep 17 00:00:00 2001 From: cafzal Date: Wed, 3 Jun 2026 14:47:33 -0700 Subject: [PATCH] Add transaction_screening_local template (rules + query on local DuckDB) --- v1/README.md | 1 + v1/transaction_screening_local/README.md | 180 ++++++++++++++++++ .../data/transactions.csv | 11 ++ v1/transaction_screening_local/pyproject.toml | 16 ++ .../transaction_screening_local.py | 130 +++++++++++++ 5 files changed, 338 insertions(+) create mode 100644 v1/transaction_screening_local/README.md create mode 100644 v1/transaction_screening_local/data/transactions.csv create mode 100644 v1/transaction_screening_local/pyproject.toml create mode 100644 v1/transaction_screening_local/transaction_screening_local.py diff --git a/v1/README.md b/v1/README.md index f1f4963..0ca5145 100644 --- a/v1/README.md +++ b/v1/README.md @@ -49,6 +49,7 @@ This directory contains the templates for v1. Each template folder includes its | [synthetic_order_lifecycle](./synthetic_order_lifecycle/) | Generate synthetic order-lifecycle event traces (PLACE / MODIFY / CANCEL / FILL) that satisfy MiFID II / Reg NMS-flavour sequencing rules using a CSP solver. | | [telco_network_recovery](./telco_network_recovery/) | Multi-reasoner template: equipment-failure GNN over a heterogeneous graph (with manufacturer advisories), declarative critical-tower rules, call-graph blast radius, and tower-upgrade optimization on a shared telco ontology. | | [test_data_generation](./test_data_generation/) | Determine optimal row counts for test database tables satisfying schema and referential integrity constraints. | +| [transaction_screening_local](./transaction_screening_local/) | Rules + query fraud-ring triage: structuring and large-sender flags, suspect classification, and one-hop investigation expansion via a relationship self-join. | | [traveling_salesman](./traveling_salesman/) | Find the shortest route visiting all cities exactly once using the MTZ formulation. | | [underwriting_audit](./underwriting_audit/) | Audit an underwriting ruleset against a catalog of properties. For each property, the solver either proves the property holds (PASS) or returns K distinct counterexample applicants who falsify it (FAIL). Multi-property batch audit, CSP solver in multi-solution mode. | | [warehouse_allocation](./warehouse_allocation/) | Allocate inventory across a distribution network using graph centrality, weakly-connected-components, and bridge-route detection to prioritize critical hubs. | diff --git a/v1/transaction_screening_local/README.md b/v1/transaction_screening_local/README.md new file mode 100644 index 0000000..1c5241a --- /dev/null +++ b/v1/transaction_screening_local/README.md @@ -0,0 +1,180 @@ +--- +title: "Transaction Screening (Local DuckDB)" +description: "Rules + query fraud-ring triage: structuring and large-sender flags, suspect classification, and one-hop investigation expansion via a relationship self-join." +featured: false +experience_level: beginner +industry: "Financial Services" +reasoning_types: + - Rules-based +tags: + - Rules-Based Reasoning + - Anti-Money-Laundering + - Local Development + - DuckDB + - Getting Started +--- + +## What this template is for + +Anti-money-laundering teams triage a transfer ledger to decide which accounts deserve a closer look. This template demonstrates that triage with **rules-based reasoning** in RelationalAI: it classifies accounts that move money just under reporting thresholds, flags large senders, and expands the investigation to everyone one hop away in the transfer network. + +It runs entirely on a local DuckDB database, so you can try the full ontology → rules → query workflow with nothing but a Python install. It is the local-development counterpart to the Snowflake-backed templates: the same modeling patterns, on an engine you can run anywhere. + +What runs locally vs. needs a Snowflake connection: + +| Used here (local DuckDB) | Needs a Snowflake connection | +| --- | --- | +| Data loading, querying (filter / join / aggregate / group) | Graph reasoner (`Graph()` — centrality, community, WCC) | +| Rules / logic (classification flags, chaining) | Optimization solve (`Problem`) | +| Relationship traversal (multi-hop self-joins, connectivity) | GNN training / inference | + +## Who this is for + +- Anyone who wants to try RelationalAI without provisioning Snowflake +- Developers prototyping an ontology, rules, and queries before pointing at production data +- Anyone learning the rules + relationship-traversal patterns on a small, legible dataset + +## What you'll build + +- An `Account` concept and a self-referential `transfers_to` relationship loaded from a CSV +- Declarative classification rules (`structuring`, `large sender`, `suspect`) using `define()` + `where()` +- A one-hop investigation expansion across the transfer network via a relationship self-join +- Queries that summarize the network and surface the accounts to review + +## What's included + +- **Model**: `Account`, the `transfers_to` relationship, and the classification + expansion rules +- **Runner**: a single Python script +- **Sample data**: a small transfer ledger with an embedded structuring ring +- **Outputs**: printed tables (network overview, per-account volume, suspects, counterparties, investigation set) + +## Prerequisites + +- Python 3.10+ +- `relationalai==1.8.1` (DuckDB ships with it) + +No Snowflake account, Native App, or `raiconfig.yaml` is required — the script builds an in-memory DuckDB config inline. (Local DuckDB execution relies on deploy mode, which the package currently flags as experimental.) + +## Quickstart + +1. Download the ZIP file for this template and extract it: + + ```bash + curl -O https://docs.relational.ai/templates/zips/v1/transaction_screening_local.zip + unzip transaction_screening_local.zip + cd transaction_screening_local + ``` + + > [!TIP] + > You can also download the template ZIP using the "Download ZIP" button at the top of this page. + +2. Create and activate a virtual environment: + + ```bash + python -m venv .venv + source .venv/bin/activate + python -m pip install -U pip + ``` + +3. Install dependencies: + + ```bash + python -m pip install . + ``` + +4. Run the template: + + ```bash + python transaction_screening_local.py + ``` + +## Template structure + +```text +transaction_screening_local/ +├── README.md +├── pyproject.toml +├── transaction_screening_local.py # model, rules, and queries +└── data/ + └── transactions.csv # sample transfer ledger +``` + +## Sample data + +`data/transactions.csv` is a transfer ledger with columns `id, src, dst, amount`. Accounts `C1001–C1004` transact ordinarily; `C2001–C2005` form a ring that cycles money in amounts just under the $10,000 reporting threshold (structuring), with one large $60,000 transfer from `C1001`. + +## Model overview + +The model derives accounts from both ends of every transfer, links them with a `transfers_to` relationship, then layers rules on top: + +- **`is_structuring`** — sent a transfer in the 9,000–10,000 band. +- **`is_large_sender`** — sent a transfer over 50,000. +- **`is_suspect`** — either of the above (defined as two rules, an OR). +- **`near_suspect`** — one hop from a suspect, in either direction. + +## How it works + +The local path is configured with four keys — a `duckdb` connection, `enable_model_deployment`, a model schema, and `auto_deploy`: + +```python +config = create_config( + connections={"local": DuckDBConnection(path=":memory:")}, # or a file path, e.g. "./dev.duckdb" + default_connection="local", + enable_model_deployment=True, + model={"schema": "main", "auto_deploy": True}, +) +``` + +The `transfers_to` relationship is built with explicit two-ref binding so each row links the correct source and destination accounts: + +```python +Account.transfers_to = model.Relationship(f"{Account} transfers to {Account:other}") +_src, _dst = Account.ref(), Account.ref() +model.where(_src.id == txn.src, _dst.id == txn.dst).define(_src.transfers_to(_dst)) +``` + +Rules are declarative derived Relationships; `is_suspect` chains on the flags below it: + +```python +Account.is_suspect = model.Relationship(f"{Account} is suspect") +model.where(Account.is_structuring()).define(Account.is_suspect()) +model.where(Account.is_large_sender()).define(Account.is_suspect()) +``` + +Connectivity ("who transacts with whom") comes from a self-join over `transfers_to`, not a graph reasoner: + +```python +_other = Account.ref() +model.where(Account.transfers_to(_other), _other.is_suspect()).define(Account.near_suspect()) +``` + +## Customize this template + +- Adjust `STRUCTURING_FLOOR`, `STRUCTURING_CEILING`, and `LARGE_TRANSFER` at the top of the script to match your thresholds. +- Replace `data/transactions.csv` with your own `id, src, dst, amount` ledger (or change the `read_csv_auto(...)` path). +- To move to production scale, point `model.Table(...)` at a Snowflake table instead of the DuckDB connection — the ontology, rules, and queries stay the same. + +## Troubleshooting + +
+Expected a fully-qualified table name with 3 parts + +DuckDB tables need a three-part name. Reference them as `memory..` (in-memory DuckDB defaults to the `memory` database). + + +
+A query falls back to a Snowflake path, or reads an empty model relation + +Make sure the config sets `enable_model_deployment=True` and `model={"auto_deploy": True}` so the model is routed to the DuckDB executor and materialized before queries. +
+ +
+Existing object ... is of type Table, trying to replace with type View + +DuckDB is case-insensitive, so a source table named like a concept collides with the installed view. Keep source tables in a schema (`raw`) separate from the model install schema (`main`). +
+ +## Related templates + +- [commercial_underwriting](../commercial_underwriting/) — rules-based eligibility and risk-tier classification on a hierarchical ontology. +- [fraud-detection](../fraud-detection/) — the full multi-reasoner fraud pipeline (Graph + Rules + Predictive + Prescriptive) on Snowflake. diff --git a/v1/transaction_screening_local/data/transactions.csv b/v1/transaction_screening_local/data/transactions.csv new file mode 100644 index 0000000..35dfa76 --- /dev/null +++ b/v1/transaction_screening_local/data/transactions.csv @@ -0,0 +1,11 @@ +id,src,dst,amount +1,C1001,C1002,5000 +2,C1002,C1003,12000 +3,C1003,C1004,8000 +4,C1001,C1003,60000 +10,C2001,C2002,9500 +11,C2002,C2003,9800 +12,C2003,C2004,9900 +13,C2004,C2005,9700 +14,C2005,C2001,9600 +15,C2001,C2003,9400 diff --git a/v1/transaction_screening_local/pyproject.toml b/v1/transaction_screening_local/pyproject.toml new file mode 100644 index 0000000..d1772a5 --- /dev/null +++ b/v1/transaction_screening_local/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["setuptools>=64", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "rai-template-transaction-screening-local" +version = "0.0.0" +description = "RelationalAI template: transaction_screening_local (PyRel v1, local DuckDB)" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "relationalai==1.8.1", +] + +[tool.setuptools] +packages = [] diff --git a/v1/transaction_screening_local/transaction_screening_local.py b/v1/transaction_screening_local/transaction_screening_local.py new file mode 100644 index 0000000..68e755b --- /dev/null +++ b/v1/transaction_screening_local/transaction_screening_local.py @@ -0,0 +1,130 @@ +"""Transaction Screening (rules-based reasoning, local DuckDB) template. + +This script demonstrates an anti-money-laundering triage in RelationalAI, +running entirely against an in-memory DuckDB database — no Snowflake account +or Native App required: + +- Load a transfer ledger into DuckDB and model accounts over it. +- Define a self-referential `transfers_to` relationship between accounts. +- Author declarative rules as derived Relationships using `define()` + `where()`: + structuring (transfers just under the reporting threshold), large sender, + and a combined suspect flag. +- Expand a one-hop investigation set across the transfer network with a + relationship self-join. + +The whole pipeline is declarative — PyRel resolves dependencies automatically. + +Run: + `python transaction_screening_local.py` + +Output: + Prints the network overview, per-account sent volume, the suspect accounts, + each suspect's transfer counterparties, and the full investigation set. +""" + +from pathlib import Path + +import relationalai.semantics as rai +from relationalai.config import DuckDBConnection, create_config +from relationalai.semantics import String +from relationalai.semantics.std import aggregates as aggs + +# -------------------------------------------------- +# Configure inputs +# -------------------------------------------------- +DATA_DIR = Path(__file__).parent / "data" + +# Transfers just under the 10k reporting threshold count as structuring. +STRUCTURING_FLOOR, STRUCTURING_CEILING = 9000.0, 10000.0 +LARGE_TRANSFER = 50000.0 + +# Local DuckDB config (no Snowflake). Four keys unlock the local path: a duckdb +# connection, enable_model_deployment, a model schema, and auto_deploy. +config = create_config( + connections={"local": DuckDBConnection(path=":memory:")}, # or a file path, e.g. "./dev.duckdb" + default_connection="local", + enable_model_deployment=True, + model={"schema": "main", "auto_deploy": True}, +) + +# -------------------------------------------------- +# Define semantic model & load data +# -------------------------------------------------- +# Load the ledger straight into DuckDB. Keep source data in a schema (`raw`) +# separate from the model install schema (`main`) — DuckDB is case-insensitive, +# so a source table named like a concept would collide with the installed view. +session = config.get_connection(DuckDBConnection).get_session() +session.execute("CREATE SCHEMA IF NOT EXISTS raw") +session.execute( + f"CREATE OR REPLACE TABLE raw.txn AS SELECT * FROM read_csv_auto('{DATA_DIR / 'transactions.csv'}')" +) + +model = rai.Model("transaction_screening_local", config=config) +txn = model.Table("memory.raw.txn") # 3-part FQN: ..
+ +# Account concept: a party that sends or receives transfers. +Account = model.Concept("Account", identify_by={"id": String}) +model.define(Account.new(id=txn.src), Account.new(id=txn.dst)) + +# transfers_to relationship: built with explicit two-ref binding. (The +# filter_by(id=col).rel(filter_by(id=col2)) shortcut produces self-loops here.) +Account.transfers_to = model.Relationship(f"{Account} transfers to {Account:other}") +_src, _dst = Account.ref(), Account.ref() +model.where(_src.id == txn.src, _dst.id == txn.dst).define(_src.transfers_to(_dst)) + +# -------------------------------------------------- +# Stage 1: account classification rules +# -------------------------------------------------- +# Structuring: sent a transfer in the just-under-threshold band. +Account.is_structuring = model.Relationship(f"{Account} is structuring") +model.where( + txn.src == Account.id, + txn.amount >= STRUCTURING_FLOOR, + txn.amount < STRUCTURING_CEILING, +).define(Account.is_structuring()) + +# Large sender: sent a transfer above the large-transfer threshold. +Account.is_large_sender = model.Relationship(f"{Account} is large sender") +model.where(txn.src == Account.id, txn.amount > LARGE_TRANSFER).define(Account.is_large_sender()) + +# Suspect: structuring OR large sender (OR via two definitions). +Account.is_suspect = model.Relationship(f"{Account} is suspect") +model.where(Account.is_structuring()).define(Account.is_suspect()) +model.where(Account.is_large_sender()).define(Account.is_suspect()) + +# -------------------------------------------------- +# Stage 2: investigation expansion (one hop from a suspect) +# -------------------------------------------------- +Account.near_suspect = model.Relationship(f"{Account} near suspect") +_other = Account.ref() +model.where(Account.transfers_to(_other), _other.is_suspect()).define(Account.near_suspect()) +model.where(_other.transfers_to(Account), _other.is_suspect()).define(Account.near_suspect()) + +# -------------------------------------------------- +# Results +# -------------------------------------------------- +print("== Network overview ==") +model.select( + aggs.count(txn.id).alias("transactions"), + aggs.sum(txn.amount).alias("total_moved"), +).inspect() + +print("\n== Sent volume per account ==") +model.where(txn.src == Account.id).select( + Account.id.alias("account"), + aggs.sum(txn.amount).per(Account).alias("total_sent"), + aggs.count(txn.id).per(Account).alias("sent_count"), +).inspect() + +print("\n== Suspect accounts (rules) ==") +model.where(Account.is_suspect()).select(Account.id.alias("suspect")).inspect() + +print("\n== Suspect -> counterparty (relationship self-join) ==") +_counterparty = Account.ref() +model.where(Account.is_suspect(), Account.transfers_to(_counterparty)).select( + Account.id.alias("suspect"), + _counterparty.id.alias("counterparty"), +).inspect() + +print("\n== Investigation set (suspect or one hop from a suspect) ==") +model.where(Account.near_suspect()).select(Account.id.alias("flagged_for_review")).inspect()