From fbefafe13af677edc9ac9cbc32ec3f025c960285 Mon Sep 17 00:00:00 2001
From: mrrajan <86094767+mrrajan@users.noreply.github.com.>
Date: Tue, 19 May 2026 20:55:51 +0530
Subject: [PATCH 1/2] docs: Update CONVENTIONS.md file

- Add Addional conventions to the CONVENTIONS.md file with reference to ADR 00018

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CONVENTIONS.md | 440 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 437 insertions(+), 3 deletions(-)

diff --git a/CONVENTIONS.md b/CONVENTIONS.md
index 8420a65c3..d04fb12f7 100644
--- a/CONVENTIONS.md
+++ b/CONVENTIONS.md
@@ -359,9 +359,14 @@ error level to `INFO`:
 
 ### Tracing spans on sub-operations
 
-When a function performs multiple logical steps (e.g., loading data, processing, persisting),
-wrap each step with a `tracing::instrument` span to make profiling and debugging easier.
-Only use this for calls that lack their own `#[instrument]` attribute:
+When a function performs multiple external I/O steps (e.g., several DB loads), wrap each
+step with a `.instrument(info_span!(...))` span to make profiling and debugging easier.
+Only apply this to calls that (a) perform external I/O and (b) lack their own `#[instrument]`
+attribute. Do not add `.instrument` at the call site when the **callee** already has
+`#[instrument]` (see the example under [`#[instrument]` attribute conventions](#instrument-attribute-conventions)).
+Nested spans under an instrumented parent are fine for **distinct** external steps (e.g.,
+multiple SeaORM loads in one function). See
+[Instrumentation scope](#instrumentation-scope) for full scoping rules.
 
 ```rust
 use tracing::{info_span, Instrument};
@@ -380,6 +385,38 @@ let statuses = load_purl_statuses(&base_ids, &txn)
 This splits up large functions into measurable chunks and makes it easier to identify
 performance bottlenecks.
 
+### Instrumentation scope
+
+Scope instrumentation to external work — instrument where it matters, not on every `pub`
+method by default.
+
+- **Do instrument** — Logic that calls external systems (database, object storage, HTTP,
+  message queues). Use `#[instrument]` on the function, or `.instrument(info_span!(...))` on
+  individual awaits when inner helpers lack instrumentation.
+- **Do not instrument** — Pure in-memory work that is trivial in context (simple mapping,
+  formatting, small transforms).
+- **Do not double-wrap** — Pick one span per operation: either the outer function or an
+  inner `.instrument` on the external call, not both for the same work.
+- **Scope by logic, not visibility** — Apply spans where external I/O happens, including
+  non-`pub` helpers, not "every public method regardless of what it does."
+
+### Logging framework
+
+Use `tracing::` for all new code; migrate `log::` when touching a file.
+
+`log::*` calls inside an `#[instrument]`-annotated function do not attach to the tracing
+span, creating gaps in distributed traces. All new code MUST use `tracing::` macros. When
+modifying a file that uses `log::`, migrate its `log::` calls to `tracing::` in the same
+change.
+
+```rust
+// Approved
+tracing::debug!("Processing vulnerability {id}");
+
+// Avoid in new code
+log::debug!("Processing vulnerability {id}");
+```
+
 ## Shared Table Insert Pattern (Duplicate Key Handling)
 
 When inserting into a table that has unique constraints and is shared across multiple
@@ -436,3 +473,400 @@ including **ingestor**, **advisory**, **sbom**, and **risk_assessment**. All cod
 that insert into `source_document` must use the nested-transaction duplicate-handling
 pattern described above. Failing to do so will cause unhandled constraint violations
 under concurrent ingestion.
+
+## Additional Conventions
+
+The following anti-patterns were identified in
+[ADR-00018](docs/adrs/00018-conventions-file.md); each entry states the agreed convention.
+
+### N+1 Query Anti-pattern
+
+**Convention**: Prefer batch loading; allow exceptions when impractical.
+
+Default to batch loading on collection paths — use SQL JOINs, `.is_in()`, or SeaORM
+`load_one`/`load_many` to fetch related data in a single query. Per-entity DB calls inside
+loops are permitted only when batching is impractical or costly. Existing N+1 patterns are
+tech debt to remediate.
+
+```rust
+// Approved: batch load with IN clause
+let vulns = advisory_vulnerability::Entity::find()
+    .filter(advisory_vulnerability::Column::AdvisoryId.is_in(advisory_ids))
+    .all(tx)
+    .await?;
+
+// Approved: SeaORM batch loader
+let orgs = advisories.load_one(organization::Entity, tx).await?;
+```
+
+> **Note**: Loop-based queries for PostgreSQL's 65535 bind-parameter limit are valid
+> patterns, not N+1 issues — see
+> [Query parameter limits and chunking](#query-parameter-limits-and-chunking).
+
+### Unbounded Queries
+
+**Convention**: Public API list endpoints must be paginated; internal queries may be
+unbounded if the caller controls scope.
+
+All public API list endpoints MUST use `PaginatedResults` (see
+[Endpoint Patterns](#endpoint-patterns)) and enforce a maximum pagination limit
+([ADR-00017](docs/adrs/00017-efficient-pagination.md)). Internal service queries may be
+unbounded if the caller controls scope and the table is known to be bounded in practice
+(e.g., admin-configured importers).
+
+### In-Memory Filtering Instead of SQL WHERE
+
+**Convention (provisional)**: Push filters to SQL when the unfiltered dataset can exceed
+~100 rows; small known-bounded collections may filter in Rust for readability.
+Unbounded fetch followed by in-memory filter is not an exception to this rule.
+_(Provisional — see [ADR-00018](docs/adrs/00018-conventions-file.md) for options under
+review.)_
+
+```rust
+// Preferred: filter in SQL
+advisory_vulnerability_score::Entity::find()
+    .filter(advisory_vulnerability_score::Column::AdvisoryId.eq(advisory_id))
+    .all(tx)
+    .await?;
+
+// Avoid: fetch-then-filter
+let all_scores = advisory_vulnerability_score::Entity::find().all(tx).await?;
+let filtered: Vec<_> = all_scores.into_iter()
+    .filter(|s| s.advisory_id == advisory_id)
+    .collect();
+```
+
+### Application-Side Counting
+
+**Convention**: Default to SQL `COUNT()`; `.len()` is acceptable if rows are already loaded.
+
+Counts MUST be computed in the database using `COUNT()`, not by materializing rows and
+calling `.len()`. Exception: if the full collection is already materialized for another
+purpose in the same scope, `.len()` is acceptable to avoid a redundant query. For paginated
+endpoints, the `total` count is optional and cached per
+[ADR-00017](docs/adrs/00017-efficient-pagination.md).
+
+```rust
+// Approved: SQL count
+let total = entity::Entity::find()
+    .filter(condition)
+    .count(tx)
+    .await?;
+
+// Avoid (for count-only purposes):
+let items = entity::Entity::find().filter(condition).all(tx).await?;
+let total = items.len();
+```
+
+### Missing Batch/Bulk Operations
+
+**Convention**: Use bulk operations for multiple items; single operations for single-item
+paths.
+
+Use `insert_many`, `delete_many`, and similar bulk operations when handling multiple entries
+in one operation. When the API or code path handles a single item only (e.g.,
+`DELETE /resource/{id}`), a single `delete_by_id` is acceptable.
+
+```rust
+// Approved: batch delete
+source_document::Entity::delete_many()
+    .filter(source_document::Column::Id.is_in(doc_ids))
+    .exec(tx)
+    .await?;
+
+// Avoid: individual deletes in loop
+for doc in &docs {
+    source_document::Entity::delete_by_id(doc).exec(tx).await?;
+}
+```
+
+### Recursive Graph Traversal Without Depth Limits
+
+**Convention**: Validate on the way in; error if you cannot handle it.
+
+When data arrives, confirm it can be processed (size, shape, depth, etc.). After validation,
+treat it as safe to walk. If something goes wrong during traversal, return an error — do not
+quietly skip parts of the data.
+
+```rust
+// Approved: validate input, then traverse; propagate errors
+fn ingest_document(doc: &Document) -> Result<(), Error> {
+    validate_depth(doc, MAX_DEPTH)?;
+    walk_tree(doc.root())
+}
+
+fn walk_tree(node: &Node) -> Result<(), Error> {
+    process(node)?;
+    for child in &node.children {
+        walk_tree(child)?;  // errors propagate, never silently skipped
+    }
+    Ok(())
+}
+```
+
+### Missing Database Indexes
+
+**Convention**: Evaluate per-column with a pros/cons analysis.
+
+No single rule for every column. Before adding (or skipping) an index, weigh these factors:
+
+| For adding an index | Against adding an index |
+|---------------------|-------------------------|
+| Column is filtered or sorted often in production | Table is small or query volume is low |
+| Slow queries or full scans show up in logs | Writes are heavy; extra indexes slow inserts/updates |
+| Public API / user-facing latency matters | Existing indexes already cover the access pattern |
+| Table is large and still growing | Index would duplicate a UNIQUE constraint |
+
+Record the decision in the PR or migration comment when it is not obvious. For migration-side
+conventions (naming, `IF NOT EXISTS`, index types), see [Database Indexes](#database-indexes).
+
+### Swallowed Errors
+
+**Convention**: Handle by context — no one-size rule.
+
+Choose what to do based on the situation:
+
+1. **Instrumentation wrapper exists** — If tracing/metrics will record the failure, no extra
+   logging or handling is required.
+2. **Return to the caller** — If the API or UI can surface the error, return or map it. Do
+   not also log the same failure redundantly.
+3. **Propagate** — If the error stops processing required data, propagate it (`?`,
+   `return Err(...)`) — do not swallow it.
+4. **Expected failure** — If failure is normal for this path (e.g., optimistic lock miss,
+   optional field missing), ignoring it is fine; no log required.
+5. **Log before dropping** — If you must drop the error and cannot report it upstream, log
+   it compactly (e.g., `.inspect_err(|e| tracing::debug!("…: {e}"))`) rather than a verbose
+   `match`.
+
+```rust
+// Approved: log the error before discarding
+match serde_json::from_value::<Report>(report) {
+    Ok(r) => Some(r),
+    Err(e) => {
+        tracing::warn!("Failed to deserialize report: {e}");
+        None
+    }
+}
+
+// Avoid: silent discard
+serde_json::from_value(report).ok()
+```
+
+### Stringly-Typed APIs
+
+**Convention**: Use enums for fixed value sets.
+
+Any value drawn from a fixed set (statuses, directions, relationship types) MUST be
+represented as a Rust enum, not a `String`. The enum is the single source of truth — string
+conversion happens only at serialization boundaries (API input/output, database columns).
+String matching against known values is prohibited. Exception:
+framework-constrained signatures (e.g., `Columns::translator` callback requiring `&str`)
+are exempt until the framework supports typed alternatives.
+
+```rust
+// Approved: enum with serde
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+enum VexStatus { Affected, Fixed, NotAffected, UnderInvestigation, Recommended }
+
+// Avoid: string matching
+match status_string.as_str() {
+    "affected" => ...,
+    "fixed" => ...,
+}
+```
+
+### Code Duplication
+
+**Convention**: Extract shared logic into generics or traits.
+
+When two or more modules implement the same logic pattern (e.g., label CRUD, license
+filtering), extract it into a shared generic function, trait, or macro parameterized by the
+entity type. Duplicated logic blocks longer than ~20 lines should be consolidated.
+
+```rust
+// Approved: generic label service
+async fn set_labels<E: EntityTrait>(
+    entity_id: Uuid,
+    labels: Labels,
+    connection: &impl ConnectionTrait,
+) -> Result<(), Error> { ... }
+```
+
+### Oversized Functions
+
+**Convention**: Decompose functions with more than 3 distinct phases.
+
+No hard line limit, but functions with more than 3 distinct phases (identifiable by
+blank-line-separated blocks or section comments) should be decomposed. Use judgment — a
+150-line function with a single clear flow is better than 5 artificial 30-line helpers.
+
+### Public API Documentation
+
+**Convention**: Document all public items.
+
+Every public struct, enum, trait, function, and method MUST have a `///` doc comment. One
+line describing what it does is sufficient. Service methods should document their parameters,
+error conditions, and return value semantics when non-obvious. Inside functions, add brief
+`//` comments only where the logic is not obvious from the code.
+
+### Magic Numbers and Hardcoded Values
+
+**Convention**: Use judgment — prefer named constants when they clarify meaning.
+
+- **Inline in SQL strings** — e.g., `relationship = 13` in raw SQL: prefer leaving the
+  literal in the SQL with a short comment explaining which enum/migration it matches, rather
+  than interpolating a `const` with `format!`.
+- **Single-use imports** — keep the value in the same file for readability unless it is
+  genuinely reused elsewhere.
+
+Where this file documents a specific prescriptive pattern (e.g.,
+[Shared Table Insert Pattern](#shared-table-insert-pattern-duplicate-key-handling)), that
+documented pattern takes precedence over the general "use judgment" rule.
+
+### Raw SQL Defeating Parameterization
+
+**Convention**: Allow with review gate.
+
+`Statement::from_string` with dynamic content is allowed but must be flagged with a
+`// SAFETY:` comment explaining why parameterization is not possible and confirming that all
+interpolated values are validated. Such code requires explicit reviewer approval.
+
+```rust
+// Approved: static SQL with no dynamic values
+Statement::from_string(DbBackend::Postgres, STATIC_QUERY.to_string())
+
+// Approved with review gate: dynamic content with SAFETY comment
+// SAFETY: `schema_name` is validated against an allowlist in `validate_schema()`.
+// Parameterization is not possible because PostgreSQL does not support bind
+// parameters for schema identifiers.
+let query = format!("SELECT * FROM {schema_name}.entity WHERE id = $1");
+Statement::from_sql_and_values(DbBackend::Postgres, &query, [id.into()])
+```
+
+### Database Resource Conventions
+
+The following sub-conventions cover database schema patterns. They complement the existing
+[Migration Patterns](#migration-patterns) and
+[Entity Model Patterns](#entity-model-patterns) sections.
+
+#### Table and Column Naming
+
+All database object names use `snake_case` — see
+[Naming Conventions](#naming-conventions) for the general rule. Tables use singular nouns
+(e.g., `sbom`, `advisory_vulnerability`). Join tables use `<left>_<right>` or a domain term.
+Avoid CamelCase, pluralized table names, or abbreviated column names (e.g., `vulnId`,
+`Advisories`).
+
+#### Column Types
+
+Use the narrowest correct PostgreSQL type:
+
+| Data | PostgreSQL Type | Rust / SeaORM |
+|------|----------------|---------------|
+| Primary keys (default) | `UUID` | `Uuid` with `#[sea_orm(primary_key)]` |
+| Primary keys (domain id) | `VARCHAR` / `TEXT` | `String` with `#[sea_orm(primary_key)]` |
+| Foreign keys | Same type as referenced PK | `Uuid` / `String` |
+| Timestamps | `TIMESTAMP WITH TIME ZONE` | `OffsetDateTime` / `Option<OffsetDateTime>` |
+| Free text | `TEXT` | `String` / `Option<String>` |
+| Structured data | `JSONB` | `serde_json::Value` with `#[sea_orm(column_type = "JsonBinary")]` |
+| Booleans | `BOOLEAN` | `bool` — use `NOT NULL DEFAULT false`; avoid nullable booleans |
+| Scores / metrics | `DOUBLE PRECISION` or `REAL` | `f64` / `f32` |
+| Integer values | `INTEGER` | `i32` — for integer-backed enums, importer state, discriminators |
+| Fixed value sets | PostgreSQL `ENUM` | `DeriveActiveEnum` (see [Enums](#enums)) |
+
+Avoid using `String` for a column that holds values from a fixed set, using `TIMESTAMP`
+without time zone, or using `INTEGER` for a primary key without a domain reason.
+
+#### Enums
+
+Store fixed value sets as a typed construct — three patterns, pick the one that fits:
+
+| Pattern | When to use | Example |
+|---------|-------------|---------|
+| **PostgreSQL ENUM** | Small, rarely-changing string set | `score_type`, `cvss3_severity` |
+| **Integer-backed `DeriveActiveEnum`** | Performance-sensitive or Rust-managed sets | `relationship` |
+| **Lookup table** | Set that grows over time or carries metadata | `version_scheme`, `status` |
+
+```rust
+// PostgreSQL ENUM mapping
+#[derive(Debug, Copy, Clone, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
+#[sea_orm(rs_type = "String", db_type = "Enum", enum_name = "score_type")]
+pub enum ScoreType {
+    #[sea_orm(string_value = "2.0")]
+    V2_0,
+    #[sea_orm(string_value = "3.1")]
+    V3_1,
+}
+
+// Integer-backed enum
+#[derive(Debug, Copy, Clone, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
+#[sea_orm(rs_type = "i32", db_type = "Integer")]
+pub enum Relationship {
+    #[sea_orm(num_value = 0)]
+    Contains,
+    #[sea_orm(num_value = 13)]
+    Describes,
+}
+```
+
+Enum type names use `snake_case` (e.g., `cvss3_severity`). Variant string values use the
+domain's canonical representation (e.g., `"3.1"` for CVSS version).
+
+#### Database Indexes
+
+For idempotency and type guidance, see [Migration Patterns](#migration-patterns)
+(`.if_not_exists()`). For when to add an index, see [Missing Database Indexes](#missing-database-indexes).
+
+Additional migration-side conventions:
+
+- **B-tree** is the default for equality and range queries on scalar columns.
+- **GIN** for JSONB containment queries (`@>`), `LIKE`/trigram searches, and array columns.
+- **Composite indexes**: create when queries frequently filter on multiple columns together;
+  column order follows query selectivity (most selective first).
+- **Naming (provisional)**: Match the dominant naming pattern in the migration file you're
+  editing. The team is deciding between `<table>_<col(s)>_idx` suffix (dominant in early
+  migrations) and `idx_<table>_<col(s)>` prefix (dominant in newer migrations).
+  _(Provisional — see [ADR-00018](docs/adrs/00018-conventions-file.md). This stopgap must be
+  replaced with a single naming convention once the team decides.)_
+
+#### Foreign Keys and Constraints
+
+- Every column referencing another table MUST have an explicit `FOREIGN KEY` constraint.
+- Choose `ON DELETE` deliberately: `CASCADE` for child-lifecycle-tied-to-parent (e.g.,
+  `sbom_node` → `sbom`), `RESTRICT` for prevent-delete-if-children-exist, `SET NULL` for
+  nullable optional references.
+- Add `UNIQUE` constraints for natural keys or business identifiers (e.g.,
+  `advisory.identifier`).
+- Columns are `NOT NULL` by default; use `NULL` only when absence is a valid domain state.
+- `ON UPDATE` is not used — primary keys (`UUID` and domain-natural `String`) are immutable.
+
+```rust
+// Explicit foreign key with cascade
+Table::create()
+    .table(SbomAi::Table)
+    .col(ColumnDef::new(SbomAi::SbomId).uuid().not_null())
+    .foreign_key(
+        ForeignKey::create()
+            .from_col(SbomAi::SbomId)
+            .to(Sbom::Table, Sbom::SbomId)
+            .on_delete(ForeignKeyAction::Cascade),
+    )
+```
+
+#### Migrations
+
+See [Migration Patterns](#migration-patterns) for the core conventions (SeaORM framework,
+idempotency guards, raw SQL loading, data migration separation). Additional conventions:
+
+- **Numbering**: increment by 10 (e.g., `m0002190` → `m0002200`) to leave room for
+  insertions.
+- **Reversibility**: implement both `up()` and `down()`.
+- **Schema vs. data**: schema migrations registered with `.normal()`; data backfills
+  registered with `.data()`. Prefer no data in schema migrations — separate schema changes
+  from data changes.
+
+## References
+
+- [ADR-00018: Conventions File and Performance Anti-Pattern Standards](docs/adrs/00018-conventions-file.md) — full anti-pattern analysis with occurrence lists, option rationale, and open decisions
+- [Logging and Tracing Design](docs/design/log_tracing.md) — rationale for observability conventions

From 54c48b0632c2f7e10081d0d4fbf160b976f550b8 Mon Sep 17 00:00:00 2001
From: mrrajan <86094767+mrrajan@users.noreply.github.com.>
Date: Thu, 21 May 2026 13:18:36 +0530
Subject: [PATCH 2/2] docs: Address review feedback and add Creator pattern
 convention

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CONVENTIONS.md | 105 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 21 deletions(-)

diff --git a/CONVENTIONS.md b/CONVENTIONS.md
index d04fb12f7..3472287a0 100644
--- a/CONVENTIONS.md
+++ b/CONVENTIONS.md
@@ -417,6 +417,67 @@ tracing::debug!("Processing vulnerability {id}");
 log::debug!("Processing vulnerability {id}");
 ```
 
+## Data Ingestion: Creator Pattern
+
+**Convention**: Use the `*Creator` batch pattern for data ingestion; do not use the
+deprecated `*Context` per-entity pattern.
+
+### Creator pattern (required for new code)
+
+A `*Creator` struct accumulates entries in memory (via `.add()`), then writes them all in
+a single `.create()` call using `INSERT ... ON CONFLICT DO NOTHING` with batch chunking.
+This approach is concurrency-safe, avoids N+1 queries, and uses consistent lock ordering
+to prevent deadlocks.
+
+Key characteristics:
+- **Batch-oriented** — collects entries in a deduplicating collection, then inserts in bulk
+- **Consuming** — `.create(self, ...)` takes ownership; the creator is single-use
+- **Deduplication** — entries are keyed by deterministic UUID or natural key before insert
+- **Atomic** — uses `ON CONFLICT DO NOTHING` so concurrent transactions cannot conflict
+- **Lock-safe** — sorted/chunked inserts where lock ordering matters (e.g.,
+  `OrganizationCreator` sorts by name; others rely on `BTreeMap` key order or the batch
+  helper's own ordering)
+
+```rust
+// Approved: Creator pattern — batch accumulation then bulk insert
+let mut purl_creator = PurlCreator::new();
+for purl in &purls {
+    purl_creator.add(purl.clone());
+}
+purl_creator.create(&connection).await?;
+```
+
+Examples include `PurlCreator`, `PurlStatusCreator`, `OrganizationCreator`,
+`VulnerabilityCreator`, `PackageCreator`, `RelationshipCreator`, `LicenseCreator`,
+`CpeCreator`, and `ScoreCreator` — see `modules/ingestor/src/graph/**/creator.rs` for the
+full set. When adding ingestion for a new entity type, follow this pattern.
+
+Use creators for batch ingest; use the
+[Shared Table Insert Pattern](#shared-table-insert-pattern-duplicate-key-handling) when you
+need the existing row ID back on conflict (e.g., `create_doc`).
+
+### Context pattern (deprecated)
+
+The `*Context` structs (e.g., `ProductContext`, `OrganizationContext`) wrap a single loaded
+entity model with a reference to the parent `Graph` and perform immediate per-entity
+database operations via methods like `ingest_*()`. This design causes:
+
+- **N+1 queries** — each method call triggers individual DB round-trips inside loops
+- **Race conditions** — SELECT-then-INSERT is not atomic; concurrent ingestion of the same
+  entity causes unique-constraint violations
+- **Mixed responsibility** — combines entity representation with ingestion logic
+
+Remaining `*Context` usage is tech debt. Do not extend or add new `*Context` structs. Not
+every entity has a `*Creator` yet, so when modifying code that uses a `*Context`, prefer
+migrating it to the corresponding `*Creator` in the same change when practical — creating
+a new `*Creator` if needed.
+
+```rust
+// Avoid: Context pattern — per-entity immediate insert
+let org_ctx = graph.ingest_organization(name, info, &connection).await?;
+let product_ctx = org_ctx.ingest_product(name, &connection).await?;
+```
+
 ## Shared Table Insert Pattern (Duplicate Key Handling)
 
 When inserting into a table that has unique constraints and is shared across multiple
@@ -476,9 +537,6 @@ under concurrent ingestion.
 
 ## Additional Conventions
 
-The following anti-patterns were identified in
-[ADR-00018](docs/adrs/00018-conventions-file.md); each entry states the agreed convention.
-
 ### N+1 Query Anti-pattern
 
 **Convention**: Prefer batch loading; allow exceptions when impractical.
@@ -516,11 +574,13 @@ unbounded if the caller controls scope and the table is known to be bounded in p
 
 ### In-Memory Filtering Instead of SQL WHERE
 
-**Convention (provisional)**: Push filters to SQL when the unfiltered dataset can exceed
-~100 rows; small known-bounded collections may filter in Rust for readability.
-Unbounded fetch followed by in-memory filter is not an exception to this rule.
-_(Provisional — see [ADR-00018](docs/adrs/00018-conventions-file.md) for options under
-review.)_
+**Convention**: Push filters to SQL; filter in Rust only when the rows are already loaded
+for another purpose in the same scope.
+
+PostgreSQL's query planner handles small datasets efficiently, so "the table is small" is
+not a reason to filter in application code. Filtering in Rust is acceptable when doing so
+eliminates a duplicate query — i.e., the full result set is already materialized for another
+purpose and a second filtered query would be redundant.
 
 ```rust
 // Preferred: filter in SQL
@@ -529,7 +589,13 @@ advisory_vulnerability_score::Entity::find()
     .all(tx)
     .await?;
 
-// Avoid: fetch-then-filter
+// Acceptable: rows already loaded for another purpose, filter avoids a duplicate query
+let all_scores = load_all_scores(tx).await?; // needed elsewhere in this scope
+let filtered: Vec<_> = all_scores.iter()
+    .filter(|s| s.advisory_id == advisory_id)
+    .collect();
+
+// Avoid: fetch-all then filter as the primary access pattern
 let all_scores = advisory_vulnerability_score::Entity::find().all(tx).await?;
 let filtered: Vec<_> = all_scores.into_iter()
     .filter(|s| s.advisory_id == advisory_id)
@@ -616,6 +682,7 @@ No single rule for every column. Before adding (or skipping) an index, weigh the
 | Slow queries or full scans show up in logs | Writes are heavy; extra indexes slow inserts/updates |
 | Public API / user-facing latency matters | Existing indexes already cover the access pattern |
 | Table is large and still growing | Index would duplicate a UNIQUE constraint |
+| Storage cost justifies the overhead | Index storage overhead is significant on large tables |
 
 Record the decision in the PR or migration comment when it is not obvious. For migration-side
 conventions (naming, `IF NOT EXISTS`, index types), see [Database Indexes](#database-indexes).
@@ -639,14 +706,10 @@ Choose what to do based on the situation:
    `match`.
 
 ```rust
-// Approved: log the error before discarding
-match serde_json::from_value::<Report>(report) {
-    Ok(r) => Some(r),
-    Err(e) => {
-        tracing::warn!("Failed to deserialize report: {e}");
-        None
-    }
-}
+// Approved: compact log before discarding
+serde_json::from_value::<Report>(report)
+    .inspect_err(|e| tracing::warn!("Failed to deserialize report: {e}"))
+    .ok()
 
 // Avoid: silent discard
 serde_json::from_value(report).ok()
@@ -764,7 +827,8 @@ Use the narrowest correct PostgreSQL type:
 
 | Data | PostgreSQL Type | Rust / SeaORM |
 |------|----------------|---------------|
-| Primary keys (default) | `UUID` | `Uuid` with `#[sea_orm(primary_key)]` |
+| Primary keys (random) | `UUID` (v7) | `Uuid` with `#[sea_orm(primary_key)]` — v7 has better B-tree locality than v4 |
+| Primary keys (content-derived) | `UUID` (v5) | `Uuid` with `#[sea_orm(primary_key)]` — enables upsert patterns by pre-generating the ID |
 | Primary keys (domain id) | `VARCHAR` / `TEXT` | `String` with `#[sea_orm(primary_key)]` |
 | Foreign keys | Same type as referenced PK | `Uuid` / `String` |
 | Timestamps | `TIMESTAMP WITH TIME ZONE` | `OffsetDateTime` / `Option<OffsetDateTime>` |
@@ -827,8 +891,8 @@ Additional migration-side conventions:
 - **Naming (provisional)**: Match the dominant naming pattern in the migration file you're
   editing. The team is deciding between `<table>_<col(s)>_idx` suffix (dominant in early
   migrations) and `idx_<table>_<col(s)>` prefix (dominant in newer migrations).
-  _(Provisional — see [ADR-00018](docs/adrs/00018-conventions-file.md). This stopgap must be
-  replaced with a single naming convention once the team decides.)_
+  _(Provisional — this stopgap must be replaced with a single naming convention once the
+  team decides.)_
 
 #### Foreign Keys and Constraints
 
@@ -868,5 +932,4 @@ idempotency guards, raw SQL loading, data migration separation). Additional conv
 
 ## References
 
-- [ADR-00018: Conventions File and Performance Anti-Pattern Standards](docs/adrs/00018-conventions-file.md) — full anti-pattern analysis with occurrence lists, option rationale, and open decisions
 - [Logging and Tracing Design](docs/design/log_tracing.md) — rationale for observability conventions