diff --git a/docs/2026-05-07-perf-investigation.md b/docs/2026-05-07-perf-investigation.md index e3315ff..39fab22 100644 --- a/docs/2026-05-07-perf-investigation.md +++ b/docs/2026-05-07-perf-investigation.md @@ -145,7 +145,7 @@ Three findings: Top suspects for the 45× gap (from the dhat profile): -- Redundant `row_cache: HashMap` + `row_versions_cache: HashMap` + `pk_hash: HashMap` next to the `OrdMap` source-of-truth +- Historical redundant `row_cache: HashMap` + `row_versions_cache: HashMap` + `pk_hash: HashMap` next to the `OrdMap` source-of-truth. These caches have since been removed; re-run `examples/ram_probe.rs` to quantify the remaining gap. - `im::OrdMap` HAMT/btree node Arc overhead - `version_store` retaining historical snapshots (default `max_versions: 1024`, `min_version_age_ms: 5000`) - Allocator fragmentation @@ -154,7 +154,7 @@ Top suspects for the 45× gap (from the dhat profile): ### Cheap (days) -1. **Drop redundant caches** in `TableData` (`row_cache`, `row_versions_cache`, `pk_hash`). 1–2 days. Probably halves table RAM, cuts ~3 of 4 HAMT clones per commit. Re-measure read latency to confirm acceptable regression. **The dhat profile is the evidence; the bench harness is ready to verify.** +1. **Re-measure table RAM after redundant cache removal.** `TableData` now keeps rows and row versions only in the canonical `OrdMap`s. Re-run the RSS probe and read-latency benches so the next storage decision is based on current data. 2. **Make the counter honest.** Either multiply by an empirical structural factor or replace `max_memory_estimate_bytes` with an actual RSS-based check via `/proc/self/statm`. ~½ day. Otherwise users keep getting bitten by the 45× gap. 3. **Tune `version_store` retention.** Default `max_versions: 1024` is generous. 64–128 is plenty for most workloads. Hours of testing, no code change. diff --git a/src/commit/executor/internals.rs b/src/commit/executor/internals.rs index 7c337fb..c6fb956 100644 --- a/src/commit/executor/internals.rs +++ b/src/commit/executor/internals.rs @@ -3239,8 +3239,6 @@ fn merge_parallel_namespace_result( }; let mut added: usize = 0; let mut removed: usize = 0; - let use_point_lookup_cache = - keyspace.primary_index_backend == crate::config::PrimaryIndexBackend::ArtExperimental; let dest = keyspace.namespace_mut(ns_id.clone()); let mut table_names: Vec<_> = targets.tables.iter().cloned().collect(); table_names.sort(); @@ -3275,30 +3273,18 @@ fn merge_parallel_namespace_result( Some(row) => { added = added.saturating_add(row_mem_cost(row)); dest_table.rows.insert(row_key.clone(), row.clone()); - if use_point_lookup_cache { - dest_table.row_cache.insert(row_key.clone(), row.clone()); - dest_table.pk_hash.insert(row_key.clone(), ()); - } } None => { dest_table.rows.remove(row_key); - dest_table.row_cache.remove(row_key); - dest_table.pk_hash.remove(row_key); } } removed = removed.saturating_add(prev_row_cost); match source_table.and_then(|table| table.row_versions.get(row_key)) { Some(version) => { dest_table.row_versions.insert(row_key.clone(), *version); - if use_point_lookup_cache { - dest_table - .row_versions_cache - .insert(row_key.clone(), *version); - } } None => { dest_table.row_versions.remove(row_key); - dest_table.row_versions_cache.remove(row_key); } } } @@ -4687,9 +4673,6 @@ fn rebuild_kv_projection_rows( if let Some(table) = state.keyspace.table_by_namespace_key_mut(&ns, table_name) { table.rows.clear(); table.row_versions.clear(); - table.pk_hash.clear(); - table.row_cache.clear(); - table.row_versions_cache.clear(); table.structural_version = materialized_seq; } else { state diff --git a/src/offline.rs b/src/offline.rs index 46883db..76821a0 100644 --- a/src/offline.rs +++ b/src/offline.rs @@ -425,12 +425,6 @@ fn check_invariants(recovered: &RecoveredState) -> InvariantReport { ns_id )); } - if !table_data.pk_hash.is_empty() && table_data.rows.len() != table_data.pk_hash.len() { - violations.push(format!( - "pk_hash cardinality mismatch in namespace={:?} table={table_name}", - ns_id - )); - } let schema_key = match ns_id { NamespaceId::Project(namespace) => Some((namespace.clone(), table_name.clone())), NamespaceId::System | NamespaceId::Global => None, @@ -655,16 +649,10 @@ mod tests { rows: OrdMap::new(), row_versions: OrdMap::new(), structural_version: 0, - pk_hash: ImHashMap::new(), - row_cache: ImHashMap::new(), - row_versions_cache: ImHashMap::new(), indexes: ImHashMap::new(), }; table.rows.insert(pk.clone(), row.clone()); table.row_versions.insert(pk.clone(), 1); - table.pk_hash.insert(pk.clone(), ()); - table.row_cache.insert(pk.clone(), row); - table.row_versions_cache.insert(pk.clone(), 1); table.indexes.insert( "by_owner".into(), SecondaryIndex { diff --git a/src/storage/keyspace.rs b/src/storage/keyspace.rs index ce32380..b248d2f 100644 --- a/src/storage/keyspace.rs +++ b/src/storage/keyspace.rs @@ -70,9 +70,6 @@ pub struct TableData { pub row_versions: OrdMap, #[serde(default)] pub structural_version: u64, - pub pk_hash: HashMap, - pub row_cache: HashMap, - pub row_versions_cache: HashMap, pub indexes: HashMap, } @@ -1235,33 +1232,6 @@ impl Keyspace { pub fn set_backend(&mut self, backend: PrimaryIndexBackend) { self.primary_index_backend = backend; - self.rebuild_point_lookup_caches(); - } - - fn rebuild_point_lookup_caches(&mut self) { - let use_point_lookup_cache = - self.primary_index_backend == PrimaryIndexBackend::ArtExperimental; - let namespace_ids: Vec = self.namespaces.keys().cloned().collect(); - for namespace_id in namespace_ids { - let Some(namespace) = self.namespaces_mut().get_mut(&namespace_id) else { - continue; - }; - let table_names: Vec = namespace.tables.keys().cloned().collect(); - for table_name in table_names { - let Some(table) = namespace.tables.get_mut(&table_name) else { - continue; - }; - if use_point_lookup_cache { - table.pk_hash = table.rows.keys().map(|pk| (pk.clone(), ())).collect(); - table.row_cache = table.rows.clone().into_iter().collect(); - table.row_versions_cache = table.row_versions.clone().into_iter().collect(); - } else { - table.pk_hash.clear(); - table.row_cache.clear(); - table.row_versions_cache.clear(); - } - } - } } pub fn namespace_mut(&mut self, namespace_id: NamespaceId) -> &mut Namespace { @@ -1376,7 +1346,6 @@ impl Keyspace { row: Row, commit_seq: u64, ) { - let use_cache = self.primary_index_backend == PrimaryIndexBackend::ArtExperimental; let new_cost = row_mem_cost(&row); let old_cost = { let table = self.table_mut(project_id, scope_id, table_name); @@ -1384,17 +1353,8 @@ impl Keyspace { if old_cost == 0 { table.structural_version = commit_seq; } - if use_cache { - table.rows.insert(encoded_pk.clone(), row.clone()); - table.row_cache.insert(encoded_pk.clone(), row); - } else { - table.rows.insert(encoded_pk.clone(), row); - } + table.rows.insert(encoded_pk.clone(), row); table.row_versions.insert(encoded_pk.clone(), commit_seq); - if use_cache { - table.pk_hash.insert(encoded_pk.clone(), ()); - table.row_versions_cache.insert(encoded_pk, commit_seq); - } old_cost }; self.mem_bytes = self @@ -1423,15 +1383,7 @@ impl Keyspace { ) -> Option<&Row> { self.namespace(&NamespaceId::project_scope(project_id, scope_id)) .and_then(|ns| ns.tables.get(table_name)) - .and_then(|t| { - if self.primary_index_backend == PrimaryIndexBackend::ArtExperimental { - t.row_cache - .get(encoded_pk) - .or_else(|| t.rows.get(encoded_pk)) - } else { - t.rows.get(encoded_pk) - } - }) + .and_then(|t| t.rows.get(encoded_pk)) } pub fn delete_row( @@ -1460,9 +1412,6 @@ impl Keyspace { .tables .get_mut(table_name)?; table.row_versions.remove(encoded_pk); - table.pk_hash.remove(encoded_pk); - table.row_cache.remove(encoded_pk); - table.row_versions_cache.remove(encoded_pk); let removed = table.rows.remove(encoded_pk); if removed.is_some() { table.structural_version = commit_seq; @@ -1579,16 +1528,7 @@ impl Keyspace { let encoded_pk = EncodedKey::from_values(pk); self.namespace(&NamespaceId::project_scope(project_id, scope_id)) .and_then(|ns| ns.tables.get(table_name)) - .and_then(|t| { - if self.primary_index_backend == PrimaryIndexBackend::ArtExperimental { - t.row_versions_cache - .get(&encoded_pk) - .copied() - .or_else(|| t.row_versions.get(&encoded_pk).copied()) - } else { - t.row_versions.get(&encoded_pk).copied() - } - }) + .and_then(|t| t.row_versions.get(&encoded_pk).copied()) .unwrap_or(0) } @@ -3645,7 +3585,7 @@ mod tests { } #[test] - fn art_experimental_backend_uses_point_lookup_cache() { + fn art_experimental_backend_uses_primary_ordmap_storage() { let mut ks = Keyspace::with_backend(PrimaryIndexBackend::OrdMap); ks.upsert_row( "p",