Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/2026-05-07-perf-investigation.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ Three findings:

Top suspects for the 45× gap (from the dhat profile):

- Redundant `row_cache: HashMap` + `row_versions_cache: HashMap` + `pk_hash: HashMap` next to the `OrdMap` source-of-truth
- Historical redundant `row_cache: HashMap` + `row_versions_cache: HashMap` + `pk_hash: HashMap` next to the `OrdMap` source-of-truth. These caches have since been removed; re-run `examples/ram_probe.rs` to quantify the remaining gap.
- `im::OrdMap` HAMT/btree node Arc overhead
- `version_store` retaining historical snapshots (default `max_versions: 1024`, `min_version_age_ms: 5000`)
- Allocator fragmentation
Expand All @@ -154,7 +154,7 @@ Top suspects for the 45× gap (from the dhat profile):

### Cheap (days)

1. **Drop redundant caches** in `TableData` (`row_cache`, `row_versions_cache`, `pk_hash`). 1–2 days. Probably halves table RAM, cuts ~3 of 4 HAMT clones per commit. Re-measure read latency to confirm acceptable regression. **The dhat profile is the evidence; the bench harness is ready to verify.**
1. **Re-measure table RAM after redundant cache removal.** `TableData` now keeps rows and row versions only in the canonical `OrdMap`s. Re-run the RSS probe and read-latency benches so the next storage decision is based on current data.
2. **Make the counter honest.** Either multiply by an empirical structural factor or replace `max_memory_estimate_bytes` with an actual RSS-based check via `/proc/self/statm`. ~½ day. Otherwise users keep getting bitten by the 45× gap.
3. **Tune `version_store` retention.** Default `max_versions: 1024` is generous. 64–128 is plenty for most workloads. Hours of testing, no code change.

Expand Down
17 changes: 0 additions & 17 deletions src/commit/executor/internals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3239,8 +3239,6 @@ fn merge_parallel_namespace_result(
};
let mut added: usize = 0;
let mut removed: usize = 0;
let use_point_lookup_cache =
keyspace.primary_index_backend == crate::config::PrimaryIndexBackend::ArtExperimental;
let dest = keyspace.namespace_mut(ns_id.clone());
let mut table_names: Vec<_> = targets.tables.iter().cloned().collect();
table_names.sort();
Expand Down Expand Up @@ -3275,30 +3273,18 @@ fn merge_parallel_namespace_result(
Some(row) => {
added = added.saturating_add(row_mem_cost(row));
dest_table.rows.insert(row_key.clone(), row.clone());
if use_point_lookup_cache {
dest_table.row_cache.insert(row_key.clone(), row.clone());
dest_table.pk_hash.insert(row_key.clone(), ());
}
}
None => {
dest_table.rows.remove(row_key);
dest_table.row_cache.remove(row_key);
dest_table.pk_hash.remove(row_key);
}
}
removed = removed.saturating_add(prev_row_cost);
match source_table.and_then(|table| table.row_versions.get(row_key)) {
Some(version) => {
dest_table.row_versions.insert(row_key.clone(), *version);
if use_point_lookup_cache {
dest_table
.row_versions_cache
.insert(row_key.clone(), *version);
}
}
None => {
dest_table.row_versions.remove(row_key);
dest_table.row_versions_cache.remove(row_key);
}
}
}
Expand Down Expand Up @@ -4687,9 +4673,6 @@ fn rebuild_kv_projection_rows(
if let Some(table) = state.keyspace.table_by_namespace_key_mut(&ns, table_name) {
table.rows.clear();
table.row_versions.clear();
table.pk_hash.clear();
table.row_cache.clear();
table.row_versions_cache.clear();
table.structural_version = materialized_seq;
} else {
state
Expand Down
12 changes: 0 additions & 12 deletions src/offline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,6 @@ fn check_invariants(recovered: &RecoveredState) -> InvariantReport {
ns_id
));
}
if !table_data.pk_hash.is_empty() && table_data.rows.len() != table_data.pk_hash.len() {
violations.push(format!(
"pk_hash cardinality mismatch in namespace={:?} table={table_name}",
ns_id
));
}
let schema_key = match ns_id {
NamespaceId::Project(namespace) => Some((namespace.clone(), table_name.clone())),
NamespaceId::System | NamespaceId::Global => None,
Expand Down Expand Up @@ -655,16 +649,10 @@ mod tests {
rows: OrdMap::new(),
row_versions: OrdMap::new(),
structural_version: 0,
pk_hash: ImHashMap::new(),
row_cache: ImHashMap::new(),
row_versions_cache: ImHashMap::new(),
indexes: ImHashMap::new(),
};
table.rows.insert(pk.clone(), row.clone());
table.row_versions.insert(pk.clone(), 1);
table.pk_hash.insert(pk.clone(), ());
table.row_cache.insert(pk.clone(), row);
table.row_versions_cache.insert(pk.clone(), 1);
table.indexes.insert(
"by_owner".into(),
SecondaryIndex {
Expand Down
68 changes: 4 additions & 64 deletions src/storage/keyspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ pub struct TableData {
pub row_versions: OrdMap<EncodedKey, u64>,
#[serde(default)]
pub structural_version: u64,
pub pk_hash: HashMap<EncodedKey, ()>,
pub row_cache: HashMap<EncodedKey, Row>,
pub row_versions_cache: HashMap<EncodedKey, u64>,
pub indexes: HashMap<String, SecondaryIndex>,
}

Expand Down Expand Up @@ -1235,33 +1232,6 @@ impl Keyspace {

pub fn set_backend(&mut self, backend: PrimaryIndexBackend) {
self.primary_index_backend = backend;
self.rebuild_point_lookup_caches();
}

fn rebuild_point_lookup_caches(&mut self) {
let use_point_lookup_cache =
self.primary_index_backend == PrimaryIndexBackend::ArtExperimental;
let namespace_ids: Vec<NamespaceId> = self.namespaces.keys().cloned().collect();
for namespace_id in namespace_ids {
let Some(namespace) = self.namespaces_mut().get_mut(&namespace_id) else {
continue;
};
let table_names: Vec<String> = namespace.tables.keys().cloned().collect();
for table_name in table_names {
let Some(table) = namespace.tables.get_mut(&table_name) else {
continue;
};
if use_point_lookup_cache {
table.pk_hash = table.rows.keys().map(|pk| (pk.clone(), ())).collect();
table.row_cache = table.rows.clone().into_iter().collect();
table.row_versions_cache = table.row_versions.clone().into_iter().collect();
} else {
table.pk_hash.clear();
table.row_cache.clear();
table.row_versions_cache.clear();
}
}
}
}

pub fn namespace_mut(&mut self, namespace_id: NamespaceId) -> &mut Namespace {
Expand Down Expand Up @@ -1376,25 +1346,15 @@ impl Keyspace {
row: Row,
commit_seq: u64,
) {
let use_cache = self.primary_index_backend == PrimaryIndexBackend::ArtExperimental;
let new_cost = row_mem_cost(&row);
let old_cost = {
let table = self.table_mut(project_id, scope_id, table_name);
let old_cost = table.rows.get(&encoded_pk).map(row_mem_cost).unwrap_or(0);
if old_cost == 0 {
table.structural_version = commit_seq;
}
if use_cache {
table.rows.insert(encoded_pk.clone(), row.clone());
table.row_cache.insert(encoded_pk.clone(), row);
} else {
table.rows.insert(encoded_pk.clone(), row);
}
table.rows.insert(encoded_pk.clone(), row);
table.row_versions.insert(encoded_pk.clone(), commit_seq);
if use_cache {
table.pk_hash.insert(encoded_pk.clone(), ());
table.row_versions_cache.insert(encoded_pk, commit_seq);
}
old_cost
};
self.mem_bytes = self
Expand Down Expand Up @@ -1423,15 +1383,7 @@ impl Keyspace {
) -> Option<&Row> {
self.namespace(&NamespaceId::project_scope(project_id, scope_id))
.and_then(|ns| ns.tables.get(table_name))
.and_then(|t| {
if self.primary_index_backend == PrimaryIndexBackend::ArtExperimental {
t.row_cache
.get(encoded_pk)
.or_else(|| t.rows.get(encoded_pk))
} else {
t.rows.get(encoded_pk)
}
})
.and_then(|t| t.rows.get(encoded_pk))
}

pub fn delete_row(
Expand Down Expand Up @@ -1460,9 +1412,6 @@ impl Keyspace {
.tables
.get_mut(table_name)?;
table.row_versions.remove(encoded_pk);
table.pk_hash.remove(encoded_pk);
table.row_cache.remove(encoded_pk);
table.row_versions_cache.remove(encoded_pk);
let removed = table.rows.remove(encoded_pk);
if removed.is_some() {
table.structural_version = commit_seq;
Expand Down Expand Up @@ -1579,16 +1528,7 @@ impl Keyspace {
let encoded_pk = EncodedKey::from_values(pk);
self.namespace(&NamespaceId::project_scope(project_id, scope_id))
.and_then(|ns| ns.tables.get(table_name))
.and_then(|t| {
if self.primary_index_backend == PrimaryIndexBackend::ArtExperimental {
t.row_versions_cache
.get(&encoded_pk)
.copied()
.or_else(|| t.row_versions.get(&encoded_pk).copied())
} else {
t.row_versions.get(&encoded_pk).copied()
}
})
.and_then(|t| t.row_versions.get(&encoded_pk).copied())
.unwrap_or(0)
}

Expand Down Expand Up @@ -3645,7 +3585,7 @@ mod tests {
}

#[test]
fn art_experimental_backend_uses_point_lookup_cache() {
fn art_experimental_backend_uses_primary_ordmap_storage() {
let mut ks = Keyspace::with_backend(PrimaryIndexBackend::OrdMap);
ks.upsert_row(
"p",
Expand Down
Loading