From 00098b90547ab2fd1c2bf5f4f97a31b10c68485b Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 10 Aug 2025 16:57:51 +0300 Subject: [PATCH 01/12] WIP --- src/in_memory/empty_links_registry/mod.rs | 16 +++++++++++++++ src/in_memory/empty_links_registry/sized.rs | 20 +++++++++++++++++++ .../empty_links_registry/unsized_.rs | 1 + 3 files changed, 37 insertions(+) create mode 100644 src/in_memory/empty_links_registry/mod.rs create mode 100644 src/in_memory/empty_links_registry/sized.rs create mode 100644 src/in_memory/empty_links_registry/unsized_.rs diff --git a/src/in_memory/empty_links_registry/mod.rs b/src/in_memory/empty_links_registry/mod.rs new file mode 100644 index 0000000..17abe7e --- /dev/null +++ b/src/in_memory/empty_links_registry/mod.rs @@ -0,0 +1,16 @@ +mod sized; +mod unsized_; + +use data_bucket::Link; + +/// [`EmptyLinksRegistry`] is used for storing [`Link`]'s after their release in +/// [`DataPages`]. +/// +/// [`DataPages`]: crate::in_memory::DataPages +pub trait EmptyLinksRegistry { + /// Stores empty [`Link`] in this registry. + fn add_empty_link(&self, link: Link); + + /// Returns [`Link`] that will be enough to fit data with provided `size`. + fn find_link_with_length(&self, size: u32) -> Option; +} diff --git a/src/in_memory/empty_links_registry/sized.rs b/src/in_memory/empty_links_registry/sized.rs new file mode 100644 index 0000000..ef32594 --- /dev/null +++ b/src/in_memory/empty_links_registry/sized.rs @@ -0,0 +1,20 @@ +use crate::in_memory::empty_links_registry::EmptyLinksRegistry; +use data_bucket::Link; + +/// [`EmptyLinksRegistry`] that should be used for sized `Row`'s. It uses +/// [`lockfree::Stack`] under hood and just [`pop`]'s [`Link`] when any is needed +/// because sized `Row`'s all have same size. +/// +/// [`lockfree::Stack`]: lockfree::stack::Stack +/// [`pop`]: lockfree::stack::Stack::pop +pub type SizedEmptyLinkRegistry = lockfree::stack::Stack; + +impl EmptyLinksRegistry for SizedEmptyLinkRegistry { + fn add_empty_link(&self, link: Link) { + self.push(link) + } + + fn find_link_with_length(&self, size: u32) -> Option { + self.pop() + } +} diff --git a/src/in_memory/empty_links_registry/unsized_.rs b/src/in_memory/empty_links_registry/unsized_.rs new file mode 100644 index 0000000..f0194df --- /dev/null +++ b/src/in_memory/empty_links_registry/unsized_.rs @@ -0,0 +1 @@ +pub struct UnsizedEmptyLinkRegistry {} From 8cf906de735c04c1adf09b6203da6b8879adb41e Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Mon, 11 Aug 2025 13:02:58 +0300 Subject: [PATCH 02/12] add separate sized and unsized empty link registries --- Cargo.toml | 8 +- codegen/src/worktable/generator/row.rs | 2 +- codegen/src/worktable/generator/table/mod.rs | 18 +++ codegen/src/worktable/generator/wrapper.rs | 2 +- src/in_memory/data.rs | 24 +++- .../empty_links_registry/defragmentator.rs | 1 + src/in_memory/empty_links_registry/mod.rs | 11 ++ src/in_memory/empty_links_registry/sized.rs | 28 +++- .../empty_links_registry/unsized_.rs | 35 ++++- src/in_memory/mod.rs | 4 + src/in_memory/pages.rs | 135 ++++++++++-------- src/index/unsized_node.rs | 6 + src/lib.rs | 5 +- src/table/mod.rs | 21 ++- src/table/system_info.rs | 5 +- tests/worktable/with_enum.rs | 4 +- 16 files changed, 226 insertions(+), 83 deletions(-) create mode 100644 src/in_memory/empty_links_registry/defragmentator.rs diff --git a/Cargo.toml b/Cargo.toml index e665a16..01d675d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,14 +27,14 @@ lockfree = { version = "0.5.1" } fastrand = "2.3.0" futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4", "v7"] } -data_bucket = "0.3.0" +# data_bucket = "0.3.0" # data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "page_cdc_correction", version = "0.2.7" } -# data_bucket = { path = "../DataBucket", version = "0.2.10" } +data_bucket = { path = "../DataBucket", version = "0.3.0" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } # indexset = { version = "0.12.3", features = ["concurrent", "cdc", "multimap"] } -# indexset = { package = "wt-indexset", path = "../indexset", version = "0.12.5", features = ["concurrent", "cdc", "multimap"] } -indexset = { package = "wt-indexset", version = "0.12.6", features = ["concurrent", "cdc", "multimap"] } +indexset = { package = "wt-indexset", path = "../indexset", version = "0.12.6", features = ["concurrent", "cdc", "multimap"] } +# indexset = { package = "wt-indexset", version = "0.12.6", features = ["concurrent", "cdc", "multimap"] } convert_case = "0.6.0" ordered-float = "5.0.0" parking_lot = "0.12.3" diff --git a/codegen/src/worktable/generator/row.rs b/codegen/src/worktable/generator/row.rs index 5d35368..8827051 100644 --- a/codegen/src/worktable/generator/row.rs +++ b/codegen/src/worktable/generator/row.rs @@ -85,7 +85,7 @@ impl Generator { }; quote! { - #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq, MemStat)] + #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq, MemStat, SizeMeasure)] #custom_derives #[rkyv(derive(Debug))] #[repr(C)] diff --git a/codegen/src/worktable/generator/table/mod.rs b/codegen/src/worktable/generator/table/mod.rs index fb09607..a11ae0b 100644 --- a/codegen/src/worktable/generator/table/mod.rs +++ b/codegen/src/worktable/generator/table/mod.rs @@ -83,6 +83,13 @@ impl Generator { }) .collect::>(); let pk_types_unsized = is_unsized_vec(pk_types); + let row_types = &self + .columns + .columns_map + .iter() + .map(|(_, t)| t.to_string()) + .collect::>(); + let row_unsized = is_unsized_vec(row_types); let derive = if self.is_persist { if pk_types_unsized { quote! { @@ -108,6 +115,15 @@ impl Generator { Vec> } }; + let empty_links_type = if row_unsized { + quote! { + UnsizedEmptyLinkRegistry + } + } else { + quote! { + SizedEmptyLinkRegistry + } + }; if self.config.as_ref().and_then(|c| c.page_size).is_some() { quote! { @@ -116,6 +132,7 @@ impl Generator { WorkTable< #row_type, #primary_key_type, + #empty_links_type, #avt_type_ident, #avt_index_ident, #index_type, @@ -134,6 +151,7 @@ impl Generator { WorkTable< #row_type, #primary_key_type, + #empty_links_type, #avt_type_ident, #avt_index_ident, #index_type, diff --git a/codegen/src/worktable/generator/wrapper.rs b/codegen/src/worktable/generator/wrapper.rs index b77f0ad..455d946 100644 --- a/codegen/src/worktable/generator/wrapper.rs +++ b/codegen/src/worktable/generator/wrapper.rs @@ -24,7 +24,7 @@ impl Generator { let wrapper_ident = name_generator.get_wrapper_type_ident(); quote! { - #[derive(rkyv::Archive, Debug, rkyv::Deserialize, rkyv::Serialize)] + #[derive(rkyv::Archive, Debug, rkyv::Deserialize, rkyv::Serialize, SizeMeasure)] #[repr(C)] pub struct #wrapper_ident { inner: #row_ident, diff --git a/src/in_memory/data.rs b/src/in_memory/data.rs index 0e824cb..5755ae1 100644 --- a/src/in_memory/data.rs +++ b/src/in_memory/data.rs @@ -138,7 +138,11 @@ impl Data { feature = "perf_measurements", performance_measurement(prefix_name = "DataRow") )] - pub unsafe fn save_row_by_link(&self, row: &Row, link: Link) -> Result + pub unsafe fn save_row_by_link( + &self, + row: &Row, + mut link: Link, + ) -> Result<(Link, Option), ExecutionError> where Row: Archive + for<'a> Serialize< @@ -147,15 +151,25 @@ impl Data { { let bytes = rkyv::to_bytes(row).map_err(|_| ExecutionError::SerializeError)?; let length = bytes.len() as u32; - if length != link.length { + + let link_left = if link.length > length { + link.length = length; + Some(Link { + page_id: link.page_id, + offset: link.offset + length, + length: link.length - length, + }) + } else if link.length == length { + None + } else { return Err(ExecutionError::InvalidLink); - } + }; let inner_data = unsafe { &mut *self.inner_data.get() }; inner_data[link.offset as usize..][..link.length as usize] .copy_from_slice(bytes.as_slice()); - Ok(link) + Ok((link, link_left)) } /// # Safety @@ -302,7 +316,7 @@ mod tests { let new_row = TestRow { a: 20, b: 20 }; let res = unsafe { page.save_row_by_link(&new_row, link) }.unwrap(); - assert_eq!(res, link); + assert_eq!(res, (link, None)); let inner_data = unsafe { &mut *page.inner_data.get() }; let bytes = &inner_data[link.offset as usize..link.length as usize]; diff --git a/src/in_memory/empty_links_registry/defragmentator.rs b/src/in_memory/empty_links_registry/defragmentator.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/in_memory/empty_links_registry/defragmentator.rs @@ -0,0 +1 @@ + diff --git a/src/in_memory/empty_links_registry/mod.rs b/src/in_memory/empty_links_registry/mod.rs index 17abe7e..f672ef8 100644 --- a/src/in_memory/empty_links_registry/mod.rs +++ b/src/in_memory/empty_links_registry/mod.rs @@ -1,8 +1,12 @@ +mod defragmentator; mod sized; mod unsized_; use data_bucket::Link; +pub use sized::SizedEmptyLinkRegistry; +pub use unsized_::UnsizedEmptyLinkRegistry; + /// [`EmptyLinksRegistry`] is used for storing [`Link`]'s after their release in /// [`DataPages`]. /// @@ -13,4 +17,11 @@ pub trait EmptyLinksRegistry { /// Returns [`Link`] that will be enough to fit data with provided `size`. fn find_link_with_length(&self, size: u32) -> Option; + + /// Pop's all [`Link`]'s from this [`EmptyLinksRegistry`] for further + /// operations. + fn as_vec(&self) -> Vec; + + /// Returns length of this [`EmptyLinksRegistry`]. + fn len(&self) -> usize; } diff --git a/src/in_memory/empty_links_registry/sized.rs b/src/in_memory/empty_links_registry/sized.rs index ef32594..b67cdae 100644 --- a/src/in_memory/empty_links_registry/sized.rs +++ b/src/in_memory/empty_links_registry/sized.rs @@ -1,5 +1,6 @@ use crate::in_memory::empty_links_registry::EmptyLinksRegistry; use data_bucket::Link; +use std::sync::atomic::{AtomicUsize, Ordering}; /// [`EmptyLinksRegistry`] that should be used for sized `Row`'s. It uses /// [`lockfree::Stack`] under hood and just [`pop`]'s [`Link`] when any is needed @@ -7,14 +8,33 @@ use data_bucket::Link; /// /// [`lockfree::Stack`]: lockfree::stack::Stack /// [`pop`]: lockfree::stack::Stack::pop -pub type SizedEmptyLinkRegistry = lockfree::stack::Stack; +#[derive(Debug, Default)] +pub struct SizedEmptyLinkRegistry { + stack: lockfree::stack::Stack, + length: AtomicUsize, +} impl EmptyLinksRegistry for SizedEmptyLinkRegistry { fn add_empty_link(&self, link: Link) { - self.push(link) + self.stack.push(link); + self.length.fetch_add(1, Ordering::Relaxed); + } + + fn find_link_with_length(&self, _size: u32) -> Option { + // `size` can be ignored as sized row's all have same size. + if let Some(val) = self.stack.pop() { + self.length.fetch_sub(1, Ordering::Relaxed); + Some(val) + } else { + None + } + } + + fn as_vec(&self) -> Vec { + self.stack.pop_iter().collect() } - fn find_link_with_length(&self, size: u32) -> Option { - self.pop() + fn len(&self) -> usize { + self.length.load(Ordering::Relaxed) } } diff --git a/src/in_memory/empty_links_registry/unsized_.rs b/src/in_memory/empty_links_registry/unsized_.rs index f0194df..b9a48c1 100644 --- a/src/in_memory/empty_links_registry/unsized_.rs +++ b/src/in_memory/empty_links_registry/unsized_.rs @@ -1 +1,34 @@ -pub struct UnsizedEmptyLinkRegistry {} +use data_bucket::Link; + +use crate::IndexMultiMap; +use crate::in_memory::empty_links_registry::EmptyLinksRegistry; + +#[derive(Debug, Default)] +pub struct UnsizedEmptyLinkRegistry(IndexMultiMap); + +impl EmptyLinksRegistry for UnsizedEmptyLinkRegistry { + fn add_empty_link(&self, link: Link) { + self.0.insert(link.length, link); + } + + fn find_link_with_length(&self, size: u32) -> Option { + if let Some(link) = self.0.remove_max().map(|(_, l)| l) { + if link.length < size { + self.0.insert(link.length, link); + None + } else { + Some(link) + } + } else { + None + } + } + + fn as_vec(&self) -> Vec { + self.0.drain().into_iter().map(|p| p.value).collect() + } + + fn len(&self) -> usize { + self.0.len() + } +} diff --git a/src/in_memory/mod.rs b/src/in_memory/mod.rs index 1a14334..137005a 100644 --- a/src/in_memory/mod.rs +++ b/src/in_memory/mod.rs @@ -1,7 +1,11 @@ mod data; +mod empty_links_registry; mod pages; mod row; pub use data::{DATA_INNER_LENGTH, Data, ExecutionError as DataExecutionError}; +pub use empty_links_registry::{ + EmptyLinksRegistry, SizedEmptyLinkRegistry, UnsizedEmptyLinkRegistry, +}; pub use pages::{DataPages, ExecutionError as PagesExecutionError}; pub use row::{GhostWrapper, Query, RowWrapper, StorableRow}; diff --git a/src/in_memory/pages.rs b/src/in_memory/pages.rs index 65823b9..74f546a 100644 --- a/src/in_memory/pages.rs +++ b/src/in_memory/pages.rs @@ -4,9 +4,9 @@ use std::{ sync::{Arc, RwLock}, }; +use data_bucket::SizeMeasurable; use data_bucket::page::PageId; use derive_more::{Display, Error, From}; -use lockfree::stack::Stack; #[cfg(feature = "perf_measurements")] use performance_measurement_codegen::performance_measurement; use rkyv::{ @@ -17,6 +17,7 @@ use rkyv::{ util::AlignedVec, }; +use crate::in_memory::empty_links_registry::EmptyLinksRegistry; use crate::{ in_memory::{ DATA_INNER_LENGTH, Data, DataExecutionError, @@ -30,16 +31,15 @@ fn page_id_mapper(page_id: usize) -> usize { } #[derive(Debug)] -pub struct DataPages +pub struct DataPages where Row: StorableRow, { /// Pages vector. Currently, not lock free. pages: RwLock::WrappedRow, DATA_LENGTH>>>>, - /// Stack with empty [`Link`]s. It stores [`Link`]s of rows that was deleted. - // TODO: Proper empty links registry + defragmentation - empty_links: Stack, + /// Registry with empty [`Link`]s. It stores [`Link`]s of rows that was deleted. + empty_links: EmptyLinks, /// Count of saved rows. row_count: AtomicU64, @@ -49,8 +49,9 @@ where current_page_id: AtomicU32, } -impl Default for DataPages +impl Default for DataPages where + EmptyLinks: Default + EmptyLinksRegistry, Row: StorableRow, ::WrappedRow: RowWrapper, { @@ -59,8 +60,9 @@ where } } -impl DataPages +impl DataPages where + EmptyLinks: Default + EmptyLinksRegistry, Row: StorableRow, ::WrappedRow: RowWrapper, { @@ -68,7 +70,7 @@ where Self { // We are starting ID's from `1` because `0`'s page in file is info page. pages: RwLock::new(vec![Arc::new(Data::new(1.into()))]), - empty_links: Stack::new(), + empty_links: EmptyLinks::default(), row_count: AtomicU64::new(0), last_page_id: AtomicU32::new(1), current_page_id: AtomicU32::new(1), @@ -83,7 +85,7 @@ where let last_page_id = vec.len(); Self { pages: RwLock::new(vec), - empty_links: Stack::new(), + empty_links: EmptyLinks::default(), row_count: AtomicU64::new(0), last_page_id: AtomicU32::new(last_page_id as u32), current_page_id: AtomicU32::new(last_page_id as u32), @@ -100,28 +102,36 @@ where ::WrappedRow: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - >, + > + SizeMeasurable, { let general_row = ::WrappedRow::from_inner(row); - if let Some(link) = self.empty_links.pop() { + if let Some(link) = self + .empty_links + .find_link_with_length(general_row.aligned_size() as u32) + { let pages = self.pages.read().unwrap(); let current_page: usize = page_id_mapper(link.page_id.into()); let page = &pages[current_page]; - if let Err(e) = unsafe { page.save_row_by_link(&general_row, link) } { - match e { + match unsafe { page.save_row_by_link(&general_row, link) } { + Ok((link, left_link)) => { + if let Some(link) = left_link { + self.empty_links.add_empty_link(link) + } + return Ok(link); + } + Err(e) => match e { DataExecutionError::InvalidLink => { - self.empty_links.push(link); + println!("Link back {:?}", link); + self.empty_links.add_empty_link(link); } DataExecutionError::PageIsFull { .. } | DataExecutionError::PageTooSmall { .. } | DataExecutionError::SerializeError | DataExecutionError::DeserializeError => return Err(e.into()), - } - } else { - return Ok(link); - }; + }, + } } loop { @@ -164,7 +174,7 @@ where ::WrappedRow: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - >, + > + SizeMeasurable, { let link = self.insert(row.clone())?; let general_row = ::WrappedRow::from_inner(row); @@ -306,12 +316,14 @@ where let gen_row = ::WrappedRow::from_inner(row); unsafe { page.save_row_by_link(&gen_row, link) - .map_err(ExecutionError::DataPageError) + .map_err(ExecutionError::DataPageError)?; } + + Ok(link) } pub fn delete(&self, link: Link) -> Result<(), ExecutionError> { - self.empty_links.push(link); + self.empty_links.add_empty_link(link); Ok(()) } @@ -337,20 +349,15 @@ where } pub fn get_empty_links(&self) -> Vec { - let mut res = vec![]; - for l in self.empty_links.pop_iter() { - res.push(l) - } - - res + self.empty_links.as_vec() } pub fn with_empty_links(mut self, links: Vec) -> Self { - let stack = Stack::new(); + let registry = EmptyLinks::default(); for l in links { - stack.push(l) + registry.add_empty_link(l) } - self.empty_links = stack; + self.empty_links = registry; self } @@ -370,19 +377,32 @@ pub enum ExecutionError { #[cfg(test)] mod tests { use std::collections::HashSet; - use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::atomic::Ordering; use std::sync::{Arc, RwLock}; use std::thread; use std::time::Instant; - use rkyv::with::{AtomicLoad, Relaxed}; use rkyv::{Archive, Deserialize, Serialize}; use crate::in_memory::pages::DataPages; use crate::in_memory::{PagesExecutionError, RowWrapper, StorableRow}; + use crate::prelude::{ + EmptyLinksRegistry, SizeMeasurable, SizeMeasure, SizedEmptyLinkRegistry, align, + }; #[derive( - Archive, Copy, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, + Archive, + Copy, + Clone, + Deserialize, + Debug, + Eq, + Hash, + Ord, + PartialEq, + PartialOrd, + Serialize, + SizeMeasure, )] struct TestRow { a: u64, @@ -391,46 +411,44 @@ mod tests { /// General `Row` wrapper that is used to append general data for every `Inner` /// `Row`. - #[derive(Archive, Deserialize, Debug, Serialize)] - pub struct GeneralRow { + #[derive(Archive, Deserialize, Debug, Serialize, SizeMeasure)] + struct GeneralRow { /// Inner generic `Row`. - pub inner: Inner, + pub inner: TestRow, /// Indicator for ghosted rows. - #[rkyv(with = AtomicLoad)] - pub is_ghosted: AtomicBool, + pub is_ghosted: bool, /// Indicator for deleted rows. - #[rkyv(with = AtomicLoad)] - pub deleted: AtomicBool, + pub deleted: bool, } - impl RowWrapper for GeneralRow { - fn get_inner(self) -> Inner { + impl RowWrapper for GeneralRow { + fn get_inner(self) -> TestRow { self.inner } fn is_ghosted(&self) -> bool { - self.is_ghosted.load(Ordering::Relaxed) + self.is_ghosted } /// Creates new [`GeneralRow`] from `Inner`. - fn from_inner(inner: Inner) -> Self { + fn from_inner(inner: TestRow) -> Self { Self { inner, - is_ghosted: AtomicBool::new(true), - deleted: AtomicBool::new(false), + is_ghosted: true, + deleted: false, } } } impl StorableRow for TestRow { - type WrappedRow = GeneralRow; + type WrappedRow = GeneralRow; } #[test] fn insert() { - let pages = DataPages::::new(); + let pages = DataPages::::new(); let row = TestRow { a: 10, b: 20 }; let link = pages.insert(row).unwrap(); @@ -444,7 +462,7 @@ mod tests { #[test] fn insert_many() { - let pages = DataPages::::new(); + let pages = DataPages::::new(); for _ in 0..10_000 { let row = TestRow { a: 10, b: 20 }; @@ -457,7 +475,7 @@ mod tests { #[test] fn select() { - let pages = DataPages::::new(); + let pages = DataPages::::new(); let row = TestRow { a: 10, b: 20 }; let link = pages.insert(row).unwrap(); @@ -468,7 +486,7 @@ mod tests { #[test] fn select_non_ghosted() { - let pages = DataPages::::new(); + let pages = DataPages::::new(); let row = TestRow { a: 10, b: 20 }; let link = pages.insert(row).unwrap(); @@ -479,7 +497,7 @@ mod tests { #[test] fn update() { - let pages = DataPages::::new(); + let pages = DataPages::::new(); let row = TestRow { a: 10, b: 20 }; let link = pages.insert(row).unwrap(); @@ -490,14 +508,17 @@ mod tests { #[test] fn delete() { - let pages = DataPages::::new(); + let pages = DataPages::::new(); let row = TestRow { a: 10, b: 20 }; let link = pages.insert(row).unwrap(); pages.delete(link).unwrap(); - assert_eq!(pages.empty_links.pop(), Some(link)); - pages.empty_links.push(link); + assert_eq!( + pages.empty_links.find_link_with_length(link.length), + Some(link) + ); + pages.empty_links.add_empty_link(link); let row = TestRow { a: 20, b: 20 }; let new_link = pages.insert(row).unwrap(); @@ -506,7 +527,7 @@ mod tests { #[test] fn insert_on_empty() { - let pages = DataPages::::new(); + let pages = DataPages::::new(); let row = TestRow { a: 10, b: 20 }; let link = pages.insert(row).unwrap(); @@ -519,7 +540,7 @@ mod tests { //#[test] fn _bench() { - let pages = Arc::new(DataPages::::new()); + let pages = Arc::new(DataPages::::new()); let mut v = Vec::new(); diff --git a/src/index/unsized_node.rs b/src/index/unsized_node.rs index 6b62e8d..2aa0f07 100644 --- a/src/index/unsized_node.rs +++ b/src/index/unsized_node.rs @@ -213,6 +213,12 @@ where { self.inner.deref().iter() } + + fn drain(&mut self) -> Vec { + // we can not change `length` and other fields as node will be removed + // after drain. + self.inner.drain(..).collect() + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index d6699cb..0a211dc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,10 @@ pub use table::*; pub use worktable_codegen::worktable; pub mod prelude { - pub use crate::in_memory::{Data, DataPages, GhostWrapper, Query, RowWrapper, StorableRow}; + pub use crate::in_memory::{ + Data, DataPages, EmptyLinksRegistry, GhostWrapper, Query, RowWrapper, + SizedEmptyLinkRegistry, StorableRow, UnsizedEmptyLinkRegistry, + }; pub use crate::lock::LockMap; pub use crate::lock::{Lock, RowLock}; pub use crate::mem_stat::MemStat; diff --git a/src/table/mod.rs b/src/table/mod.rs index 0bb06cb..1e8f90f 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -4,7 +4,7 @@ pub mod system_info; use std::fmt::Debug; use std::marker::PhantomData; -use crate::in_memory::{DataPages, GhostWrapper, RowWrapper, StorableRow}; +use crate::in_memory::{DataPages, EmptyLinksRegistry, GhostWrapper, RowWrapper, StorableRow}; use crate::lock::LockMap; use crate::persistence::{InsertOperation, Operation}; use crate::prelude::{OperationId, PrimaryKeyGeneratorState}; @@ -13,7 +13,7 @@ use crate::{ AvailableIndex, IndexError, IndexMap, TableRow, TableSecondaryIndex, TableSecondaryIndexCdc, in_memory, }; -use data_bucket::{INNER_PAGE_SIZE, Link}; +use data_bucket::{INNER_PAGE_SIZE, Link, SizeMeasurable}; use derive_more::{Display, Error, From}; use indexset::core::node::NodeLike; use indexset::core::pair::Pair; @@ -32,6 +32,7 @@ use uuid::Uuid; pub struct WorkTable< Row, PrimaryKey, + EmptyLinks, AvailableTypes = (), AvailableIndexes = (), SecondaryIndexes = (), @@ -44,7 +45,7 @@ pub struct WorkTable< Row: StorableRow + Send + Clone + 'static, PkNodeType: NodeLike> + Send + 'static, { - pub data: DataPages, + pub data: DataPages, pub pk_map: IndexMap, @@ -65,6 +66,7 @@ pub struct WorkTable< impl< Row, PrimaryKey, + EmptyLinks, AvailableTypes, AvailableIndexes, SecondaryIndexes, @@ -76,6 +78,7 @@ impl< for WorkTable< Row, PrimaryKey, + EmptyLinks, AvailableTypes, AvailableIndexes, SecondaryIndexes, @@ -86,6 +89,7 @@ impl< > where PrimaryKey: Debug + Clone + Ord + Send + TablePrimaryKey + std::hash::Hash, + EmptyLinks: Default + EmptyLinksRegistry, SecondaryIndexes: Default, PkGen: Default, PkNodeType: NodeLike> + Send + 'static, @@ -109,6 +113,7 @@ where impl< Row, PrimaryKey, + EmptyLinks, AvailableTypes, AvailableIndexes, SecondaryIndexes, @@ -120,6 +125,7 @@ impl< WorkTable< Row, PrimaryKey, + EmptyLinks, AvailableTypes, AvailableIndexes, SecondaryIndexes, @@ -131,6 +137,7 @@ impl< where Row: TableRow, PrimaryKey: Debug + Clone + Ord + Send + TablePrimaryKey + std::hash::Hash, + EmptyLinks: Default + EmptyLinksRegistry, PkNodeType: NodeLike> + Send + 'static, Row: StorableRow + Send + Clone + 'static, ::WrappedRow: RowWrapper, @@ -184,7 +191,7 @@ where ::WrappedRow: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - >, + > + SizeMeasurable, <::WrappedRow as Archive>::Archived: GhostWrapper, PrimaryKey: Clone, AvailableTypes: 'static, @@ -246,7 +253,7 @@ where ::WrappedRow: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - >, + > + SizeMeasurable, <::WrappedRow as Archive>::Archived: GhostWrapper, PrimaryKey: Clone, SecondaryIndexes: TableSecondaryIndex @@ -321,7 +328,7 @@ where ::WrappedRow: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - >, + > + SizeMeasurable, <::WrappedRow as Archive>::Archived: GhostWrapper, PrimaryKey: Clone, AvailableTypes: 'static, @@ -397,7 +404,7 @@ where ::WrappedRow: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - >, + > + SizeMeasurable, <::WrappedRow as Archive>::Archived: GhostWrapper, PrimaryKey: Clone, SecondaryIndexes: TableSecondaryIndex diff --git a/src/table/system_info.rs b/src/table/system_info.rs index 6575195..bec128b 100644 --- a/src/table/system_info.rs +++ b/src/table/system_info.rs @@ -4,7 +4,7 @@ use indexset::core::pair::Pair; use prettytable::{Table, format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR, row}; use std::fmt::{self, Debug, Display, Formatter}; -use crate::in_memory::{RowWrapper, StorableRow}; +use crate::in_memory::{EmptyLinksRegistry, RowWrapper, StorableRow}; use crate::mem_stat::MemStat; use crate::{TableSecondaryIndexInfo, WorkTable}; @@ -48,6 +48,7 @@ impl Display for IndexKind { impl< Row, PrimaryKey, + EmptyLinks, AvailableTypes, AvailableIndexes, SecondaryIndexes, @@ -59,6 +60,7 @@ impl< WorkTable< Row, PrimaryKey, + EmptyLinks, AvailableTypes, AvailableIndexes, SecondaryIndexes, @@ -69,6 +71,7 @@ impl< > where PrimaryKey: Debug + Clone + Ord + Send + 'static + std::hash::Hash, + EmptyLinks: Default + EmptyLinksRegistry, Row: StorableRow + Send + Clone + 'static, ::WrappedRow: RowWrapper, NodeType: NodeLike> + Send + 'static, diff --git a/tests/worktable/with_enum.rs b/tests/worktable/with_enum.rs index 9ee9916..89c5f9f 100644 --- a/tests/worktable/with_enum.rs +++ b/tests/worktable/with_enum.rs @@ -2,7 +2,9 @@ use rkyv::{Archive, Deserialize, Serialize}; use worktable::prelude::*; use worktable::worktable; -#[derive(Archive, Clone, Copy, Debug, Deserialize, Serialize, PartialEq, PartialOrd, MemStat)] +#[derive( + Archive, Clone, Copy, Debug, Deserialize, Serialize, PartialEq, PartialOrd, MemStat, SizeMeasure, +)] #[rkyv(compare(PartialEq), derive(Debug))] pub enum SomeEnum { First, From ec30d10b81c4dd570a8cbcb26d7ca696e4e7d473 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Tue, 12 Aug 2025 01:26:12 +0300 Subject: [PATCH 03/12] add persistence for empty links logic --- .../src/worktable/generator/queries/delete.rs | 3 +- src/in_memory/mod.rs | 2 +- src/in_memory/pages.rs | 49 ++++- src/lib.rs | 8 +- src/persistence/engine.rs | 26 ++- src/persistence/mod.rs | 4 +- src/persistence/operation/batch.rs | 172 ++++++++++++++++++ src/persistence/operation/mod.rs | 4 +- src/persistence/operation/operation.rs | 25 +++ src/table/mod.rs | 25 ++- tests/persistence/sync/mod.rs | 49 ++++- 11 files changed, 342 insertions(+), 25 deletions(-) diff --git a/codegen/src/worktable/generator/queries/delete.rs b/codegen/src/worktable/generator/queries/delete.rs index 89bf166..5617ff9 100644 --- a/codegen/src/worktable/generator/queries/delete.rs +++ b/codegen/src/worktable/generator/queries/delete.rs @@ -77,7 +77,7 @@ impl Generator { quote! { let secondary_keys_events = self.0.indexes.delete_row_cdc(row, link)?; let (_, primary_key_events) = TableIndexCdc::remove_cdc(&self.0.pk_map, pk.clone(), link); - self.0.data.delete(link).map_err(WorkTableError::PagesError)?; + let empty_link_registry_ops = self.0.data.delete(link).map_err(WorkTableError::PagesError)?; let mut op: Operation< <<#pk_ident as TablePrimaryKey>::Generator as PrimaryKeyGeneratorState>::State, #pk_ident, @@ -86,6 +86,7 @@ impl Generator { id: uuid::Uuid::now_v7().into(), secondary_keys_events, primary_key_events, + empty_link_registry_operations: empty_link_registry_ops, link, }); self.2.apply_operation(op); diff --git a/src/in_memory/mod.rs b/src/in_memory/mod.rs index 137005a..99fd621 100644 --- a/src/in_memory/mod.rs +++ b/src/in_memory/mod.rs @@ -7,5 +7,5 @@ pub use data::{DATA_INNER_LENGTH, Data, ExecutionError as DataExecutionError}; pub use empty_links_registry::{ EmptyLinksRegistry, SizedEmptyLinkRegistry, UnsizedEmptyLinkRegistry, }; -pub use pages::{DataPages, ExecutionError as PagesExecutionError}; +pub use pages::{DataPages, ExecutionError as PagesExecutionError, InsertCdcOutput}; pub use row::{GhostWrapper, Query, RowWrapper, StorableRow}; diff --git a/src/in_memory/pages.rs b/src/in_memory/pages.rs index 74f546a..b879833 100644 --- a/src/in_memory/pages.rs +++ b/src/in_memory/pages.rs @@ -18,6 +18,7 @@ use rkyv::{ }; use crate::in_memory::empty_links_registry::EmptyLinksRegistry; +use crate::prelude::EmptyLinkRegistryOperation; use crate::{ in_memory::{ DATA_INNER_LENGTH, Data, DataExecutionError, @@ -49,6 +50,13 @@ where current_page_id: AtomicU32, } +#[derive(Debug)] +pub struct InsertCdcOutput { + pub link: Link, + pub data_bytes: Vec, + pub empty_link_registry_ops: Vec, +} + impl Default for DataPages where EmptyLinks: Default + EmptyLinksRegistry, @@ -94,6 +102,23 @@ where } pub fn insert(&self, row: Row) -> Result + where + Row: Archive + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, + ::WrappedRow: Archive + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + > + SizeMeasurable, + { + self.insert_inner(row).map(|r| r.0) + } + + fn insert_inner( + &self, + row: Row, + ) -> Result<(Link, Vec), ExecutionError> where Row: Archive + for<'a> Serialize< @@ -110,6 +135,9 @@ where .empty_links .find_link_with_length(general_row.aligned_size() as u32) { + let mut empty_link_registry_ops = vec![]; + empty_link_registry_ops.push(EmptyLinkRegistryOperation::Remove(link)); + let pages = self.pages.read().unwrap(); let current_page: usize = page_id_mapper(link.page_id.into()); let page = &pages[current_page]; @@ -117,13 +145,14 @@ where match unsafe { page.save_row_by_link(&general_row, link) } { Ok((link, left_link)) => { if let Some(link) = left_link { + empty_link_registry_ops.push(EmptyLinkRegistryOperation::Add(link)); self.empty_links.add_empty_link(link) } - return Ok(link); + return Ok((link, empty_link_registry_ops)); } Err(e) => match e { DataExecutionError::InvalidLink => { - println!("Link back {:?}", link); + empty_link_registry_ops.pop(); self.empty_links.add_empty_link(link); } DataExecutionError::PageIsFull { .. } @@ -146,7 +175,7 @@ where match link { Ok(link) => { self.row_count.fetch_add(1, Ordering::Relaxed); - return Ok(link); + return Ok((link, vec![])); } Err(e) => match e { DataExecutionError::PageIsFull { .. } => { @@ -165,7 +194,7 @@ where } } - pub fn insert_cdc(&self, row: Row) -> Result<(Link, Vec), ExecutionError> + pub fn insert_cdc(&self, row: Row) -> Result where Row: Archive + for<'a> Serialize< @@ -176,12 +205,16 @@ where Strategy, Share>, rkyv::rancor::Error>, > + SizeMeasurable, { - let link = self.insert(row.clone())?; + let (link, ops) = self.insert_inner(row.clone())?; let general_row = ::WrappedRow::from_inner(row); let bytes = rkyv::to_bytes(&general_row) .expect("should be ok as insert not failed") .into_vec(); - Ok((link, bytes)) + Ok(InsertCdcOutput { + link, + data_bytes: bytes, + empty_link_registry_ops: ops, + }) } fn add_next_page(&self, tried_page: usize) { @@ -322,9 +355,9 @@ where Ok(link) } - pub fn delete(&self, link: Link) -> Result<(), ExecutionError> { + pub fn delete(&self, link: Link) -> Result, ExecutionError> { self.empty_links.add_empty_link(link); - Ok(()) + Ok(vec![EmptyLinkRegistryOperation::Add(link)]) } pub fn select_raw(&self, link: Link) -> Result, ExecutionError> { diff --git a/src/lib.rs b/src/lib.rs index 0a211dc..3d646b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,10 +29,10 @@ pub mod prelude { pub use crate::lock::{Lock, RowLock}; pub use crate::mem_stat::MemStat; pub use crate::persistence::{ - DeleteOperation, IndexTableOfContents, InsertOperation, Operation, OperationId, - PersistenceConfig, PersistenceEngine, PersistenceEngineOps, PersistenceTask, SpaceData, - SpaceDataOps, SpaceIndex, SpaceIndexOps, SpaceIndexUnsized, SpaceSecondaryIndexOps, - UpdateOperation, map_index_pages_to_toc_and_general, + DeleteOperation, EmptyLinkRegistryOperation, IndexTableOfContents, InsertOperation, + Operation, OperationId, PersistenceConfig, PersistenceEngine, PersistenceEngineOps, + PersistenceTask, SpaceData, SpaceDataOps, SpaceIndex, SpaceIndexOps, SpaceIndexUnsized, + SpaceSecondaryIndexOps, UpdateOperation, map_index_pages_to_toc_and_general, map_unsized_index_pages_to_toc_and_general, validate_events, }; pub use crate::primary_key::{PrimaryKeyGenerator, PrimaryKeyGeneratorState, TablePrimaryKey}; diff --git a/src/persistence/engine.rs b/src/persistence/engine.rs index 135335d..8e4ac60 100644 --- a/src/persistence/engine.rs +++ b/src/persistence/engine.rs @@ -152,8 +152,8 @@ where >, ) -> eyre::Result<()> { let batch_data_op = batch_op.get_batch_data_op()?; - let (pk_evs, secondary_evs) = batch_op.get_indexes_evs()?; + let mut empty_links_ops = batch_op.get_empty_link_registry_ops()?; { let mut futs = FuturesUnordered::new(); futs.push(Either::Left(Either::Right( @@ -167,12 +167,32 @@ where .process_change_event_batch(secondary_evs), )); - while (futs.next().await).is_some() {} + while futs.next().await.is_some() {} + } + + let mut need_to_save = false; + let info = self.data.get_mut_info(); + if !empty_links_ops.is_empty() { + let mut links_to_add = empty_links_ops.add_set.into_iter().collect::>(); + let empty_links = &mut info.inner.empty_links_list; + links_to_add.extend( + empty_links + .iter() + .filter(|l| !empty_links_ops.remove_set.remove(*l)) + .map(|l| *l) + .collect::>(), + ); + std::mem::swap(&mut info.inner.empty_links_list, &mut links_to_add); + + need_to_save = true; } if let Some(pk_gen_state_update) = batch_op.get_pk_gen_state()? { - let info = self.data.get_mut_info(); info.inner.pk_gen_state = pk_gen_state_update; + need_to_save = true; + } + + if need_to_save { self.data.save_info().await?; } diff --git a/src/persistence/mod.rs b/src/persistence/mod.rs index 6c9b8a1..6e475b2 100644 --- a/src/persistence/mod.rs +++ b/src/persistence/mod.rs @@ -8,8 +8,8 @@ use crate::persistence::operation::BatchOperation; pub use engine::PersistenceEngine; pub use manager::PersistenceConfig; pub use operation::{ - DeleteOperation, InsertOperation, Operation, OperationId, OperationType, UpdateOperation, - validate_events, + DeleteOperation, EmptyLinkRegistryOperation, InsertOperation, Operation, OperationId, + OperationType, UpdateOperation, validate_events, }; pub use space::{ IndexTableOfContents, SpaceData, SpaceDataOps, SpaceIndex, SpaceIndexOps, SpaceIndexUnsized, diff --git a/src/persistence/operation/batch.rs b/src/persistence/operation/batch.rs index 12c118a..4b63bea 100644 --- a/src/persistence/operation/batch.rs +++ b/src/persistence/operation/batch.rs @@ -363,6 +363,31 @@ where } } + pub fn get_empty_link_registry_ops(&self) -> eyre::Result { + let mut state = EmptyLinkOperationsState::default(); + for row in self + .info_wt + .select_all() + .order_on(BatchInnerRowFields::OperationId, Order::Asc) + .execute()? + { + if row.op_type == OperationType::Update { + continue; + } + let pos = row.pos; + let op = self + .ops + .get(pos) + .expect("should be available as loaded from info table"); + let evs = op + .empty_links_ops() + .expect("should be available as `Update` operations are sorted out"); + state.append_ops(evs.iter()); + } + + Ok(state) + } + pub fn get_batch_data_op(&self) -> eyre::Result { let mut data = HashMap::new(); for link in self.info_wt.iter_links() { @@ -392,3 +417,150 @@ where Ok(data) } } + +#[derive(Debug, Default)] +pub struct EmptyLinkOperationsState { + pub add_set: HashSet, + pub remove_set: HashSet, +} + +impl EmptyLinkOperationsState { + pub fn append_ops<'a>(&mut self, ops: impl Iterator) { + for op in ops { + match op { + EmptyLinkRegistryOperation::Add(link) => { + if !self.remove_set.remove(&link) { + self.add_set.insert(*link); + } + } + EmptyLinkRegistryOperation::Remove(link) => { + if !self.add_set.remove(&link) { + self.remove_set.insert(*link); + } + } + } + } + } + + #[cfg(test)] + pub fn get_result_ops(self) -> Vec { + self.add_set + .into_iter() + .map(|l| EmptyLinkRegistryOperation::Add(l)) + .chain( + self.remove_set + .into_iter() + .map(|l| EmptyLinkRegistryOperation::Remove(l)), + ) + .collect() + } + + pub fn is_empty(&self) -> bool { + self.remove_set.is_empty() && self.add_set.is_empty() + } +} + +#[cfg(test)] +mod empty_links_tests { + use std::collections::HashSet; + + use data_bucket::Link; + use data_bucket::page::PageId; + + use crate::persistence::operation::batch::EmptyLinkOperationsState; + use crate::prelude::EmptyLinkRegistryOperation; + + fn gen_ops(len: usize, add: bool) -> Vec { + let mut ops = vec![]; + let mut offset = 0; + + for _ in 0..len { + let length = fastrand::u32(10..60); + let link = Link { + page_id: PageId::from(0), + offset, + length, + }; + offset += length; + if add { + ops.push(EmptyLinkRegistryOperation::Add(link)) + } else { + ops.push(EmptyLinkRegistryOperation::Remove(link)) + } + } + + ops + } + + #[test] + fn test_links_collection_basic() { + let ops = vec![ + EmptyLinkRegistryOperation::Add(Link { + page_id: PageId::from(0), + offset: 0, + length: 20, + }), + EmptyLinkRegistryOperation::Add(Link { + page_id: PageId::from(0), + offset: 20, + length: 60, + }), + EmptyLinkRegistryOperation::Remove(Link { + page_id: PageId::from(0), + offset: 20, + length: 60, + }), + ]; + + let mut state = EmptyLinkOperationsState::default(); + state.append_ops(ops.iter()); + let res = state.get_result_ops(); + + assert_eq!(res.len(), 1); + assert_eq!(res, vec![ops[0]]) + } + + #[test] + fn test_links_collection_random_remove() { + let add_ops = gen_ops(256, true); + let mut remove_ops = HashSet::new(); + for _ in 0..64 { + let op_pos = fastrand::usize(0..256); + let mut op = add_ops[op_pos]; + while !remove_ops.insert(EmptyLinkRegistryOperation::Remove(op.link())) { + let op_pos = fastrand::usize(0..256); + op = add_ops[op_pos]; + } + } + assert_eq!(remove_ops.len(), 64); + + let mut state = EmptyLinkOperationsState::default(); + state.append_ops(add_ops.iter()); + state.append_ops(remove_ops.iter()); + let res = state.get_result_ops(); + + assert_eq!(res.len(), add_ops.len() - remove_ops.len()); + } + + #[test] + fn test_links_collection_random_add() { + let remove_ops = gen_ops(256, false); + let mut add_ops = HashSet::new(); + for _ in 0..64 { + let op_pos = fastrand::usize(0..256); + let mut op = remove_ops[op_pos]; + while !add_ops.insert(EmptyLinkRegistryOperation::Add(op.link())) { + let op_pos = fastrand::usize(0..256); + op = remove_ops[op_pos]; + } + } + assert_eq!(add_ops.len(), 64); + + let mut state = EmptyLinkOperationsState::default(); + state.append_ops(remove_ops.iter()); + state.append_ops(add_ops.iter()); + let res = state.get_result_ops(); + + assert_eq!(res.len(), remove_ops.len() - add_ops.len()); + } +} diff --git a/src/persistence/operation/mod.rs b/src/persistence/operation/mod.rs index cd4e549..c6c07fc 100644 --- a/src/persistence/operation/mod.rs +++ b/src/persistence/operation/mod.rs @@ -15,7 +15,9 @@ use uuid::Uuid; use crate::prelude::From; pub use batch::{BatchInnerRow, BatchInnerWorkTable, BatchOperation, PosByOpIdQuery}; -pub use operation::{DeleteOperation, InsertOperation, Operation, UpdateOperation}; +pub use operation::{ + DeleteOperation, EmptyLinkRegistryOperation, InsertOperation, Operation, UpdateOperation, +}; pub use util::validate_events; /// Represents page's identifier. Is unique within the table bounds diff --git a/src/persistence/operation/operation.rs b/src/persistence/operation/operation.rs index e00ad97..040dff0 100644 --- a/src/persistence/operation/operation.rs +++ b/src/persistence/operation/operation.rs @@ -105,6 +105,29 @@ impl Operation::Delete(_) => None, } } + + pub fn empty_links_ops(&self) -> Option<&Vec> { + match &self { + Operation::Insert(insert) => Some(&insert.empty_link_registry_operations), + Operation::Delete(delete) => Some(&delete.empty_link_registry_operations), + Operation::Update(_) => None, + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Hash, Eq)] +pub enum EmptyLinkRegistryOperation { + Add(Link), + Remove(Link), +} + +impl EmptyLinkRegistryOperation { + pub fn link(&self) -> Link { + match self { + EmptyLinkRegistryOperation::Add(l) => *l, + EmptyLinkRegistryOperation::Remove(l) => *l, + } + } } #[derive(Clone, Debug)] @@ -112,6 +135,7 @@ pub struct InsertOperation { pub id: OperationId, pub primary_key_events: Vec>>, pub secondary_keys_events: SecondaryKeys, + pub empty_link_registry_operations: Vec, pub pk_gen_state: PrimaryKeyGenState, pub bytes: Vec, pub link: Link, @@ -130,5 +154,6 @@ pub struct DeleteOperation { pub id: OperationId, pub primary_key_events: Vec>>, pub secondary_keys_events: SecondaryKeys, + pub empty_link_registry_operations: Vec, pub link: Link, } diff --git a/src/table/mod.rs b/src/table/mod.rs index 1e8f90f..f062196 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -4,7 +4,9 @@ pub mod system_info; use std::fmt::Debug; use std::marker::PhantomData; -use crate::in_memory::{DataPages, EmptyLinksRegistry, GhostWrapper, RowWrapper, StorableRow}; +use crate::in_memory::{ + DataPages, EmptyLinksRegistry, GhostWrapper, InsertCdcOutput, RowWrapper, StorableRow, +}; use crate::lock::LockMap; use crate::persistence::{InsertOperation, Operation}; use crate::prelude::{OperationId, PrimaryKeyGeneratorState}; @@ -262,10 +264,16 @@ where AvailableIndexes: Debug + AvailableIndex, { let pk = row.get_primary_key().clone(); - let (link, _) = self + let output = self .data .insert_cdc(row.clone()) .map_err(WorkTableError::PagesError)?; + let InsertCdcOutput { + link, + data_bytes: _, + empty_link_registry_ops, + } = output; + let primary_key_events = self.pk_map.checked_insert_cdc(pk.clone(), link); if primary_key_events.is_none() { self.data.delete(link).map_err(WorkTableError::PagesError)?; @@ -303,6 +311,7 @@ where pk_gen_state: self.pk_gen.get_state(), primary_key_events: primary_key_events.expect("should be checked before for existence"), secondary_keys_events: indexes_res.expect("was checked before"), + empty_link_registry_operations: empty_link_registry_ops, bytes, link, }); @@ -421,10 +430,15 @@ where .get(&pk) .map(|v| v.get().value) .ok_or(WorkTableError::NotFound)?; - let (new_link, _) = self + let output = self .data .insert_cdc(row_new.clone()) .map_err(WorkTableError::PagesError)?; + let InsertCdcOutput { + link: new_link, + data_bytes: _, + mut empty_link_registry_ops, + } = output; unsafe { self.data .with_mut_ref(new_link, |r| r.unghost()) @@ -453,9 +467,11 @@ where }; } - self.data + let delete_ops = self + .data .delete(old_link) .map_err(WorkTableError::PagesError)?; + empty_link_registry_ops.extend(delete_ops); let bytes = self .data .select_raw(new_link) @@ -466,6 +482,7 @@ where pk_gen_state: self.pk_gen.get_state(), primary_key_events, secondary_keys_events: indexes_res.expect("was checked just before"), + empty_link_registry_operations: empty_link_registry_ops, bytes, link: new_link, }); diff --git a/tests/persistence/sync/mod.rs b/tests/persistence/sync/mod.rs index ba20696..bd53fd2 100644 --- a/tests/persistence/sync/mod.rs +++ b/tests/persistence/sync/mod.rs @@ -1,9 +1,10 @@ -use crate::remove_dir_if_exists; use std::time::Duration; use worktable::prelude::*; use worktable::worktable; +use crate::remove_dir_if_exists; + mod string_primary_index; mod string_re_read; mod string_secondary_index; @@ -394,3 +395,49 @@ fn test_space_delete_query_sync() { } }); } + +#[test] +fn test_space_empty_links_available() { + let config = + PersistenceConfig::new("tests/data/sync/empty_links", "tests/data/sync/empty_links"); + + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_io() + .enable_time() + .build() + .unwrap(); + + runtime.block_on(async { + remove_dir_if_exists("tests/data/sync/empty_links".to_string()).await; + + let pk = { + let table = TestSyncWorkTable::load_from_file(config.clone()) + .await + .unwrap(); + let row1 = TestSyncRow { + another: 42, + non_unique: 0, + field: 0.234, + id: table.get_next_pk().into(), + }; + table.insert(row1.clone()).unwrap(); + let row2 = TestSyncRow { + another: 43, + non_unique: 0, + field: 0.235, + id: table.get_next_pk().into(), + }; + table.insert(row2.clone()).unwrap(); + table.delete(row2.id.into()).await.unwrap(); + + table.wait_for_ops().await; + row1.id + }; + { + let table = TestSyncWorkTable::load_from_file(config).await.unwrap(); + assert!(table.select(pk).is_some()); + assert_eq!(table.0.data.get_empty_links().len(), 1); + } + }); +} From 43ff895fdf04caf969ee05eb1e3bf2ab7d205bad Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Tue, 12 Aug 2025 01:28:47 +0300 Subject: [PATCH 04/12] clippy --- codegen/src/worktable/generator/table/mod.rs | 4 ++-- src/in_memory/empty_links_registry/mod.rs | 5 +++++ src/persistence/engine.rs | 2 +- src/persistence/operation/batch.rs | 10 +++++----- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/codegen/src/worktable/generator/table/mod.rs b/codegen/src/worktable/generator/table/mod.rs index a11ae0b..8e41b1d 100644 --- a/codegen/src/worktable/generator/table/mod.rs +++ b/codegen/src/worktable/generator/table/mod.rs @@ -86,8 +86,8 @@ impl Generator { let row_types = &self .columns .columns_map - .iter() - .map(|(_, t)| t.to_string()) + .values() + .map(|t| t.to_string()) .collect::>(); let row_unsized = is_unsized_vec(row_types); let derive = if self.is_persist { diff --git a/src/in_memory/empty_links_registry/mod.rs b/src/in_memory/empty_links_registry/mod.rs index f672ef8..d2397ca 100644 --- a/src/in_memory/empty_links_registry/mod.rs +++ b/src/in_memory/empty_links_registry/mod.rs @@ -24,4 +24,9 @@ pub trait EmptyLinksRegistry { /// Returns length of this [`EmptyLinksRegistry`]. fn len(&self) -> usize; + + /// Checks if this [`EmptyLinksRegistry`] is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } } diff --git a/src/persistence/engine.rs b/src/persistence/engine.rs index 8e4ac60..be3de98 100644 --- a/src/persistence/engine.rs +++ b/src/persistence/engine.rs @@ -179,7 +179,7 @@ where empty_links .iter() .filter(|l| !empty_links_ops.remove_set.remove(*l)) - .map(|l| *l) + .copied() .collect::>(), ); std::mem::swap(&mut info.inner.empty_links_list, &mut links_to_add); diff --git a/src/persistence/operation/batch.rs b/src/persistence/operation/batch.rs index 4b63bea..e40681c 100644 --- a/src/persistence/operation/batch.rs +++ b/src/persistence/operation/batch.rs @@ -429,12 +429,12 @@ impl EmptyLinkOperationsState { for op in ops { match op { EmptyLinkRegistryOperation::Add(link) => { - if !self.remove_set.remove(&link) { + if !self.remove_set.remove(link) { self.add_set.insert(*link); } } EmptyLinkRegistryOperation::Remove(link) => { - if !self.add_set.remove(&link) { + if !self.add_set.remove(link) { self.remove_set.insert(*link); } } @@ -446,11 +446,11 @@ impl EmptyLinkOperationsState { pub fn get_result_ops(self) -> Vec { self.add_set .into_iter() - .map(|l| EmptyLinkRegistryOperation::Add(l)) + .map(EmptyLinkRegistryOperation::Add) .chain( self.remove_set .into_iter() - .map(|l| EmptyLinkRegistryOperation::Remove(l)), + .map(EmptyLinkRegistryOperation::Remove), ) .collect() } @@ -494,7 +494,7 @@ mod empty_links_tests { #[test] fn test_links_collection_basic() { - let ops = vec![ + let ops = [ EmptyLinkRegistryOperation::Add(Link { page_id: PageId::from(0), offset: 0, From 878a12bef25897f2fe76f9e90c25b903a3d84274 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:53:45 +0300 Subject: [PATCH 05/12] WIP --- src/in_memory/data.rs | 60 ++++++-- .../empty_links_registry/defragmentator.rs | 1 - src/in_memory/empty_links_registry/mod.rs | 1 - src/table/defragmentator/mod.rs | 141 ++++++++++++++++++ src/table/mod.rs | 1 + 5 files changed, 188 insertions(+), 16 deletions(-) delete mode 100644 src/in_memory/empty_links_registry/defragmentator.rs create mode 100644 src/table/defragmentator/mod.rs diff --git a/src/in_memory/data.rs b/src/in_memory/data.rs index 5755ae1..9c88ca0 100644 --- a/src/in_memory/data.rs +++ b/src/in_memory/data.rs @@ -5,7 +5,9 @@ use std::sync::atomic::{AtomicU32, Ordering}; use data_bucket::page::INNER_PAGE_SIZE; use data_bucket::page::PageId; -use data_bucket::{DataPage, GeneralPage}; +use data_bucket::{ + DataPage, DefaultSizeMeasurable, GeneralPage, SizeMeasurable, SizeMeasure, align, +}; use derive_more::{Display, Error}; #[cfg(feature = "perf_measurements")] use performance_measurement_codegen::performance_measurement; @@ -45,6 +47,24 @@ impl DerefMut for AlignedBytes { } } +#[derive( + Archive, + Copy, + Clone, + Deserialize, + Debug, + Default, + Eq, + Hash, + Ord, + PartialEq, + PartialOrd, + Serialize, + SizeMeasure, +)] +#[rkyv(derive(Debug, PartialOrd, PartialEq, Eq, Ord))] +pub struct RowLength(pub u32); + #[derive(Archive, Deserialize, Debug, Serialize)] pub struct Data { /// [`Id`] of the [`General`] page of this [`Data`]. @@ -105,7 +125,10 @@ impl Data { { let bytes = rkyv::to_bytes::(row) .map_err(|_| ExecutionError::SerializeError)?; - let length = bytes.len(); + let link_length = bytes.len(); + let length_bytes = rkyv::to_bytes::(&RowLength(link_length as u32)) + .map_err(|_| ExecutionError::SerializeError)?; + let length = link_length + RowLength::default_aligned_size(); if length > DATA_LENGTH { return Err(ExecutionError::PageTooSmall { need: length, @@ -113,7 +136,7 @@ impl Data { }); } let length = length as u32; - let offset = self.free_offset.fetch_add(length, Ordering::AcqRel); + let mut offset = self.free_offset.fetch_add(length, Ordering::AcqRel); if offset > DATA_LENGTH as u32 - length { return Err(ExecutionError::PageIsFull { need: length, @@ -122,12 +145,15 @@ impl Data { } let inner_data = unsafe { &mut *self.inner_data.get() }; - inner_data[offset as usize..][..length as usize].copy_from_slice(bytes.as_slice()); + inner_data[offset as usize..][..RowLength::default_aligned_size()] + .copy_from_slice(length_bytes.as_slice()); + offset += RowLength::default_aligned_size() as u32; + inner_data[offset as usize..][..link_length].copy_from_slice(bytes.as_slice()); let link = Link { page_id: self.id, offset, - length, + length: link_length as u32, }; Ok(link) @@ -263,14 +289,14 @@ pub enum ExecutionError { #[cfg(test)] mod tests { + use crate::in_memory::data::{Data, ExecutionError, INNER_PAGE_SIZE, RowLength}; + use data_bucket::DefaultSizeMeasurable; + use rkyv::util::AlignedVec; + use rkyv::{Archive, Deserialize, Serialize}; use std::sync::atomic::Ordering; use std::sync::{Arc, mpsc}; use std::thread; - use rkyv::{Archive, Deserialize, Serialize}; - - use crate::in_memory::data::{Data, ExecutionError, INNER_PAGE_SIZE}; - #[derive( Archive, Copy, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, )] @@ -296,13 +322,19 @@ mod tests { let link = page.save_row(&row).unwrap(); assert_eq!(link.page_id, page.id); assert_eq!(link.length, 16); - assert_eq!(link.offset, 0); + assert_eq!(link.offset, RowLength::default_aligned_size() as u32); - assert_eq!(page.free_offset.load(Ordering::Relaxed), link.length); + assert_eq!( + page.free_offset.load(Ordering::Relaxed), + link.length + RowLength::default_aligned_size() as u32 + ); let inner_data = unsafe { &mut *page.inner_data.get() }; - let bytes = &inner_data[link.offset as usize..link.length as usize]; - let archived = unsafe { rkyv::access_unchecked::(bytes) }; + let mut bytes = AlignedVec::<8>::new(); + bytes.extend_from_slice( + &inner_data[link.offset as usize..(link.offset + link.length) as usize], + ); + let archived = unsafe { rkyv::access_unchecked::(&bytes) }; assert_eq!(archived, &row) } @@ -326,7 +358,7 @@ mod tests { #[test] fn data_page_full() { - let page = Data::::new(1.into()); + let page = Data::::new(1.into()); let row = TestRow { a: 10, b: 20 }; let _ = page.save_row(&row).unwrap(); diff --git a/src/in_memory/empty_links_registry/defragmentator.rs b/src/in_memory/empty_links_registry/defragmentator.rs deleted file mode 100644 index 8b13789..0000000 --- a/src/in_memory/empty_links_registry/defragmentator.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/in_memory/empty_links_registry/mod.rs b/src/in_memory/empty_links_registry/mod.rs index d2397ca..6d00bbd 100644 --- a/src/in_memory/empty_links_registry/mod.rs +++ b/src/in_memory/empty_links_registry/mod.rs @@ -1,4 +1,3 @@ -mod defragmentator; mod sized; mod unsized_; diff --git a/src/table/defragmentator/mod.rs b/src/table/defragmentator/mod.rs new file mode 100644 index 0000000..eda35eb --- /dev/null +++ b/src/table/defragmentator/mod.rs @@ -0,0 +1,141 @@ +use std::collections::HashMap; +use std::fmt::Debug; +use std::sync::Arc; + +use data_bucket::Link; +use data_bucket::page::PageId; +use indexset::core::node::NodeLike; +use indexset::core::pair::Pair; +use tokio::sync::{Notify, RwLock}; + +use crate::WorkTable; +use crate::in_memory::{EmptyLinksRegistry, RowWrapper, StorableRow}; +use crate::lock::LockMap; +use crate::prelude::{Lock, TablePrimaryKey}; + +pub struct DefragmentatorTask { + task_handle: tokio::task::AbortHandle, + + /// Shared map for locking pages that are in defragmentation progress. + lock: Arc>, + + /// Shared notifier for waking up [`Defragmentator`] + notify: Arc, +} + +#[derive(Debug)] +pub struct Defragmentator< + Row, + PrimaryKey, + EmptyLinks, + AvailableTypes, + AvailableIndexes, + SecondaryIndexes, + LockType, + PkGen, + NodeType, + const DATA_LENGTH: usize, +> { + /// [`WorkTable`] to work with. + table: Arc< + WorkTable< + Row, + PrimaryKey, + EmptyLinks, + AvailableTypes, + AvailableIndexes, + SecondaryIndexes, + LockType, + PkGen, + NodeType, + DATA_LENGTH, + >, + >, + + /// Map for locking pages that are in defragmentation progress. + lock_map: Arc>, + + /// Notifier for waking up [`Defragmentator`] + notify: Arc, +} + +impl< + Row, + PrimaryKey, + EmptyLinks, + AvailableTypes, + AvailableIndexes, + SecondaryIndexes, + LockType, + PkGen, + NodeType, + const DATA_LENGTH: usize, +> + Defragmentator< + Row, + PrimaryKey, + EmptyLinks, + AvailableTypes, + AvailableIndexes, + SecondaryIndexes, + LockType, + PkGen, + NodeType, + DATA_LENGTH, + > +where + PrimaryKey: Debug + Clone + Ord + Send + TablePrimaryKey + std::hash::Hash, + EmptyLinks: Default + EmptyLinksRegistry, + SecondaryIndexes: Default, + PkGen: Default, + NodeType: NodeLike> + Send + 'static, + Row: StorableRow + Send + Clone + 'static, + ::WrappedRow: RowWrapper, +{ + fn defragment(&self) -> eyre::Result<()> { + const SINGLE_LINK_RATIO_TRIGGER: f32 = 0.4; + + let empty_links = self.table.data.get_empty_links(); + let empty_links_len = empty_links.len(); + let mapped_links = Self::map_empty_links_by_pages(empty_links); + let single_link_pages = mapped_links.values().filter(|v| v.len() == 1).count(); + + if single_link_pages as f32 / empty_links_len as f32 > SINGLE_LINK_RATIO_TRIGGER { + self.defragment_if_triggered(mapped_links) + } else { + self.defragment_if_not_triggered(mapped_links) + } + } + + fn defragment_if_triggered( + &self, + mapped_links: HashMap>, + ) -> eyre::Result<()> { + Ok(()) + } + + fn defragment_if_not_triggered( + &self, + mapped_links: HashMap>, + ) -> eyre::Result<()> { + for (page_id, links) in mapped_links { + let lock_id = self.lock_map.next_id(); + let lock = Arc::new(RwLock::new(Lock::new(lock_id))); + self.lock_map.insert(page_id, lock).expect( + "nothing should be returned as this is single defragmentation thread for table", + ); + } + + Ok(()) + } + + fn map_empty_links_by_pages(empty_links: Vec) -> HashMap> { + let mut map = HashMap::new(); + for link in empty_links { + map.entry(link.page_id) + .and_modify(|v| v.push(link)) + .or_insert(vec![link]); + } + map + } +} diff --git a/src/table/mod.rs b/src/table/mod.rs index f062196..6ed4909 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,3 +1,4 @@ +//mod defragmentator; pub mod select; pub mod system_info; From ee07b2ad582d1116ab5ee87a3bde2bdb40aee7f2 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:18:45 +0300 Subject: [PATCH 06/12] add length before row's in data page --- src/in_memory/data.rs | 36 +++++------------- src/in_memory/pages.rs | 21 ++-------- src/table/mod.rs | 10 ----- tests/data/expected/test_persist/.wt.data | Bin 18788 -> 19580 bytes .../expected/test_persist/another_idx.wt.idx | Bin 49146 -> 49146 bytes .../data/expected/test_persist/primary.wt.idx | Bin 49146 -> 49146 bytes .../test_without_secondary_indexes/.wt.data | Bin 18788 -> 19580 bytes .../primary.wt.idx | Bin 49146 -> 49146 bytes tests/data/test_persist/.wt.data | Bin 18788 -> 19580 bytes tests/data/test_persist/another_idx.wt.idx | Bin 49146 -> 49146 bytes tests/data/test_persist/primary.wt.idx | Bin 49146 -> 49146 bytes .../test_without_secondary_indexes/.wt.data | Bin 18788 -> 19580 bytes .../primary.wt.idx | Bin 49146 -> 49146 bytes tests/persistence/read.rs | 10 ++--- tests/persistence/toc/read.rs | 2 +- tests/worktable/base.rs | 6 +-- 16 files changed, 22 insertions(+), 63 deletions(-) diff --git a/src/in_memory/data.rs b/src/in_memory/data.rs index 9c88ca0..ae6c353 100644 --- a/src/in_memory/data.rs +++ b/src/in_memory/data.rs @@ -9,8 +9,6 @@ use data_bucket::{ DataPage, DefaultSizeMeasurable, GeneralPage, SizeMeasurable, SizeMeasure, align, }; use derive_more::{Display, Error}; -#[cfg(feature = "perf_measurements")] -use performance_measurement_codegen::performance_measurement; use rkyv::{ Archive, Deserialize, Portable, Serialize, api::high::HighDeserializer, @@ -47,6 +45,10 @@ impl DerefMut for AlignedBytes { } } +/// Value that represents size of the row which will be written just after this +/// value. +/// +/// `u64` is used to keep correct alignment #[derive( Archive, Copy, @@ -63,7 +65,7 @@ impl DerefMut for AlignedBytes { SizeMeasure, )] #[rkyv(derive(Debug, PartialOrd, PartialEq, Eq, Ord))] -pub struct RowLength(pub u32); +pub struct RowLength(pub u64); #[derive(Archive, Deserialize, Debug, Serialize)] pub struct Data { @@ -112,10 +114,6 @@ impl Data { self.id = id; } - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "DataRow") - )] pub fn save_row(&self, row: &Row) -> Result where Row: Archive @@ -126,7 +124,7 @@ impl Data { let bytes = rkyv::to_bytes::(row) .map_err(|_| ExecutionError::SerializeError)?; let link_length = bytes.len(); - let length_bytes = rkyv::to_bytes::(&RowLength(link_length as u32)) + let length_bytes = rkyv::to_bytes::(&RowLength(link_length as u64)) .map_err(|_| ExecutionError::SerializeError)?; let length = link_length + RowLength::default_aligned_size(); if length > DATA_LENGTH { @@ -160,10 +158,6 @@ impl Data { } #[allow(clippy::missing_safety_doc)] - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "DataRow") - )] pub unsafe fn save_row_by_link( &self, row: &Row, @@ -219,10 +213,6 @@ impl Data { Ok(unsafe { rkyv::access_unchecked_mut::<::Archived>(&mut bytes[..]) }) } - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "DataRow") - )] pub fn get_row_ref(&self, link: Link) -> Result<&::Archived, ExecutionError> where Row: Archive, @@ -236,10 +226,6 @@ impl Data { Ok(unsafe { rkyv::access_unchecked::<::Archived>(bytes) }) } - //#[cfg_attr( - // feature = "perf_measurements", - // performance_measurement(prefix_name = "DataRow") - //)] pub fn get_row(&self, link: Link) -> Result where Row: Archive, @@ -291,7 +277,6 @@ pub enum ExecutionError { mod tests { use crate::in_memory::data::{Data, ExecutionError, INNER_PAGE_SIZE, RowLength}; use data_bucket::DefaultSizeMeasurable; - use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; use std::sync::atomic::Ordering; use std::sync::{Arc, mpsc}; @@ -330,10 +315,7 @@ mod tests { ); let inner_data = unsafe { &mut *page.inner_data.get() }; - let mut bytes = AlignedVec::<8>::new(); - bytes.extend_from_slice( - &inner_data[link.offset as usize..(link.offset + link.length) as usize], - ); + let bytes = &inner_data[link.offset as usize..(link.offset + link.length) as usize]; let archived = unsafe { rkyv::access_unchecked::(&bytes) }; assert_eq!(archived, &row) } @@ -351,14 +333,14 @@ mod tests { assert_eq!(res, (link, None)); let inner_data = unsafe { &mut *page.inner_data.get() }; - let bytes = &inner_data[link.offset as usize..link.length as usize]; + let bytes = &inner_data[link.offset as usize..(link.offset + link.length) as usize]; let archived = unsafe { rkyv::access_unchecked::(bytes) }; assert_eq!(archived, &new_row) } #[test] fn data_page_full() { - let page = Data::::new(1.into()); + let page = Data::::new(1.into()); let row = TestRow { a: 10, b: 20 }; let _ = page.save_row(&row).unwrap(); diff --git a/src/in_memory/pages.rs b/src/in_memory/pages.rs index b879833..f010367 100644 --- a/src/in_memory/pages.rs +++ b/src/in_memory/pages.rs @@ -7,8 +7,6 @@ use std::{ use data_bucket::SizeMeasurable; use data_bucket::page::PageId; use derive_more::{Display, Error, From}; -#[cfg(feature = "perf_measurements")] -use performance_measurement_codegen::performance_measurement; use rkyv::{ Archive, Deserialize, Portable, Serialize, api::high::HighDeserializer, @@ -227,10 +225,6 @@ where } } - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "DataPages") - )] pub fn select(&self, link: Link) -> Result where Row: Archive @@ -270,10 +264,6 @@ where Ok(gen_row.get_inner()) } - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "DataPages") - )] pub fn with_ref(&self, link: Link, op: Op) -> Result where Row: Archive @@ -294,10 +284,6 @@ where } #[allow(clippy::missing_safety_doc)] - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "DataPages") - )] pub unsafe fn with_mut_ref( &self, link: Link, @@ -409,14 +395,15 @@ pub enum ExecutionError { #[cfg(test)] mod tests { + use data_bucket::DefaultSizeMeasurable; + use rkyv::{Archive, Deserialize, Serialize}; use std::collections::HashSet; use std::sync::atomic::Ordering; use std::sync::{Arc, RwLock}; use std::thread; use std::time::Instant; - use rkyv::{Archive, Deserialize, Serialize}; - + use crate::in_memory::data::RowLength; use crate::in_memory::pages::DataPages; use crate::in_memory::{PagesExecutionError, RowWrapper, StorableRow}; use crate::prelude::{ @@ -488,7 +475,7 @@ mod tests { assert_eq!(link.page_id, 1.into()); assert_eq!(link.length, 24); - assert_eq!(link.offset, 0); + assert_eq!(link.offset, RowLength::default_aligned_size() as u32); assert_eq!(pages.row_count.load(Ordering::Relaxed), 1); } diff --git a/src/table/mod.rs b/src/table/mod.rs index 6ed4909..3d09d0c 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -20,8 +20,6 @@ use data_bucket::{INNER_PAGE_SIZE, Link, SizeMeasurable}; use derive_more::{Display, Error, From}; use indexset::core::node::NodeLike; use indexset::core::pair::Pair; -#[cfg(feature = "perf_measurements")] -use performance_measurement_codegen::performance_measurement; use rkyv::api::high::HighDeserializer; use rkyv::rancor::Strategy; use rkyv::ser::Serializer; @@ -153,10 +151,6 @@ where } /// Selects `Row` from table identified with provided primary key. Returns `None` if no value presented. - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "WorkTable") - )] pub fn select(&self, pk: PrimaryKey) -> Option where LockType: 'static, @@ -180,10 +174,6 @@ where } } - #[cfg_attr( - feature = "perf_measurements", - performance_measurement(prefix_name = "WorkTable") - )] pub fn insert(&self, row: Row) -> Result where Row: Archive diff --git a/tests/data/expected/test_persist/.wt.data b/tests/data/expected/test_persist/.wt.data index cf48e267ea4eb6f1b731317e7495047c1d867afb..23de210f7b4293f676f17a8e496965853ee2d1c5 100644 GIT binary patch literal 19580 zcmeI3$8uCx00kv-Hpb+fF~MXoHrND{O-4l9AfU8hi}V-#8cUWeSnwb640MaC1D|ki z)vKQAzHf7Cwlg~N?HL`YtAl53Vsh%v#GA?2lT$O7hRySK@Zsb1^z^5}Gjt!|d-x7| zzw82CpbK-j+1DXt_VoyveFH*f--wXeHz8#9%?O!&3qoe!ijdj2A!PRL2$_8cLT2BI zklA-3WcJ+%nSBpJX8#!>v+qU7?E4Th`+kJXegGk}|ALU&4ElfX8#=_v!6!D>}L=%`yU9I{ZE9+PYA!PQ;2$}r~LT0~;klC*xWcKR_nf-5s%zgtQv)@F> z?6(jy`)!2Geg`45-$lsm_YgAseT2+DhLG7GAY}H32$}s6LS}!AklCLgWcH^Bnf)Jx k%>FMzX8#W%vp++~?9UN0`#3^oe}RzMUm|4oR|skUA9;;P_5c6? delta 406 zcmWN|yG=wf5I|8DIiUgc0uTuSUGUf)VEL5cQd|K}D8L0!#FR2+AjAnybaj8f5A}8U z+4jrx)%<(Bp8I|?`@YS=A}QL_Htz{KgGExbqfXEnERs4t94_YV>2AhJWw1z!7H)#h zV38E@wbb`)ckreG!C+G|oNzu+aL1(Z?iuR@xbOwv0Xzw~fXRt_$_MsDW O28*O40bu5|Cc#NBjnu`Ty*(_OaZ-Ypnlwi#apf!qjhBmfs#W*G!oWWh@a zkcIM}k&f}fi5FhyJ-`u1k2r&aS0j!#JR|z|7zwIkj(;3{8vFR;;D^Dd&nM0J{7CvK z9{({pIr(SI#J^1+f1N)5GJUkC@Q?x&pa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP z3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbF zC_sV#D3I>;Pa_}GrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rjMj7s&Vh=X?6oE6j}f`vqw*<7+zrObqu1 z7>S(#W@?sZYmVls&^*o80xi@cE!Gk()iN#D3a!*Ct=5{@Jz$;IYlAjwlQzd516#CJ z+q7Lfv{SpZTc!4Bul8xb4(Ol`>9CIIsE)->1jltkCpD^5I;}H0t8+T93%aOFx~wa@ zs%yHg8>)0uw{%-~bXWIuUt@ZphkB%OJ=POF)iXWU3%%4Uz1ADO)jPe{hi?zYGbliT z|51Upe@{*h;_{fUl<8Q+{Bk#x)tj%g^NV zm*yMHEAx%!wfQFV#(cARYo2!bNh7VZGvAUh#&G@STc>#H{S$BEy#8(GrTKRA%6x}; zZNAgIG2dn0n(sF6%uDm$e2;k%e{6aEd(BJpedd+69UYZ{>ugvod zyfd%Nd-I#-MSK|K_1`it&2O7m=6B3%^SkDa`91U2{Jwc-K4#vVKQJ%iLo2WUp?PWk z$hXf9%vrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ wfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=EqoDKPr;)cvgg0AHtzg#Z8m literal 49146 zcmeI)Ic!r&0LJkzoaQ#SIoI6gHn4!X&24T20nBZPkg{F0rbwAG(l#kmrc9YKWtx>z zY>{G%6suJV3d)p`w(!o_p6Ju=Qc&-Iq|tl+`Pqv9ah{#xO_%;Xx&l?g?EcjFT>bQO zrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun z0u-PC1t>rP3Q&Lo6!?z<@ml{l(l+k@5ay3z_QV@u#QTcNq+IKkl-2-<*Z#-*<~rB; z$Hy=zKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epy`2GUvy8m=de|(08Vg7kV zYApEH3IIK=wE?=qN&pMBNQ<>ZOO??wE!PUI)GDpk8fCRs>$F}Qv{9S1IjkPAMO(E^ z+qFYG!x{s5?b2@T(O&J-ejQLj2X#n?bwo#XOvhE!37yobuoA&(ozYp9bWZ1WK^JvN zmvu#DUDY*R*A3m&E!|c{cXU_xbYBnjP>)pAV?EJRb?cd)>xF81saJZfH+rjgdat@Z z=;QYX;}H~~!2hT~?yqLE5l)ZkOqsU-Tn#09wB+$D8b|bEn8_DK3?q6a%;bwBh7r9U zX7VLrKaS}AVJ2T1F^uR#VJ6Q+3?uqzn90KeF0B!LBFyB=TiQN;+I)ritocgwOn9>N zJgdy}=Bv$1=4;HW=2`Pz^R?y!=IhLd&DWccnQt(kG~Z}GW4_6J&U~|ZHoQMlhqMic zs$jmwjxU>UHLsa(Gw(CsZa!$f!+gYir}?;fT;(T@_;XH~=Ocy@{j2$|mbN{9MM$r6 zw|UNdk9pC2uX)9MpLyMUzj?p;0rMgAg88WVLGuaoL*~=whs|frkCP z=ghC07tL>&SIlpk*UfL4_nY4~A2N@hDRIQtHEMpRrEQ-uziU2ie$RZ?{Jwc6dFUJ{H^(j`8)G*^Y`Xc=5_P0<{!+%zk2hp_oI2vyvN*EkLxfS z3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbF aC_n)UP=Epypa2CZK!I-*DE->Lp7n303XB5) diff --git a/tests/data/expected/test_persist/primary.wt.idx b/tests/data/expected/test_persist/primary.wt.idx index 6924ed7a831d029a011c20c50abcec697623bbfa..af1f56470b5d5b19dce3783b2142b7026014dc6d 100644 GIT binary patch literal 49146 zcmeI)$8OwK7>40bu5|Cc#NBjnu`SzjZ*-S!xpxZ&=xyYSAdnm3jRfE&WEEt|MHX2G z$YS!Jkq+=7fq{7K_W(zfp5Y7*UX3`~@QmocVrP z3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbF z{$GJ~uYVf(n7;oZ=8rKa(+@GyW7T45{qQF#?E{cTy5~PV_Seik|LHjl3Q&Lo6rcbF zC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epy zpa2CZKmiI+fC3bt00k&O0SZun0u-PC1-`vNzVAQZ)1O{pcFf-{N>j7GwgbTAaBqN- z*a={^=4h_wX}$_A&_XTJVlB~9Ez@$X&`Pb+YOT>)t&80QHfWX?q}MC?RxQm1rUqdKFrI;Sz6*9BeFC0*7PUDY*R z*A3lNrCYkKJG!fTx~~Ts*F!zhV@>FZp6Z#N>xEwGm0s(O-s+v+>w`XidoZ3s0SbIa z1=jyP7)-_GF<&Xuv55Ka-B6O3=IJV$M)E3VK7UTa7|H9H+2Jjyo;H=NEjn|A2a)cgfWsA@g@7hp-y{gzR0{XkBwl4BfU;-zQnvSUuxc(FEj7V zmz($IE6j`dedP78G%w9pnOEkk&1>^D=8gGU^VWQwd1s#26-!rdzQN8f;?0rI+h|^z zZ!)jUH=EbyTg)5tt>&$H+T|yWw9d|aTf!K_^_y>>=IQrOyovMrcbJ#vJIyQeUFNm< zZu7=`k9ljp*Ss??&3p5G=0*Im<@N73FU=2_SLO%JYx6_qjrn2o*8GThXMWVYH$P@x z#6=;m|G0T+e!{#m&$~3G-*0Vx(#~(pPnoyor_DR_QS;vXjCm0kue|=V=B4>L^U8e8 zyf!~?-k4u7Z_VQ$+~F9$|III%_vV+)i@3<<^{d{HA$l zUYYmix6F(9Fv#n_ZC;w+F|W+;n%Cy{%p3Fj=B@by^Ui$Syf=SnUc`r1UjHNW()_V` zWjrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00q8MVDy*i`&s`1KYEL` literal 49146 zcmeI)IZTvk9LMn=p5u)-o;%)nkGkrpsCeT&Dk|Qi#Dwk0js+zp$u^Xfl$4Z|G@DRT zQf(+HwxA^0f_?roJm!}*CR*?JpFFUUa{{{17Zzvt)YXVVItSvXEF9H$nJaT`Mc3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O z0SZun0u-RYj}?e({bTYmKK>=GU(@Q2PtwF~r4>?3{WB@{0f>of{^Pc}g=_xfYZwYp zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O z0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2DazCgb2KVQ=ykFYqczphHPMc>;2 zpu4^|KvUWYV6m2Hsg`NE3RAcFipo_YsHeJ>gT~)iT z>AG&{rf%uB?x;g|bx-&8Ko9juk5$nVJ=HUH>bYL%rMmP=uk}W6^-k~gK~;U!r=JhT zS5SZg|Dyse|IE+V(&;guDf98ayP<@KWskFHOn51+>`MaEgmDwm!*$m z!u!(7zC18Z_+VPu3xR3EN7BlkHgKs=_;^~`SJt(0|0(lT=CkIj%?s(l^5d*AFPg74 zFPpD3ub4NR_n5CY?>FCIK4iYpeAIlC`Gomq^J()f=5yv-&70HDNA8f1>8@%u-)8r3 zH{WjFWxm6_*LfcY--Ve{SQW9G5TPfUEDljg<1G~qMmd+OToc#Dvq=U(#`^L^$e z^Zn)><_FBH<_FFD%nz9lnzx#dm>)JDH$P%NWq#Cr*8G@xA^kGv4#&-l<|oX{<|oZ7 z=5elwiT*w2r|R19e)H4jL*{49N6qstP4V~>=4b8x)8^;Q=giNWH>Z2IW9I1%ZhfM|q-29&Tl=*%0S@Q?xh4dnn|KA^)7tJ4; zm(3rWSIjHsJ?2l$`^}%451BtRA2shZpD=%JK5hQOe9ru(d2@Qv%^kYTTg_jYx0}B< z?=pX5-fRBWe8Bvj`LOwW^D*-e=9A`C^BMDx=IKwpug~|Bd5d|sx$hpgz#J$*0SZun z0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP V3Q&Lo6rcbFzEhz5XXAR-e*uNvj57cL diff --git a/tests/data/expected/test_without_secondary_indexes/.wt.data b/tests/data/expected/test_without_secondary_indexes/.wt.data index 555d8cb0e26c8125bc3bdf5a8ffd9d76d658d065..a9fdec9f3cc2f07a962b85ef3843812e2a114671 100644 GIT binary patch literal 19580 zcmeI3$BtB200jpy!DKKwXWN(@OvWbXU=wYENh84qBp4*~E&P%fUU=yx|1oqkRUjRH z;ausdx~uNn9QC#*jeLD34bImqR;w@I0D5GO}#q%7{&(KlkDN;gJ*HzPvvL zA+vvjklCjqWcFzYnSDA!W}ktO*}p}|>@yKE`z(aaKA8H`k=f@UWcIlTnSCBYW}lCc z*%u&W_Js(U{X2xrz6c?+FGk4hOAs>qQiROz?_6eIj*!_`AY}HH2$_8qLS|o$klEKD zWcIZPnSC8XW?zqx**73$_KgUceG@`v-;9viw;*Kp?-4Tl4+xq4M}*A26(O_#gpk>{ zA!PRL2$_8cLT2BIklA-3WcJ+%nSBpJX5WjD+4mu2_WcN%{bz*CegGk}A4JIPhY&LR zVT8}L@&`#FTn zejXvSUqHz07ZEc1C4|g=86mS@LCEY^5i&A+!I6klBAn$m}-|GW$)0%zg_Y zv)@L@>~|0{`(1?0{s%&4{}Umz-$Tgk_YpGt1BA@}5FxYwg^<}FA!PQ)2$}r}LS}!8 GkoJ$UU{8nu delta 406 zcmWN|yG=wf5I|8DIiUgc0uTuSUGUf)VEL5cQd|K}D8L0!#FR2+AjAnybaj8f5A}8U z+4jrx)%<(Bp8I|?`@YS=A}QL_Htz{KgGExbqfXEnERs4t94_YV>2AhJWw1z!7H)#h zV38E@wbb`)ckreG!C+G|oNzu+aL1(Z?iuR@xbOwv0Xzw~fXRt_$_MsDW O28*O40bxs`Lyxs{WZb8bmgBqb_K6h)xDkpPnh>>KnQe1X1*fjNQ~hT&x| zy)@tj{nwBMe6X#B)_w~pHs_gu!aKtrZn|dZ-(yD5K+K`B@rmJYC+g4NPYjNYe*W}v zrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O z0SZun0u-PC1t>rP3Q&Lo6rjL=7f9For;(58_pLF%i8-2X#7Os5i==h^TT;62KaF&a zf4c9_>1+Jca~Kq$00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiJTeSv)4f4-(a zy~3=Rzg?EbXMSl1fYJWm05f7IfLWTYIhw0^Dl}gUv`~w*SWC22%d}i8v{I|IT5Gg6 zb`Myu4ce$p+N>?H$G|pi*ADH}F74JH?NzCL+OGpTs6#reBRZ;MI<6D36TwNH(rKO1 zS)J2)UC>2c(q&!IRbA6{-Ox?l(rs0`qr1AN0o~UFJ=7yT))NhCNKf@l!+Ne4dZ|}> ztv7nBcY3c8efaudJc9xh_#YKm_t)g)c$^;dnKB)VnE%`jC3$I{&Z21~uVUuM&rTR4 zc^xzRoP;rwH!-u%O&BA28#DX7gfWu$VrDNA#z@}9%sxM1jO0c9$iASjQ(l@cG_TBK zBbfe3uTz^ZHgC+An78Ij&3op{%scbt=0&_8dHpNQOY@cHmH8_3+I+QnW4^|`HD7Dq zGtcXarK>YvZ;vnH!;v4i!MrrzXkM9bGOx`yn>XfL%vYhJ|3E3f~Yd1-#$yfVLFUYlPuZ_F>5x90H=Zh!PY|K?ZBJM*jNMVw^w`mdRn z=GVrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt P00q8K;LOic*R%c&anh1U literal 49146 zcmeI)IZTyV7{>7jSKM*Ob#TWWb=2#QJ18m&%5?{kglUuvOu}@Ol$0dXP*PG-(o8~0 zNimsFQc{vk!JPMAj(IX_VLkshIezfFOT2&KyVv4775aKA5;djO+SS|F_Df&)=imBT zx;j69`q=U7?~R{6cJ+7lmL9Yu{rN{)e=RI5%%v4LyLg;gJWekj<2Hr@6rcbFC_n)U zP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZ zKmiI+fC3bt00k&O0SZun0u-PC1t>s)Zx@Jb{bTYmzWyn#pVR7$Z_>nVb*rT6(sxo^ z_a772_{VMYi`V$aV;BlhfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epy zpa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2DaygZ?H;g0JGD!@wMTo?9s@Pnr~NvhgF2+cI-*(~)iE8{37ym_omQRB=&a7Aoe0kB zf-b6{OS-Hpx~glst{bY?P2JLM-O*j$(|t9lQ4jP`O?srqdZK1M)ibrIRnPT8Z7S-e zUg@>o=&j!Az1r2G4?iA^M^JzQ|Dyuc|12!@rqg3SQ|9A;cS8ve%N}RZnDDx^vM&ov z6W)+k_T_X#{%<~8Q)%?sum%$v=t%)8Aunh%(7G9NbIY(8eb#eC9ytND!iHuHJ& z?dDbK=OcH>$8=ZKn(wgt*PHJ&FPiT%?=jzPK4`wje8haO`M7!P@)Hw(&nfeoz%=2r z=KD%o_V|d9U*~@FYV!l;b>;`n8_W-xx0@d}?>9eUK4e~NK5Bl{e8T*g`Ly|Q^EvYq z=9TG}Id?c|USodBykLIXyxBa?6*1Aj+x$#P%RXR!)_mCfocWk}-lZv?f71NC-G9dX zg897pMf0k3^2!|w=C$UR%}pH6Jv;WGSMST~ z{a{{g-f8Z;$1O4k3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC m1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZK!NWRDEwKzp7me2HIl^u diff --git a/tests/data/test_persist/.wt.data b/tests/data/test_persist/.wt.data index cf48e267ea4eb6f1b731317e7495047c1d867afb..23de210f7b4293f676f17a8e496965853ee2d1c5 100644 GIT binary patch literal 19580 zcmeI3$8uCx00kv-Hpb+fF~MXoHrND{O-4l9AfU8hi}V-#8cUWeSnwb640MaC1D|ki z)vKQAzHf7Cwlg~N?HL`YtAl53Vsh%v#GA?2lT$O7hRySK@Zsb1^z^5}Gjt!|d-x7| zzw82CpbK-j+1DXt_VoyveFH*f--wXeHz8#9%?O!&3qoe!ijdj2A!PRL2$_8cLT2BI zklA-3WcJ+%nSBpJX8#!>v+qU7?E4Th`+kJXegGk}|ALU&4ElfX8#=_v!6!D>}L=%`yU9I{ZE9+PYA!PQ;2$}r~LT0~;klC*xWcKR_nf-5s%zgtQv)@F> z?6(jy`)!2Geg`45-$lsm_YgAseT2+DhLG7GAY}H32$}s6LS}!AklCLgWcH^Bnf)Jx k%>FMzX8#W%vp++~?9UN0`#3^oe}RzMUm|4oR|skUA9;;P_5c6? delta 406 zcmWN|yG=wf5I|8DIiUgc0uTuSUGUf)VEL5cQd|K}D8L0!#FR2+AjAnybaj8f5A}8U z+4jrx)%<(Bp8I|?`@YS=A}QL_Htz{KgGExbqfXEnERs4t94_YV>2AhJWw1z!7H)#h zV38E@wbb`)ckreG!C+G|oNzu+aL1(Z?iuR@xbOwv0Xzw~fXRt_$_MsDW O28*O40bu5|Cc#NBjnu`Ty*(_OaZ-Ypnlwi#apf!qjhBmfs#W*G!oWWh@a zkcIM}k&f}fi5FhyJ-`u1k2r&aS0j!#JR|z|7zwIkj(;3{8vFR;;D^Dd&nM0J{7CvK z9{({pIr(SI#J^1+f1N)5GJUkC@Q?x&pa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP z3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbF zC_sV#D3I>;Pa_}GrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rjMj7s&Vh=X?6oE6j}f`vqw*<7+zrObqu1 z7>S(#W@?sZYmVls&^*o80xi@cE!Gk()iN#D3a!*Ct=5{@Jz$;IYlAjwlQzd516#CJ z+q7Lfv{SpZTc!4Bul8xb4(Ol`>9CIIsE)->1jltkCpD^5I;}H0t8+T93%aOFx~wa@ zs%yHg8>)0uw{%-~bXWIuUt@ZphkB%OJ=POF)iXWU3%%4Uz1ADO)jPe{hi?zYGbliT z|51Upe@{*h;_{fUl<8Q+{Bk#x)tj%g^NV zm*yMHEAx%!wfQFV#(cARYo2!bNh7VZGvAUh#&G@STc>#H{S$BEy#8(GrTKRA%6x}; zZNAgIG2dn0n(sF6%uDm$e2;k%e{6aEd(BJpedd+69UYZ{>ugvod zyfd%Nd-I#-MSK|K_1`it&2O7m=6B3%^SkDa`91U2{Jwc-K4#vVKQJ%iLo2WUp?PWk z$hXf9%vrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ wfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=EqoDKPr;)cvgg0AHtzg#Z8m literal 49146 zcmeI)Ic!r&0LJkzoaQ#SIoI6gHn4!X&24T20nBZPkg{F0rbwAG(l#kmrc9YKWtx>z zY>{G%6suJV3d)p`w(!o_p6Ju=Qc&-Iq|tl+`Pqv9ah{#xO_%;Xx&l?g?EcjFT>bQO zrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun z0u-PC1t>rP3Q&Lo6!?z<@ml{l(l+k@5ay3z_QV@u#QTcNq+IKkl-2-<*Z#-*<~rB; z$Hy=zKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epy`2GUvy8m=de|(08Vg7kV zYApEH3IIK=wE?=qN&pMBNQ<>ZOO??wE!PUI)GDpk8fCRs>$F}Qv{9S1IjkPAMO(E^ z+qFYG!x{s5?b2@T(O&J-ejQLj2X#n?bwo#XOvhE!37yobuoA&(ozYp9bWZ1WK^JvN zmvu#DUDY*R*A3m&E!|c{cXU_xbYBnjP>)pAV?EJRb?cd)>xF81saJZfH+rjgdat@Z z=;QYX;}H~~!2hT~?yqLE5l)ZkOqsU-Tn#09wB+$D8b|bEn8_DK3?q6a%;bwBh7r9U zX7VLrKaS}AVJ2T1F^uR#VJ6Q+3?uqzn90KeF0B!LBFyB=TiQN;+I)ritocgwOn9>N zJgdy}=Bv$1=4;HW=2`Pz^R?y!=IhLd&DWccnQt(kG~Z}GW4_6J&U~|ZHoQMlhqMic zs$jmwjxU>UHLsa(Gw(CsZa!$f!+gYir}?;fT;(T@_;XH~=Ocy@{j2$|mbN{9MM$r6 zw|UNdk9pC2uX)9MpLyMUzj?p;0rMgAg88WVLGuaoL*~=whs|frkCP z=ghC07tL>&SIlpk*UfL4_nY4~A2N@hDRIQtHEMpRrEQ-uziU2ie$RZ?{Jwc6dFUJ{H^(j`8)G*^Y`Xc=5_P0<{!+%zk2hp_oI2vyvN*EkLxfS z3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbF aC_n)UP=Epypa2CZK!I-*DE->Lp7n303XB5) diff --git a/tests/data/test_persist/primary.wt.idx b/tests/data/test_persist/primary.wt.idx index 6924ed7a831d029a011c20c50abcec697623bbfa..af1f56470b5d5b19dce3783b2142b7026014dc6d 100644 GIT binary patch literal 49146 zcmeI)$8OwK7>40bu5|Cc#NBjnu`SzjZ*-S!xpxZ&=xyYSAdnm3jRfE&WEEt|MHX2G z$YS!Jkq+=7fq{7K_W(zfp5Y7*UX3`~@QmocVrP z3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbF z{$GJ~uYVf(n7;oZ=8rKa(+@GyW7T45{qQF#?E{cTy5~PV_Seik|LHjl3Q&Lo6rcbF zC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epy zpa2CZKmiI+fC3bt00k&O0SZun0u-PC1-`vNzVAQZ)1O{pcFf-{N>j7GwgbTAaBqN- z*a={^=4h_wX}$_A&_XTJVlB~9Ez@$X&`Pb+YOT>)t&80QHfWX?q}MC?RxQm1rUqdKFrI;Sz6*9BeFC0*7PUDY*R z*A3lNrCYkKJG!fTx~~Ts*F!zhV@>FZp6Z#N>xEwGm0s(O-s+v+>w`XidoZ3s0SbIa z1=jyP7)-_GF<&Xuv55Ka-B6O3=IJV$M)E3VK7UTa7|H9H+2Jjyo;H=NEjn|A2a)cgfWsA@g@7hp-y{gzR0{XkBwl4BfU;-zQnvSUuxc(FEj7V zmz($IE6j`dedP78G%w9pnOEkk&1>^D=8gGU^VWQwd1s#26-!rdzQN8f;?0rI+h|^z zZ!)jUH=EbyTg)5tt>&$H+T|yWw9d|aTf!K_^_y>>=IQrOyovMrcbJ#vJIyQeUFNm< zZu7=`k9ljp*Ss??&3p5G=0*Im<@N73FU=2_SLO%JYx6_qjrn2o*8GThXMWVYH$P@x z#6=;m|G0T+e!{#m&$~3G-*0Vx(#~(pPnoyor_DR_QS;vXjCm0kue|=V=B4>L^U8e8 zyf!~?-k4u7Z_VQ$+~F9$|III%_vV+)i@3<<^{d{HA$l zUYYmix6F(9Fv#n_ZC;w+F|W+;n%Cy{%p3Fj=B@by^Ui$Syf=SnUc`r1UjHNW()_V` zWjrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00q8MVDy*i`&s`1KYEL` literal 49146 zcmeI)IZTvk9LMn=p5u)-o;%)nkGkrpsCeT&Dk|Qi#Dwk0js+zp$u^Xfl$4Z|G@DRT zQf(+HwxA^0f_?roJm!}*CR*?JpFFUUa{{{17Zzvt)YXVVItSvXEF9H$nJaT`Mc3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O z0SZun0u-RYj}?e({bTYmKK>=GU(@Q2PtwF~r4>?3{WB@{0f>of{^Pc}g=_xfYZwYp zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O z0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2DazCgb2KVQ=ykFYqczphHPMc>;2 zpu4^|KvUWYV6m2Hsg`NE3RAcFipo_YsHeJ>gT~)iT z>AG&{rf%uB?x;g|bx-&8Ko9juk5$nVJ=HUH>bYL%rMmP=uk}W6^-k~gK~;U!r=JhT zS5SZg|Dyse|IE+V(&;guDf98ayP<@KWskFHOn51+>`MaEgmDwm!*$m z!u!(7zC18Z_+VPu3xR3EN7BlkHgKs=_;^~`SJt(0|0(lT=CkIj%?s(l^5d*AFPg74 zFPpD3ub4NR_n5CY?>FCIK4iYpeAIlC`Gomq^J()f=5yv-&70HDNA8f1>8@%u-)8r3 zH{WjFWxm6_*LfcY--Ve{SQW9G5TPfUEDljg<1G~qMmd+OToc#Dvq=U(#`^L^$e z^Zn)><_FBH<_FFD%nz9lnzx#dm>)JDH$P%NWq#Cr*8G@xA^kGv4#&-l<|oX{<|oZ7 z=5elwiT*w2r|R19e)H4jL*{49N6qstP4V~>=4b8x)8^;Q=giNWH>Z2IW9I1%ZhfM|q-29&Tl=*%0S@Q?xh4dnn|KA^)7tJ4; zm(3rWSIjHsJ?2l$`^}%451BtRA2shZpD=%JK5hQOe9ru(d2@Qv%^kYTTg_jYx0}B< z?=pX5-fRBWe8Bvj`LOwW^D*-e=9A`C^BMDx=IKwpug~|Bd5d|sx$hpgz#J$*0SZun z0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP V3Q&Lo6rcbFzEhz5XXAR-e*uNvj57cL diff --git a/tests/data/test_without_secondary_indexes/.wt.data b/tests/data/test_without_secondary_indexes/.wt.data index 555d8cb0e26c8125bc3bdf5a8ffd9d76d658d065..a9fdec9f3cc2f07a962b85ef3843812e2a114671 100644 GIT binary patch literal 19580 zcmeI3$BtB200jpy!DKKwXWN(@OvWbXU=wYENh84qBp4*~E&P%fUU=yx|1oqkRUjRH z;ausdx~uNn9QC#*jeLD34bImqR;w@I0D5GO}#q%7{&(KlkDN;gJ*HzPvvL zA+vvjklCjqWcFzYnSDA!W}ktO*}p}|>@yKE`z(aaKA8H`k=f@UWcIlTnSCBYW}lCc z*%u&W_Js(U{X2xrz6c?+FGk4hOAs>qQiROz?_6eIj*!_`AY}HH2$_8qLS|o$klEKD zWcIZPnSC8XW?zqx**73$_KgUceG@`v-;9viw;*Kp?-4Tl4+xq4M}*A26(O_#gpk>{ zA!PRL2$_8cLT2BIklA-3WcJ+%nSBpJX5WjD+4mu2_WcN%{bz*CegGk}A4JIPhY&LR zVT8}L@&`#FTn zejXvSUqHz07ZEc1C4|g=86mS@LCEY^5i&A+!I6klBAn$m}-|GW$)0%zg_Y zv)@L@>~|0{`(1?0{s%&4{}Umz-$Tgk_YpGt1BA@}5FxYwg^<}FA!PQ)2$}r}LS}!8 GkoJ$UU{8nu delta 406 zcmWN|yG=wf5I|8DIiUgc0uTuSUGUf)VEL5cQd|K}D8L0!#FR2+AjAnybaj8f5A}8U z+4jrx)%<(Bp8I|?`@YS=A}QL_Htz{KgGExbqfXEnERs4t94_YV>2AhJWw1z!7H)#h zV38E@wbb`)ckreG!C+G|oNzu+aL1(Z?iuR@xbOwv0Xzw~fXRt_$_MsDW O28*O40bxs`Lyxs{WZb8bmgBqb_K6h)xDkpPnh>>KnQe1X1*fjNQ~hT&x| zy)@tj{nwBMe6X#B)_w~pHs_gu!aKtrZn|dZ-(yD5K+K`B@rmJYC+g4NPYjNYe*W}v zrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O z0SZun0u-PC1t>rP3Q&Lo6rjL=7f9For;(58_pLF%i8-2X#7Os5i==h^TT;62KaF&a zf4c9_>1+Jca~Kq$00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+ zfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiJTeSv)4f4-(a zy~3=Rzg?EbXMSl1fYJWm05f7IfLWTYIhw0^Dl}gUv`~w*SWC22%d}i8v{I|IT5Gg6 zb`Myu4ce$p+N>?H$G|pi*ADH}F74JH?NzCL+OGpTs6#reBRZ;MI<6D36TwNH(rKO1 zS)J2)UC>2c(q&!IRbA6{-Ox?l(rs0`qr1AN0o~UFJ=7yT))NhCNKf@l!+Ne4dZ|}> ztv7nBcY3c8efaudJc9xh_#YKm_t)g)c$^;dnKB)VnE%`jC3$I{&Z21~uVUuM&rTR4 zc^xzRoP;rwH!-u%O&BA28#DX7gfWu$VrDNA#z@}9%sxM1jO0c9$iASjQ(l@cG_TBK zBbfe3uTz^ZHgC+An78Ij&3op{%scbt=0&_8dHpNQOY@cHmH8_3+I+QnW4^|`HD7Dq zGtcXarK>YvZ;vnH!;v4i!MrrzXkM9bGOx`yn>XfL%vYhJ|3E3f~Yd1-#$yfVLFUYlPuZ_F>5x90H=Zh!PY|K?ZBJM*jNMVw^w`mdRn z=GVrP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt P00q8K;LOic*R%c&anh1U literal 49146 zcmeI)IZTyV7{>7jSKM*Ob#TWWb=2#QJ18m&%5?{kglUuvOu}@Ol$0dXP*PG-(o8~0 zNimsFQc{vk!JPMAj(IX_VLkshIezfFOT2&KyVv4775aKA5;djO+SS|F_Df&)=imBT zx;j69`q=U7?~R{6cJ+7lmL9Yu{rN{)e=RI5%%v4LyLg;gJWekj<2Hr@6rcbFC_n)U zP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZ zKmiI+fC3bt00k&O0SZun0u-PC1t>s)Zx@Jb{bTYmzWyn#pVR7$Z_>nVb*rT6(sxo^ z_a772_{VMYi`V$aV;BlhfC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epy zpa2CZKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2DaygZ?H;g0JGD!@wMTo?9s@Pnr~NvhgF2+cI-*(~)iE8{37ym_omQRB=&a7Aoe0kB zf-b6{OS-Hpx~glst{bY?P2JLM-O*j$(|t9lQ4jP`O?srqdZK1M)ibrIRnPT8Z7S-e zUg@>o=&j!Az1r2G4?iA^M^JzQ|Dyuc|12!@rqg3SQ|9A;cS8ve%N}RZnDDx^vM&ov z6W)+k_T_X#{%<~8Q)%?sum%$v=t%)8Aunh%(7G9NbIY(8eb#eC9ytND!iHuHJ& z?dDbK=OcH>$8=ZKn(wgt*PHJ&FPiT%?=jzPK4`wje8haO`M7!P@)Hw(&nfeoz%=2r z=KD%o_V|d9U*~@FYV!l;b>;`n8_W-xx0@d}?>9eUK4e~NK5Bl{e8T*g`Ly|Q^EvYq z=9TG}Id?c|USodBykLIXyxBa?6*1Aj+x$#P%RXR!)_mCfocWk}-lZv?f71NC-G9dX zg897pMf0k3^2!|w=C$UR%}pH6Jv;WGSMST~ z{a{{g-f8Z;$1O4k3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt00k&O0SZun0u-PC m1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZK!NWRDEwKzp7me2HIl^u diff --git a/tests/persistence/read.rs b/tests/persistence/read.rs index 7b65805..4c2fc05 100644 --- a/tests/persistence/read.rs +++ b/tests/persistence/read.rs @@ -49,7 +49,7 @@ async fn test_primary_index_parse() { let mut key = 1; let length = 24; - let mut offset = 0; + let mut offset = 8; let page_id = 1.into(); for val in &index.inner.index_values[..index.inner.current_length as usize] { @@ -64,7 +64,7 @@ async fn test_primary_index_parse() { ); key += 1; - offset += length; + offset += length + 8; } } @@ -86,7 +86,7 @@ async fn test_another_idx_index_parse() { let mut key = 1; let length = 24; - let mut offset = 0; + let mut offset = 8; let page_id = 1.into(); for val in &index.inner.index_values[..index.inner.current_length as usize] { @@ -101,7 +101,7 @@ async fn test_another_idx_index_parse() { ); key += 1; - offset += length; + offset += length + 8; } } @@ -121,7 +121,7 @@ async fn test_data_parse() { assert_eq!(data.header.previous_id, 0.into()); assert_eq!(data.header.next_id, 0.into()); assert_eq!(data.header.page_type, PageType::Data); - assert_eq!(data.header.data_length, 2376); + assert_eq!(data.header.data_length, 3168); } #[tokio::test] diff --git a/tests/persistence/toc/read.rs b/tests/persistence/toc/read.rs index 1f54a76..2ef1110 100644 --- a/tests/persistence/toc/read.rs +++ b/tests/persistence/toc/read.rs @@ -46,7 +46,7 @@ async fn test_index_table_of_contents_read_from_space() { 99, Link { page_id: 1.into(), - offset: 2352, + offset: 3144, length: 24 } )), diff --git a/tests/worktable/base.rs b/tests/worktable/base.rs index dacc014..a7de57f 100644 --- a/tests/worktable/base.rs +++ b/tests/worktable/base.rs @@ -1196,12 +1196,12 @@ async fn test_update_by_pk() { } //#[test] -fn _bench() { +fn bench() { let table = TestWorkTable::default(); - let mut v = Vec::with_capacity(10000); + let mut v = Vec::with_capacity(100_000); - for i in 0..10000 { + for i in 0..100_000 { let row = TestRow { id: table.get_next_pk().into(), test: i + 1, From 2094fd062c8110eb1c2b40e9c439f268c4ab8350 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:59:13 +0300 Subject: [PATCH 07/12] fix length update on rewrite --- src/in_memory/data.rs | 64 ++++++++++++++++++++++++++++++++++++++--- tests/worktable/base.rs | 2 +- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/src/in_memory/data.rs b/src/in_memory/data.rs index ae6c353..46986f9 100644 --- a/src/in_memory/data.rs +++ b/src/in_memory/data.rs @@ -171,9 +171,10 @@ impl Data { { let bytes = rkyv::to_bytes(row).map_err(|_| ExecutionError::SerializeError)?; let length = bytes.len() as u32; + let length_bytes = rkyv::to_bytes::(&RowLength(length as u64)) + .map_err(|_| ExecutionError::SerializeError)?; let link_left = if link.length > length { - link.length = length; Some(Link { page_id: link.page_id, offset: link.offset + length, @@ -184,8 +185,11 @@ impl Data { } else { return Err(ExecutionError::InvalidLink); }; - + link.length = length; let inner_data = unsafe { &mut *self.inner_data.get() }; + inner_data[link.offset as usize - RowLength::default_aligned_size()..] + [..RowLength::default_aligned_size()] + .copy_from_slice(length_bytes.as_slice()); inner_data[link.offset as usize..][..link.length as usize] .copy_from_slice(bytes.as_slice()); @@ -275,8 +279,10 @@ pub enum ExecutionError { #[cfg(test)] mod tests { - use crate::in_memory::data::{Data, ExecutionError, INNER_PAGE_SIZE, RowLength}; - use data_bucket::DefaultSizeMeasurable; + use crate::in_memory::data::{ + ArchivedRowLength, Data, ExecutionError, INNER_PAGE_SIZE, RowLength, + }; + use data_bucket::{DefaultSizeMeasurable, Link}; use rkyv::{Archive, Deserialize, Serialize}; use std::sync::atomic::Ordering; use std::sync::{Arc, mpsc}; @@ -291,6 +297,15 @@ mod tests { b: u64, } + #[derive( + Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, + )] + #[rkyv(compare(PartialEq), derive(Debug))] + struct UnsizedTestRow { + a: u64, + b: String, + } + #[test] fn data_page_length_valid() { let data = Data::<()>::new(1.into()); @@ -338,6 +353,47 @@ mod tests { assert_eq!(archived, &new_row) } + #[test] + fn data_page_overwrite_row_unsized() { + let page = Data::::new(1.into()); + let row = UnsizedTestRow { + a: 10, + b: "SomeString__________2000".to_string(), + }; + + let link = page.save_row(&row).unwrap(); + + let new_row = UnsizedTestRow { + a: 20, + b: "SomeString".to_string(), + }; + let (res_link, left_link) = unsafe { page.save_row_by_link(&new_row, link) }.unwrap(); + + assert_ne!(res_link, link); + assert_eq!( + left_link, + Some(Link { + page_id: link.page_id, + offset: 40, + length: 8 + }) + ); + assert_eq!(left_link.unwrap().offset, res_link.offset + res_link.length); + assert_eq!(left_link.unwrap().length, link.length - res_link.length); + + let inner_data = unsafe { &mut *page.inner_data.get() }; + let bytes = + &inner_data[res_link.offset as usize..(res_link.offset + res_link.length) as usize]; + let archived = unsafe { rkyv::access_unchecked::(bytes) }; + assert_eq!(archived, &new_row); + + let length_bytes = &inner_data[res_link.offset as usize - RowLength::default_aligned_size() + ..res_link.offset as usize]; + let archived = unsafe { rkyv::access_unchecked::(length_bytes) }; + let length = rkyv::deserialize::(archived).unwrap(); + assert_eq!(length.0 as u32, res_link.length); + } + #[test] fn data_page_full() { let page = Data::::new(1.into()); diff --git a/tests/worktable/base.rs b/tests/worktable/base.rs index a7de57f..2ea6aa2 100644 --- a/tests/worktable/base.rs +++ b/tests/worktable/base.rs @@ -1196,7 +1196,7 @@ async fn test_update_by_pk() { } //#[test] -fn bench() { +fn _bench() { let table = TestWorkTable::default(); let mut v = Vec::with_capacity(100_000); From 4c5bc7d85779a8309c13a4e67b8660b457ee9a62 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 13 Aug 2025 17:46:10 +0300 Subject: [PATCH 08/12] implement get_next_lying_row fn --- src/in_memory/data.rs | 76 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/src/in_memory/data.rs b/src/in_memory/data.rs index 46986f9..0d30ca4 100644 --- a/src/in_memory/data.rs +++ b/src/in_memory/data.rs @@ -230,6 +230,44 @@ impl Data { Ok(unsafe { rkyv::access_unchecked::<::Archived>(bytes) }) } + pub fn get_next_lying_row_ref( + &self, + link: Link, + ) -> Result<(&::Archived, Link), ExecutionError> + where + Row: Archive, + ::Archived: Deserialize>, + { + if link.offset > self.free_offset.load(Ordering::Acquire) + || link.offset + link.length + RowLength::default_aligned_size() as u32 + > self.free_offset.load(Ordering::Acquire) + { + return Err(ExecutionError::DeserializeError); + } + + let inner_data = unsafe { &*self.inner_data.get() }; + let length_bytes = &inner_data[(link.offset + link.length) as usize..] + [..RowLength::default_aligned_size()]; + let archived = + unsafe { rkyv::access_unchecked::<::Archived>(length_bytes) }; + let length = rkyv::deserialize::(archived) + .map_err(|_| ExecutionError::DeserializeError)? + .0; + let offset = (link.offset + link.length) as usize + RowLength::default_aligned_size(); + let bytes = &inner_data[offset..][..length as usize]; + + let row_link = Link { + page_id: link.page_id, + offset: link.offset + link.length + RowLength::default_aligned_size() as u32, + length: length as u32, + }; + + Ok(( + unsafe { rkyv::access_unchecked::<::Archived>(bytes) }, + row_link, + )) + } + pub fn get_row(&self, link: Link) -> Result where Row: Archive, @@ -240,6 +278,19 @@ impl Data { .map_err(|_| ExecutionError::DeserializeError) } + pub fn get_next_lying_row(&self, link: Link) -> Result<(Row, Link), ExecutionError> + where + Row: Archive, + ::Archived: Deserialize>, + { + let (row, row_link) = self.get_next_lying_row_ref(link)?; + Ok(( + rkyv::deserialize::<_, rkyv::rancor::Error>(row) + .map_err(|_| ExecutionError::DeserializeError)?, + row_link, + )) + } + pub fn get_raw_row(&self, link: Link) -> Result, ExecutionError> { if link.offset > self.free_offset.load(Ordering::Acquire) { return Err(ExecutionError::DeserializeError); @@ -279,15 +330,17 @@ pub enum ExecutionError { #[cfg(test)] mod tests { - use crate::in_memory::data::{ - ArchivedRowLength, Data, ExecutionError, INNER_PAGE_SIZE, RowLength, - }; - use data_bucket::{DefaultSizeMeasurable, Link}; - use rkyv::{Archive, Deserialize, Serialize}; use std::sync::atomic::Ordering; use std::sync::{Arc, mpsc}; use std::thread; + use data_bucket::{DefaultSizeMeasurable, Link}; + use rkyv::{Archive, Deserialize, Serialize}; + + use crate::in_memory::data::{ + ArchivedRowLength, Data, ExecutionError, INNER_PAGE_SIZE, RowLength, + }; + #[derive( Archive, Copy, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, )] @@ -501,6 +554,19 @@ mod tests { assert_eq!(deserialized, row) } + #[test] + fn data_page_get_next_row() { + let page = Data::::new(1.into()); + let row1 = TestRow { a: 10, b: 20 }; + let link1 = page.save_row(&row1).unwrap(); + let row2 = TestRow { a: 40, b: 50 }; + let link2 = page.save_row(&row2).unwrap(); + + let (deserialized, res_link) = page.get_next_lying_row(link1).unwrap(); + assert_eq!(deserialized, row2); + assert_eq!(res_link, link2) + } + #[test] fn multithread() { let page = Data::::new(1.into()); From 4590fa68ae212d6feb38e0aa5939d764527f6bf6 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 13 Aug 2025 17:50:24 +0300 Subject: [PATCH 09/12] update gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 9d11d1e..a88387b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,6 @@ Cargo.lock tests/data/sync/ /tests/data/unsized_secondary_sync/ /tests/data/unsized_primary_sync/ +/tests/data/key/ +/tests/data/uuid/ +/tests/data/concurrent From 07377a62e740ee94efc37f01a8914a63ffb910dc Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 13 Aug 2025 20:49:06 +0300 Subject: [PATCH 10/12] fix links deletion --- src/in_memory/data.rs | 28 +++++++++++--- .../empty_links_registry/unsized_.rs | 5 ++- src/in_memory/mod.rs | 2 +- src/in_memory/pages.rs | 13 ++++--- src/table/defragmentator/mod.rs | 13 +++++-- src/table/mod.rs | 2 +- tests/worktable/base.rs | 5 ++- tests/worktable/unsized_.rs | 37 +++++++++++++------ 8 files changed, 75 insertions(+), 30 deletions(-) diff --git a/src/in_memory/data.rs b/src/in_memory/data.rs index 0d30ca4..7a87452 100644 --- a/src/in_memory/data.rs +++ b/src/in_memory/data.rs @@ -169,23 +169,28 @@ impl Data { Strategy, Share>, rkyv::rancor::Error>, >, { + // links in this fn should be used from EmptyLinkRegistry (returned after `delete` method) + let bytes = rkyv::to_bytes(row).map_err(|_| ExecutionError::SerializeError)?; let length = bytes.len() as u32; let length_bytes = rkyv::to_bytes::(&RowLength(length as u64)) .map_err(|_| ExecutionError::SerializeError)?; - let link_left = if link.length > length { + let link_left = if link.length > length + RowLength::default_aligned_size() as u32 { Some(Link { page_id: link.page_id, - offset: link.offset + length, - length: link.length - length, + offset: link.offset + length + RowLength::default_aligned_size() as u32, + length: link.length - (length + RowLength::default_aligned_size() as u32), }) - } else if link.length == length { + } else if link.length == length + RowLength::default_aligned_size() as u32 { None } else { return Err(ExecutionError::InvalidLink); }; + + link.offset += RowLength::default_aligned_size() as u32; link.length = length; + let inner_data = unsafe { &mut *self.inner_data.get() }; inner_data[link.offset as usize - RowLength::default_aligned_size()..] [..RowLength::default_aligned_size()] @@ -301,6 +306,15 @@ impl Data { Ok(bytes.to_vec()) } + pub fn delete_row(mut link: Link) -> Link { + // we don't remove data on delete, but we need to append space allocated + // for length bytes to link + link.offset -= RowLength::default_aligned_size() as u32; + link.length += RowLength::default_aligned_size() as u32; + + link + } + pub fn get_bytes(&self) -> [u8; DATA_LENGTH] { let data = unsafe { &*self.inner_data.get() }; data.0 @@ -394,9 +408,10 @@ mod tests { let row = TestRow { a: 10, b: 20 }; let link = page.save_row(&row).unwrap(); + let res_link = Data::::delete_row(link); let new_row = TestRow { a: 20, b: 20 }; - let res = unsafe { page.save_row_by_link(&new_row, link) }.unwrap(); + let res = unsafe { page.save_row_by_link(&new_row, res_link) }.unwrap(); assert_eq!(res, (link, None)); @@ -415,12 +430,13 @@ mod tests { }; let link = page.save_row(&row).unwrap(); + let res_link = Data::::delete_row(link); let new_row = UnsizedTestRow { a: 20, b: "SomeString".to_string(), }; - let (res_link, left_link) = unsafe { page.save_row_by_link(&new_row, link) }.unwrap(); + let (res_link, left_link) = unsafe { page.save_row_by_link(&new_row, res_link) }.unwrap(); assert_ne!(res_link, link); assert_eq!( diff --git a/src/in_memory/empty_links_registry/unsized_.rs b/src/in_memory/empty_links_registry/unsized_.rs index b9a48c1..e020b17 100644 --- a/src/in_memory/empty_links_registry/unsized_.rs +++ b/src/in_memory/empty_links_registry/unsized_.rs @@ -1,6 +1,7 @@ -use data_bucket::Link; +use data_bucket::{DefaultSizeMeasurable, Link}; use crate::IndexMultiMap; +use crate::in_memory::RowLength; use crate::in_memory::empty_links_registry::EmptyLinksRegistry; #[derive(Debug, Default)] @@ -13,7 +14,7 @@ impl EmptyLinksRegistry for UnsizedEmptyLinkRegistry { fn find_link_with_length(&self, size: u32) -> Option { if let Some(link) = self.0.remove_max().map(|(_, l)| l) { - if link.length < size { + if link.length < size + RowLength::default_aligned_size() as u32 { self.0.insert(link.length, link); None } else { diff --git a/src/in_memory/mod.rs b/src/in_memory/mod.rs index 99fd621..b299cbb 100644 --- a/src/in_memory/mod.rs +++ b/src/in_memory/mod.rs @@ -3,7 +3,7 @@ mod empty_links_registry; mod pages; mod row; -pub use data::{DATA_INNER_LENGTH, Data, ExecutionError as DataExecutionError}; +pub use data::{DATA_INNER_LENGTH, Data, ExecutionError as DataExecutionError, RowLength}; pub use empty_links_registry::{ EmptyLinksRegistry, SizedEmptyLinkRegistry, UnsizedEmptyLinkRegistry, }; diff --git a/src/in_memory/pages.rs b/src/in_memory/pages.rs index f010367..4014b06 100644 --- a/src/in_memory/pages.rs +++ b/src/in_memory/pages.rs @@ -342,8 +342,9 @@ where } pub fn delete(&self, link: Link) -> Result, ExecutionError> { - self.empty_links.add_empty_link(link); - Ok(vec![EmptyLinkRegistryOperation::Add(link)]) + let res_link = Data::::delete_row(link); + self.empty_links.add_empty_link(res_link); + Ok(vec![EmptyLinkRegistryOperation::Add(res_link)]) } pub fn select_raw(&self, link: Link) -> Result, ExecutionError> { @@ -405,7 +406,7 @@ mod tests { use crate::in_memory::data::RowLength; use crate::in_memory::pages::DataPages; - use crate::in_memory::{PagesExecutionError, RowWrapper, StorableRow}; + use crate::in_memory::{Data, PagesExecutionError, RowWrapper, StorableRow}; use crate::prelude::{ EmptyLinksRegistry, SizeMeasurable, SizeMeasure, SizedEmptyLinkRegistry, align, }; @@ -536,9 +537,11 @@ mod tests { assert_eq!( pages.empty_links.find_link_with_length(link.length), - Some(link) + Some(Data::::delete_row(link)) ); - pages.empty_links.add_empty_link(link); + pages + .empty_links + .add_empty_link(Data::::delete_row(link)); let row = TestRow { a: 20, b: 20 }; let new_link = pages.insert(row).unwrap(); diff --git a/src/table/defragmentator/mod.rs b/src/table/defragmentator/mod.rs index eda35eb..f3631f6 100644 --- a/src/table/defragmentator/mod.rs +++ b/src/table/defragmentator/mod.rs @@ -35,7 +35,11 @@ pub struct Defragmentator< PkGen, NodeType, const DATA_LENGTH: usize, -> { +> where + PrimaryKey: Clone + Ord + Send + 'static + std::hash::Hash, + Row: StorableRow + Send + Clone + 'static, + NodeType: NodeLike> + Send + 'static, +{ /// [`WorkTable`] to work with. table: Arc< WorkTable< @@ -118,7 +122,10 @@ where &self, mapped_links: HashMap>, ) -> eyre::Result<()> { - for (page_id, links) in mapped_links { + for (page_id, mut links) in mapped_links { + // sorting `Link`s in ascending order. + links.sort_by(|l1, l2| l1.offset.cmp(&l2.offset)); + let lock_id = self.lock_map.next_id(); let lock = Arc::new(RwLock::new(Lock::new(lock_id))); self.lock_map.insert(page_id, lock).expect( @@ -133,7 +140,7 @@ where let mut map = HashMap::new(); for link in empty_links { map.entry(link.page_id) - .and_modify(|v| v.push(link)) + .and_modify(|v: &mut Vec<_>| v.push(link)) .or_insert(vec![link]); } map diff --git a/src/table/mod.rs b/src/table/mod.rs index 3d09d0c..6c8a517 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,4 +1,4 @@ -//mod defragmentator; +mod defragmentator; pub mod select; pub mod system_info; diff --git a/tests/worktable/base.rs b/tests/worktable/base.rs index 2ea6aa2..45f3ce9 100644 --- a/tests/worktable/base.rs +++ b/tests/worktable/base.rs @@ -199,7 +199,10 @@ async fn update_string() { let selected_row = table.select(pk).unwrap(); assert_eq!(selected_row, updated); - assert_eq!(table.0.data.get_empty_links().first().unwrap(), &first_link); + assert_eq!( + table.0.data.get_empty_links().first().unwrap(), + &Data::::delete_row(first_link) + ); assert!(table.select(2).is_none()) } diff --git a/tests/worktable/unsized_.rs b/tests/worktable/unsized_.rs index 834b0de..3b0c738 100644 --- a/tests/worktable/unsized_.rs +++ b/tests/worktable/unsized_.rs @@ -62,7 +62,10 @@ async fn test_update_string_full_row() { exchange: "bigger test to test string update".to_string(), } ); - assert_eq!(table.0.data.get_empty_links().first().unwrap(), &first_link) + assert_eq!( + table.0.data.get_empty_links().first().unwrap(), + &Data::::delete_row(first_link) + ) } #[tokio::test] @@ -93,7 +96,10 @@ async fn test_update_string_by_unique() { exchange: "bigger test to test string update".to_string(), } ); - assert_eq!(table.0.data.get_empty_links().first().unwrap(), &first_link) + assert_eq!( + table.0.data.get_empty_links().first().unwrap(), + &Data::::delete_row(first_link) + ) } #[tokio::test] @@ -124,7 +130,10 @@ async fn test_update_string_by_pk() { exchange: "bigger test to test string update".to_string(), } ); - assert_eq!(table.0.data.get_empty_links().first().unwrap(), &first_link) + assert_eq!( + table.0.data.get_empty_links().first().unwrap(), + &Data::::delete_row(first_link) + ) } #[tokio::test] @@ -175,8 +184,8 @@ async fn test_update_string_by_non_unique() { ); let empty_links = table.0.data.get_empty_links(); assert_eq!(empty_links.len(), 2); - assert!(empty_links.contains(&first_link)); - assert!(empty_links.contains(&second_link)) + assert!(empty_links.contains(&Data::::delete_row(first_link))); + assert!(empty_links.contains(&Data::::delete_row(second_link))) } #[tokio::test] @@ -350,7 +359,10 @@ async fn test_update_many_strings_by_unique() { other_srting: "other".to_string(), } ); - assert_eq!(table.0.data.get_empty_links().first().unwrap(), &first_link) + assert_eq!( + table.0.data.get_empty_links().first().unwrap(), + &Data::::delete_row(first_link) + ) } #[tokio::test] @@ -386,7 +398,10 @@ async fn test_update_many_strings_by_pk() { other_srting: "other".to_string(), } ); - assert_eq!(table.0.data.get_empty_links().first().unwrap(), &first_link) + assert_eq!( + table.0.data.get_empty_links().first().unwrap(), + &Data::::delete_row(first_link) + ) } #[tokio::test] @@ -449,8 +464,8 @@ async fn test_update_many_strings_by_non_unique() { ); let empty_links = table.0.data.get_empty_links(); assert_eq!(empty_links.len(), 2); - assert!(empty_links.contains(&first_link)); - assert!(empty_links.contains(&second_link)) + assert!(empty_links.contains(&Data::::delete_row(first_link))); + assert!(empty_links.contains(&Data::::delete_row(second_link))) } #[tokio::test] @@ -513,8 +528,8 @@ async fn test_update_many_strings_by_string() { ); let empty_links = table.0.data.get_empty_links(); assert_eq!(empty_links.len(), 2); - assert!(empty_links.contains(&first_link)); - assert!(empty_links.contains(&second_link)) + assert!(empty_links.contains(&Data::::delete_row(first_link))); + assert!(empty_links.contains(&Data::::delete_row(second_link))) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] From fce55d9e4a4cb1c068909df6c831b3111e68e513 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Thu, 14 Aug 2025 22:46:44 +0300 Subject: [PATCH 11/12] some first implementation to test --- src/in_memory/data.rs | 6 +- src/in_memory/pages.rs | 44 +++++--- src/table/defragmentator/mod.rs | 175 +++++++++++++++++++++++++++----- 3 files changed, 184 insertions(+), 41 deletions(-) diff --git a/src/in_memory/data.rs b/src/in_memory/data.rs index 7a87452..427e09d 100644 --- a/src/in_memory/data.rs +++ b/src/in_memory/data.rs @@ -247,7 +247,7 @@ impl Data { || link.offset + link.length + RowLength::default_aligned_size() as u32 > self.free_offset.load(Ordering::Acquire) { - return Err(ExecutionError::DeserializeError); + return Err(ExecutionError::InvalidLink); } let inner_data = unsafe { &*self.inner_data.get() }; @@ -319,6 +319,10 @@ impl Data { let data = unsafe { &*self.inner_data.get() }; data.0 } + + pub(crate) fn set_free_offset(&self, offset: u32) { + self.free_offset.store(offset, Ordering::Relaxed) + } } /// Error that can appear on [`Data`] page operations. diff --git a/src/in_memory/pages.rs b/src/in_memory/pages.rs index 4014b06..bd18eec 100644 --- a/src/in_memory/pages.rs +++ b/src/in_memory/pages.rs @@ -1,12 +1,13 @@ use std::{ fmt::Debug, + sync::Arc, sync::atomic::{AtomicU32, AtomicU64, Ordering}, - sync::{Arc, RwLock}, }; use data_bucket::SizeMeasurable; use data_bucket::page::PageId; use derive_more::{Display, Error, From}; +use parking_lot::RwLock; use rkyv::{ Archive, Deserialize, Portable, Serialize, api::high::HighDeserializer, @@ -136,7 +137,7 @@ where let mut empty_link_registry_ops = vec![]; empty_link_registry_ops.push(EmptyLinkRegistryOperation::Remove(link)); - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let current_page: usize = page_id_mapper(link.page_id.into()); let page = &pages[current_page]; @@ -163,7 +164,7 @@ where loop { let (link, tried_page) = { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let current_page = page_id_mapper(self.current_page_id.load(Ordering::Acquire) as usize); let page = &pages[current_page]; @@ -216,7 +217,7 @@ where } fn add_next_page(&self, tried_page: usize) { - let mut pages = self.pages.write().expect("lock should be not poisoned"); + let mut pages = self.pages.write(); if tried_page == page_id_mapper(self.current_page_id.load(Ordering::Acquire) as usize) { let index = self.last_page_id.fetch_add(1, Ordering::AcqRel) + 1; @@ -234,9 +235,8 @@ where <::WrappedRow as Archive>::Archived: Portable + Deserialize<::WrappedRow, HighDeserializer>, { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let page = pages - // - 1 is used because page ids are starting from 1. .get(page_id_mapper(link.page_id.into())) .ok_or(ExecutionError::PageNotFound(link.page_id))?; let gen_row = page.get_row(link).map_err(ExecutionError::DataPageError)?; @@ -252,9 +252,8 @@ where <::WrappedRow as Archive>::Archived: Portable + Deserialize<::WrappedRow, HighDeserializer>, { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let page = pages - // - 1 is used because page ids are starting from 1. .get(page_id_mapper(link.page_id.into())) .ok_or(ExecutionError::PageNotFound(link.page_id))?; let gen_row = page.get_row(link).map_err(ExecutionError::DataPageError)?; @@ -272,7 +271,7 @@ where >, Op: Fn(&<::WrappedRow as Archive>::Archived) -> Res, { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let page = pages .get::(page_id_mapper(link.page_id.into())) .ok_or(ExecutionError::PageNotFound(link.page_id))?; @@ -297,7 +296,7 @@ where <::WrappedRow as Archive>::Archived: Portable, Op: FnMut(&mut <::WrappedRow as Archive>::Archived) -> Res, { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let page = pages .get(page_id_mapper(link.page_id.into())) .ok_or(ExecutionError::PageNotFound(link.page_id))?; @@ -328,7 +327,7 @@ where Strategy, Share>, rkyv::rancor::Error>, >, { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let page = pages .get(page_id_mapper(link.page_id.into())) .ok_or(ExecutionError::PageNotFound(link.page_id))?; @@ -348,7 +347,7 @@ where } pub fn select_raw(&self, link: Link) -> Result, ExecutionError> { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); let page = pages .get(page_id_mapper(link.page_id.into())) .ok_or(ExecutionError::PageNotFound(link.page_id))?; @@ -357,7 +356,7 @@ where } pub fn get_bytes(&self) -> Vec<([u8; DATA_LENGTH], u32)> { - let pages = self.pages.read().unwrap(); + let pages = self.pages.read(); pages .iter() .map(|p| (p.get_bytes(), p.free_offset.load(Ordering::Relaxed))) @@ -365,7 +364,7 @@ where } pub fn get_page_count(&self) -> usize { - self.pages.read().unwrap().len() + self.pages.read().len() } pub fn get_empty_links(&self) -> Vec { @@ -381,6 +380,18 @@ where self } + + pub(crate) fn get_page( + &self, + id: PageId, + ) -> Result::WrappedRow, DATA_LENGTH>>, ExecutionError> { + let pages = self.pages.read(); + let page = pages + .get(page_id_mapper(id.into())) + .ok_or(ExecutionError::PageNotFound(id))?; + + Ok(page.clone()) + } } #[derive(Debug, Display, Error, From, PartialEq)] @@ -396,14 +407,15 @@ pub enum ExecutionError { #[cfg(test)] mod tests { - use data_bucket::DefaultSizeMeasurable; - use rkyv::{Archive, Deserialize, Serialize}; use std::collections::HashSet; use std::sync::atomic::Ordering; use std::sync::{Arc, RwLock}; use std::thread; use std::time::Instant; + use data_bucket::DefaultSizeMeasurable; + use rkyv::{Archive, Deserialize, Serialize}; + use crate::in_memory::data::RowLength; use crate::in_memory::pages::DataPages; use crate::in_memory::{Data, PagesExecutionError, RowWrapper, StorableRow}; diff --git a/src/table/defragmentator/mod.rs b/src/table/defragmentator/mod.rs index f3631f6..fb8a584 100644 --- a/src/table/defragmentator/mod.rs +++ b/src/table/defragmentator/mod.rs @@ -1,17 +1,26 @@ -use std::collections::HashMap; -use std::fmt::Debug; -use std::sync::Arc; - -use data_bucket::Link; use data_bucket::page::PageId; +use data_bucket::{Link, SizeMeasurable}; use indexset::core::node::NodeLike; use indexset::core::pair::Pair; +use rkyv::api::high::HighDeserializer; +use rkyv::rancor::Strategy; +use rkyv::ser::Serializer; +use rkyv::ser::allocator::ArenaHandle; +use rkyv::ser::sharing::Share; +use rkyv::util::AlignedVec; +use rkyv::{Archive, Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; +use std::sync::Arc; +use std::sync::atomic::Ordering; use tokio::sync::{Notify, RwLock}; -use crate::WorkTable; -use crate::in_memory::{EmptyLinksRegistry, RowWrapper, StorableRow}; +use crate::in_memory::{ + DataExecutionError, EmptyLinksRegistry, GhostWrapper, RowWrapper, StorableRow, +}; use crate::lock::LockMap; use crate::prelude::{Lock, TablePrimaryKey}; +use crate::{TableRow, TableSecondaryIndex, WorkTable, WorkTableError}; pub struct DefragmentatorTask { task_handle: tokio::task::AbortHandle, @@ -90,11 +99,23 @@ impl< where PrimaryKey: Debug + Clone + Ord + Send + TablePrimaryKey + std::hash::Hash, EmptyLinks: Default + EmptyLinksRegistry, - SecondaryIndexes: Default, + SecondaryIndexes: Default + TableSecondaryIndex, PkGen: Default, + AvailableIndexes: Debug, NodeType: NodeLike> + Send + 'static, - Row: StorableRow + Send + Clone + 'static, + Row: Archive + TableRow + StorableRow + Send + Clone + 'static, ::WrappedRow: RowWrapper, + Row: Archive + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, + <::WrappedRow as Archive>::Archived: + Deserialize<::WrappedRow, HighDeserializer>, + ::WrappedRow: Archive + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + > + SizeMeasurable, + <::WrappedRow as Archive>::Archived: GhostWrapper, { fn defragment(&self) -> eyre::Result<()> { const SINGLE_LINK_RATIO_TRIGGER: f32 = 0.4; @@ -104,45 +125,151 @@ where let mapped_links = Self::map_empty_links_by_pages(empty_links); let single_link_pages = mapped_links.values().filter(|v| v.len() == 1).count(); - if single_link_pages as f32 / empty_links_len as f32 > SINGLE_LINK_RATIO_TRIGGER { - self.defragment_if_triggered(mapped_links) - } else { - self.defragment_if_not_triggered(mapped_links) - } + let links_left = + if single_link_pages as f32 / empty_links_len as f32 > SINGLE_LINK_RATIO_TRIGGER { + self.defragment_if_triggered(mapped_links) + } else { + self.defragment_if_not_triggered(mapped_links) + }?; + + Ok(()) } fn defragment_if_triggered( &self, - mapped_links: HashMap>, - ) -> eyre::Result<()> { - Ok(()) + mapped_links: HashMap>, + ) -> eyre::Result> { + let mut links_left = vec![]; + Ok(links_left) } fn defragment_if_not_triggered( &self, - mapped_links: HashMap>, - ) -> eyre::Result<()> { + mapped_links: HashMap>, + ) -> eyre::Result> { + let mut links_left = vec![]; + for (page_id, mut links) in mapped_links { // sorting `Link`s in ascending order. - links.sort_by(|l1, l2| l1.offset.cmp(&l2.offset)); + let first_link = links + .iter() + .min_by(|l1, l2| l1.offset.cmp(&l2.offset)) + .copied() + .expect("links should not be empty"); + let mut temporary_removed_rows = vec![]; let lock_id = self.lock_map.next_id(); let lock = Arc::new(RwLock::new(Lock::new(lock_id))); self.lock_map.insert(page_id, lock).expect( "nothing should be returned as this is single defragmentation thread for table", ); + + links.remove(&first_link); + let mut link = first_link; + let page = self.table.data.get_page(page_id)?; + + // removing all rows on page after first link + loop { + let res = page.get_next_lying_row(link); + let (wrapped_row, res_link) = match res { + Ok(res) => res, + Err(e) => match e { + DataExecutionError::InvalidLink => { + break; + } + _ => return Err(e.into()), + }, + }; + + if !links.remove(&res_link) { + let row = wrapped_row.get_inner().clone(); + temporary_removed_rows.push(row.clone()); + self.table + .pk_map + .remove(&row.get_primary_key()) + .expect("should exist as current page is blocked"); + self.table + .indexes + .delete_row(row, res_link) + .expect("should be ok as current page is blocked") + } + + link = res_link; + } + + page.set_free_offset(first_link.offset); + + for row in temporary_removed_rows { + let wrapped_row = Row::WrappedRow::from_inner(row.clone()); + let new_link = page.save_row(&wrapped_row)?; + self.table + .pk_map + .insert(row.get_primary_key(), new_link) + .expect("should not exist as current page was blocked"); + self.table + .indexes + .save_row(row, new_link) + .expect("should be ok as current page was blocked"); + unsafe { self.table.data.with_mut_ref(new_link, |r| r.unghost())? } + } + + let current_offset = page.free_offset.load(Ordering::Relaxed); + page.set_free_offset(DATA_LENGTH as u32); + + let link_left = Link { + page_id, + offset: current_offset, + length: DATA_LENGTH as u32 - current_offset, + }; + + links_left.push(link_left) } - Ok(()) + Ok(links_left) } - fn map_empty_links_by_pages(empty_links: Vec) -> HashMap> { + fn map_empty_links_by_pages(empty_links: Vec) -> HashMap> { let mut map = HashMap::new(); for link in empty_links { map.entry(link.page_id) - .and_modify(|v: &mut Vec<_>| v.push(link)) - .or_insert(vec![link]); + .and_modify(|s: &mut HashSet<_>| { + s.insert(link); + }) + .or_insert({ + let mut s = HashSet::new(); + s.insert(link); + s + }); } + map } } + +#[cfg(test)] +mod tests { + use crate::prelude::*; + use worktable_codegen::worktable; + + worktable! ( + name: Test, + columns: { + id: u64 primary_key autoincrement, + test: i64, + another: u64, + exchange: String + }, + indexes: { + test_idx: test unique, + exchnage_idx: exchange, + another_idx: another, + } + queries: { + delete: { + ByAnother() by another, + ByExchange() by exchange, + ByTest() by test, + } + } + ); +} From a3a72f2b23fdcfa9183a8025729eb0285d1325bc Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 15 Aug 2025 00:17:18 +0300 Subject: [PATCH 12/12] test in progress --- src/table/defragmentator/mod.rs | 68 ++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/src/table/defragmentator/mod.rs b/src/table/defragmentator/mod.rs index fb8a584..1f2a95b 100644 --- a/src/table/defragmentator/mod.rs +++ b/src/table/defragmentator/mod.rs @@ -23,8 +23,6 @@ use crate::prelude::{Lock, TablePrimaryKey}; use crate::{TableRow, TableSecondaryIndex, WorkTable, WorkTableError}; pub struct DefragmentatorTask { - task_handle: tokio::task::AbortHandle, - /// Shared map for locking pages that are in defragmentation progress. lock: Arc>, @@ -117,6 +115,36 @@ where > + SizeMeasurable, <::WrappedRow as Archive>::Archived: GhostWrapper, { + pub fn new( + table: Arc< + WorkTable< + Row, + PrimaryKey, + EmptyLinks, + AvailableTypes, + AvailableIndexes, + SecondaryIndexes, + LockType, + PkGen, + NodeType, + DATA_LENGTH, + >, + >, + ) -> (Self, DefragmentatorTask) { + let s = Self { + table, + lock_map: Arc::new(LockMap::default()), + notify: Arc::new(Notify::new()), + }; + + let t = DefragmentatorTask { + lock: s.lock_map.clone(), + notify: s.notify.clone(), + }; + + (s, t) + } + fn defragment(&self) -> eyre::Result<()> { const SINGLE_LINK_RATIO_TRIGGER: f32 = 0.4; @@ -249,6 +277,9 @@ where #[cfg(test)] mod tests { use crate::prelude::*; + use crate::table::defragmentator::Defragmentator; + use std::collections::HashSet; + use std::sync::Arc; use worktable_codegen::worktable; worktable! ( @@ -272,4 +303,37 @@ mod tests { } } ); + + #[tokio::test] + async fn test_defragmentation_logic() { + let table = Arc::new(TestWorkTable::default()); + + let mut pks = HashSet::new(); + + for i in 0..100 { + let row = TestRow { + id: table.get_next_pk().into(), + test: i, + another: (i as u64) % 20, + exchange: format!("another_{i}"), + }; + let pk = table.insert(row).unwrap(); + pks.insert(pk.0); + } + + let mut removed_pks = HashSet::new(); + while removed_pks.len() != 30 { + let mut id = fastrand::u64(0..100); + while !removed_pks.insert(id) { + id = fastrand::u64(0..100); + } + } + + for pk in removed_pks { + pks.remove(&pk); + table.delete(pk.into()).await.unwrap(); + } + + let (d, _) = Defragmentator::new(table.clone()); + } }