diff --git a/.github/workflows/arm.yml b/.github/workflows/arm.yml index 2e63e6a93..e2aa8c317 100644 --- a/.github/workflows/arm.yml +++ b/.github/workflows/arm.yml @@ -18,7 +18,7 @@ jobs: aws-region: ${{ secrets.AWS_REGION }} - name: Start EC2 runner id: start-ec2-runner - uses: machulav/ec2-github-runner@v2.4.1 + uses: machulav/ec2-github-runner@v2.4.2 with: mode: start github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} @@ -50,7 +50,7 @@ jobs: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ secrets.AWS_REGION }} - name: Stop EC2 runner - uses: machulav/ec2-github-runner@v2.4.1 + uses: machulav/ec2-github-runner@v2.4.2 with: mode: stop github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} diff --git a/.github/workflows/benchmark-runner.yml b/.github/workflows/benchmark-runner.yml index 50f5d785c..534d9f674 100644 --- a/.github/workflows/benchmark-runner.yml +++ b/.github/workflows/benchmark-runner.yml @@ -31,7 +31,7 @@ jobs: aws-region: ${{ secrets.AWS_REGION_BENCHMARK }} - name: Start EC2 runner id: start-ec2-runner - uses: machulav/ec2-github-runner@v2 + uses: machulav/ec2-github-runner@v2.4.2 with: mode: start github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} @@ -102,10 +102,9 @@ jobs: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ secrets.AWS_REGION_BENCHMARK }} - name: Stop EC2 runner - uses: machulav/ec2-github-runner@v2 + uses: machulav/ec2-github-runner@v2.4.2 with: mode: stop github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} label: ${{ needs.start-runner.outputs.runner_label }} ec2-instance-id: ${{ needs.start-runner.outputs.ec2_instance_id }} - diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 2ef2bff72..1da4a084a 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -16,7 +16,7 @@ jobs: aws-region: ${{ secrets.AWS_REGION_BENCHMARK }} - name: Start EC2 runner id: start-ec2-runner - uses: machulav/ec2-github-runner@v2 + uses: machulav/ec2-github-runner@v2.4.2 with: mode: start github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} @@ -103,7 +103,7 @@ jobs: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ secrets.AWS_REGION_BENCHMARK }} - name: Stop EC2 runner - uses: machulav/ec2-github-runner@v2 + uses: machulav/ec2-github-runner@v2.4.2 with: mode: stop github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} diff --git a/rust/vecsim-c/src/index.rs b/rust/vecsim-c/src/index.rs index 60195a3e6..86990c175 100644 --- a/rust/vecsim-c/src/index.rs +++ b/rust/vecsim-c/src/index.rs @@ -1099,9 +1099,20 @@ macro_rules! impl_tiered_wrapper { }; } -// Implement wrappers for Tiered indices (f32 only for now) +// Implement wrappers for Tiered indices impl_tiered_wrapper!(TieredSingleF32Wrapper, TieredSingle, f32, false); +impl_tiered_wrapper!(TieredSingleF64Wrapper, TieredSingle, f64, false); +impl_tiered_wrapper!(TieredSingleBF16Wrapper, TieredSingle, BFloat16, false); +impl_tiered_wrapper!(TieredSingleFP16Wrapper, TieredSingle, Float16, false); +impl_tiered_wrapper!(TieredSingleI8Wrapper, TieredSingle, Int8, false); +impl_tiered_wrapper!(TieredSingleU8Wrapper, TieredSingle, UInt8, false); + impl_tiered_wrapper!(TieredMultiF32Wrapper, TieredMulti, f32, true); +impl_tiered_wrapper!(TieredMultiF64Wrapper, TieredMulti, f64, true); +impl_tiered_wrapper!(TieredMultiBF16Wrapper, TieredMulti, BFloat16, true); +impl_tiered_wrapper!(TieredMultiFP16Wrapper, TieredMulti, Float16, true); +impl_tiered_wrapper!(TieredMultiI8Wrapper, TieredMulti, Int8, true); +impl_tiered_wrapper!(TieredMultiU8Wrapper, TieredMulti, UInt8, true); // ============================================================================ // Disk Index Wrappers @@ -1247,8 +1258,13 @@ macro_rules! impl_disk_wrapper { }; } -// Implement wrappers for Disk indices (f32 only for now) +// Implement wrappers for Disk indices impl_disk_wrapper!(DiskSingleF32Wrapper, f32); +impl_disk_wrapper!(DiskSingleF64Wrapper, f64); +impl_disk_wrapper!(DiskSingleBF16Wrapper, BFloat16); +impl_disk_wrapper!(DiskSingleFP16Wrapper, Float16); +impl_disk_wrapper!(DiskSingleI8Wrapper, Int8); +impl_disk_wrapper!(DiskSingleU8Wrapper, UInt8); /// Create a new disk-based index. pub fn create_disk_index(params: &DiskParams) -> Option> { @@ -1257,14 +1273,34 @@ pub fn create_disk_index(params: &DiskParams) -> Option> { let metric = params.base.metric; let dim = params.base.dim; - // Only f32 is supported for now - if data_type != VecSimType::VecSimType_FLOAT32 { - return None; - } - - let index = DiskIndexSingle::::new(rust_params).ok()?; - let wrapper: Box = - Box::new(DiskSingleF32Wrapper::new(index, data_type, metric)); + let wrapper: Box = match data_type { + VecSimType::VecSimType_FLOAT32 => { + let index = DiskIndexSingle::::new(rust_params).ok()?; + Box::new(DiskSingleF32Wrapper::new(index, data_type, metric)) + } + VecSimType::VecSimType_FLOAT64 => { + let index = DiskIndexSingle::::new(rust_params).ok()?; + Box::new(DiskSingleF64Wrapper::new(index, data_type, metric)) + } + VecSimType::VecSimType_BFLOAT16 => { + let index = DiskIndexSingle::::new(rust_params).ok()?; + Box::new(DiskSingleBF16Wrapper::new(index, data_type, metric)) + } + VecSimType::VecSimType_FLOAT16 => { + let index = DiskIndexSingle::::new(rust_params).ok()?; + Box::new(DiskSingleFP16Wrapper::new(index, data_type, metric)) + } + VecSimType::VecSimType_INT8 => { + let index = DiskIndexSingle::::new(rust_params).ok()?; + Box::new(DiskSingleI8Wrapper::new(index, data_type, metric)) + } + VecSimType::VecSimType_UINT8 => { + let index = DiskIndexSingle::::new(rust_params).ok()?; + Box::new(DiskSingleU8Wrapper::new(index, data_type, metric)) + } + // INT32 and INT64 types not supported for disk indices + VecSimType::VecSimType_INT32 | VecSimType::VecSimType_INT64 => return None, + }; Some(Box::new(IndexHandle::new( wrapper, @@ -1540,17 +1576,57 @@ pub fn create_tiered_index(params: &TieredParams) -> Option> { let dim = params.base.dim; let is_multi = params.base.multi; - // Tiered index currently only supports f32 let wrapper: Box = match (data_type, is_multi) { (VecSimType::VecSimType_FLOAT32, false) => Box::new(TieredSingleF32Wrapper::new( TieredSingle::new(rust_params), data_type, )), + (VecSimType::VecSimType_FLOAT64, false) => Box::new(TieredSingleF64Wrapper::new( + TieredSingle::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_BFLOAT16, false) => Box::new(TieredSingleBF16Wrapper::new( + TieredSingle::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_FLOAT16, false) => Box::new(TieredSingleFP16Wrapper::new( + TieredSingle::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_INT8, false) => Box::new(TieredSingleI8Wrapper::new( + TieredSingle::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_UINT8, false) => Box::new(TieredSingleU8Wrapper::new( + TieredSingle::new(rust_params), + data_type, + )), (VecSimType::VecSimType_FLOAT32, true) => Box::new(TieredMultiF32Wrapper::new( TieredMulti::new(rust_params), data_type, )), - _ => return None, // Tiered only supports f32 currently + (VecSimType::VecSimType_FLOAT64, true) => Box::new(TieredMultiF64Wrapper::new( + TieredMulti::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_BFLOAT16, true) => Box::new(TieredMultiBF16Wrapper::new( + TieredMulti::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_FLOAT16, true) => Box::new(TieredMultiFP16Wrapper::new( + TieredMulti::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_INT8, true) => Box::new(TieredMultiI8Wrapper::new( + TieredMulti::new(rust_params), + data_type, + )), + (VecSimType::VecSimType_UINT8, true) => Box::new(TieredMultiU8Wrapper::new( + TieredMulti::new(rust_params), + data_type, + )), + // INT32 and INT64 types not yet supported for vector indices + (VecSimType::VecSimType_INT32, _) | (VecSimType::VecSimType_INT64, _) => return None, }; Some(Box::new(IndexHandle::new( diff --git a/rust/vecsim-c/src/lib.rs b/rust/vecsim-c/src/lib.rs index ed1dec6a7..429e1a80c 100644 --- a/rust/vecsim-c/src/lib.rs +++ b/rust/vecsim-c/src/lib.rs @@ -588,6 +588,16 @@ pub unsafe extern "C" fn VecSimIndex_NewHNSW(params: *const HNSWParams_C) -> *mu } } +/// Convert compat VecSimSvsQuantBits to types VecSimSvsQuantBits. +fn convert_compat_quant_bits(quant: compat::VecSimSvsQuantBits) -> types::VecSimSvsQuantBits { + match quant { + compat::VecSimSvsQuantBits::VecSimSvsQuant_NONE => types::VecSimSvsQuantBits::VecSimSvsQuant_NONE, + compat::VecSimSvsQuantBits::VecSimSvsQuant_Scalar => types::VecSimSvsQuantBits::VecSimSvsQuant_Scalar, + compat::VecSimSvsQuantBits::VecSimSvsQuant_4 => types::VecSimSvsQuantBits::VecSimSvsQuant_4, + compat::VecSimSvsQuantBits::VecSimSvsQuant_8 => types::VecSimSvsQuantBits::VecSimSvsQuant_8, + } +} + /// Create a new SVS (Vamana) index with specific parameters. /// /// # Safety @@ -616,6 +626,7 @@ pub unsafe extern "C" fn VecSimIndex_NewSVS(params: *const SVSParams_C) -> *mut constructionWindowSize: c_params.construction_window_size, searchWindowSize: c_params.search_window_size, twoPassConstruction: true, // Default value + quantBits: convert_compat_quant_bits(c_params.quantBits), }; match create_svs_index(&rust_params) { Some(boxed) => Box::into_raw(boxed) as *mut VecSimIndex, @@ -2253,7 +2264,7 @@ fn load_index_from_file(path: &std::path::Path) -> Option> { true, ))) } - // HNSW Single + // HNSW Single - f32 (IndexTypeId::HnswSingle, DataTypeId::F32) => { use vecsim::index::HnswSingle; let index = HnswSingle::::load_from_file(path).ok()?; @@ -2270,7 +2281,96 @@ fn load_index_from_file(path: &std::path::Path) -> Option> { false, ))) } - // HNSW Multi + // HNSW Single - f64 + (IndexTypeId::HnswSingle, DataTypeId::F64) => { + use vecsim::index::HnswSingle; + let index = HnswSingle::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswSingleF64Wrapper::new( + index, + VecSimType::VecSimType_FLOAT64, + )), + VecSimType::VecSimType_FLOAT64, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + false, + ))) + } + // HNSW Single - BFloat16 + (IndexTypeId::HnswSingle, DataTypeId::BFloat16) => { + use vecsim::index::HnswSingle; + use vecsim::types::BFloat16; + let index = HnswSingle::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswSingleBF16Wrapper::new( + index, + VecSimType::VecSimType_BFLOAT16, + )), + VecSimType::VecSimType_BFLOAT16, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + false, + ))) + } + // HNSW Single - Float16 + (IndexTypeId::HnswSingle, DataTypeId::Float16) => { + use vecsim::index::HnswSingle; + use vecsim::types::Float16; + let index = HnswSingle::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswSingleFP16Wrapper::new( + index, + VecSimType::VecSimType_FLOAT16, + )), + VecSimType::VecSimType_FLOAT16, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + false, + ))) + } + // HNSW Single - Int8 + (IndexTypeId::HnswSingle, DataTypeId::Int8) => { + use vecsim::index::HnswSingle; + use vecsim::types::Int8; + let index = HnswSingle::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswSingleI8Wrapper::new( + index, + VecSimType::VecSimType_INT8, + )), + VecSimType::VecSimType_INT8, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + false, + ))) + } + // HNSW Single - UInt8 + (IndexTypeId::HnswSingle, DataTypeId::UInt8) => { + use vecsim::index::HnswSingle; + use vecsim::types::UInt8; + let index = HnswSingle::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswSingleU8Wrapper::new( + index, + VecSimType::VecSimType_UINT8, + )), + VecSimType::VecSimType_UINT8, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + false, + ))) + } + // HNSW Multi - f32 (IndexTypeId::HnswMulti, DataTypeId::F32) => { use vecsim::index::HnswMulti; let index = HnswMulti::::load_from_file(path).ok()?; @@ -2287,6 +2387,95 @@ fn load_index_from_file(path: &std::path::Path) -> Option> { true, ))) } + // HNSW Multi - f64 + (IndexTypeId::HnswMulti, DataTypeId::F64) => { + use vecsim::index::HnswMulti; + let index = HnswMulti::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswMultiF64Wrapper::new( + index, + VecSimType::VecSimType_FLOAT64, + )), + VecSimType::VecSimType_FLOAT64, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + true, + ))) + } + // HNSW Multi - BFloat16 + (IndexTypeId::HnswMulti, DataTypeId::BFloat16) => { + use vecsim::index::HnswMulti; + use vecsim::types::BFloat16; + let index = HnswMulti::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswMultiBF16Wrapper::new( + index, + VecSimType::VecSimType_BFLOAT16, + )), + VecSimType::VecSimType_BFLOAT16, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + true, + ))) + } + // HNSW Multi - Float16 + (IndexTypeId::HnswMulti, DataTypeId::Float16) => { + use vecsim::index::HnswMulti; + use vecsim::types::Float16; + let index = HnswMulti::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswMultiFP16Wrapper::new( + index, + VecSimType::VecSimType_FLOAT16, + )), + VecSimType::VecSimType_FLOAT16, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + true, + ))) + } + // HNSW Multi - Int8 + (IndexTypeId::HnswMulti, DataTypeId::Int8) => { + use vecsim::index::HnswMulti; + use vecsim::types::Int8; + let index = HnswMulti::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswMultiI8Wrapper::new( + index, + VecSimType::VecSimType_INT8, + )), + VecSimType::VecSimType_INT8, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + true, + ))) + } + // HNSW Multi - UInt8 + (IndexTypeId::HnswMulti, DataTypeId::UInt8) => { + use vecsim::index::HnswMulti; + use vecsim::types::UInt8; + let index = HnswMulti::::load_from_file(path).ok()?; + let metric = convert_metric_to_c(header.metric); + Some(Box::new(IndexHandle::new( + Box::new(index::HnswMultiU8Wrapper::new( + index, + VecSimType::VecSimType_UINT8, + )), + VecSimType::VecSimType_UINT8, + VecSimAlgo::VecSimAlgo_HNSWLIB, + metric, + header.dimension, + true, + ))) + } // SVS Single (IndexTypeId::SvsSingle, DataTypeId::F32) => { use vecsim::index::SvsSingle; @@ -3368,11 +3557,11 @@ mod tests { #[test] fn test_tiered_unsupported_type_returns_null() { let mut params = test_tiered_params(); - params.base.type_ = VecSimType::VecSimType_FLOAT64; // Not supported + params.base.type_ = VecSimType::VecSimType_INT32; // Not supported unsafe { let index = VecSimIndex_NewTiered(¶ms); - assert!(index.is_null(), "Tiered should fail for f64"); + assert!(index.is_null(), "Tiered should fail for INT32"); } } @@ -3630,13 +3819,13 @@ mod tests { #[test] fn test_disk_unsupported_type_returns_null() { let dir = tempdir().unwrap(); - let path = dir.path().join("test_disk_f64.bin"); + let path = dir.path().join("test_disk_int32.bin"); let path_cstr = CString::new(path.to_str().unwrap()).unwrap(); let params = DiskParams { base: VecSimParams { algo: VecSimAlgo::VecSimAlgo_BF, - type_: VecSimType::VecSimType_FLOAT64, // Not supported + type_: VecSimType::VecSimType_INT32, // Not supported dim: 4, metric: VecSimMetric::VecSimMetric_L2, multi: false, @@ -3653,7 +3842,7 @@ mod tests { unsafe { let index = VecSimIndex_NewDisk(¶ms); - assert!(index.is_null(), "Disk index should fail for f64"); + assert!(index.is_null(), "Disk index should fail for INT32"); } } diff --git a/rust/vecsim-c/src/params.rs b/rust/vecsim-c/src/params.rs index 317a286c7..30d23fecf 100644 --- a/rust/vecsim-c/src/params.rs +++ b/rust/vecsim-c/src/params.rs @@ -1,6 +1,6 @@ //! C-compatible parameter structs for index creation. -use crate::types::{VecSimAlgo, VecSimMetric, VecSimType}; +use crate::types::{VecSimAlgo, VecSimMetric, VecSimSvsQuantBits, VecSimType}; /// Common base parameters for all index types. #[repr(C)] @@ -102,6 +102,8 @@ pub struct SVSParams { pub searchWindowSize: usize, /// Enable two-pass construction for better recall (default: true). pub twoPassConstruction: bool, + /// Quantization mode for memory-efficient storage. + pub quantBits: VecSimSvsQuantBits, } impl Default for SVSParams { @@ -116,6 +118,7 @@ impl Default for SVSParams { constructionWindowSize: 200, searchWindowSize: 100, twoPassConstruction: true, + quantBits: VecSimSvsQuantBits::VecSimSvsQuant_NONE, } } } @@ -327,6 +330,23 @@ impl SVSParams { .with_search_l(self.searchWindowSize) .with_capacity(self.base.initialCapacity) .with_two_pass(self.twoPassConstruction) + .with_quantization(self.quantBits.to_rust_quantization()) + } +} + +impl VecSimSvsQuantBits { + /// Convert C FFI quantization enum to Rust quantization enum. + pub fn to_rust_quantization(&self) -> vecsim::index::SvsQuantization { + match self { + VecSimSvsQuantBits::VecSimSvsQuant_NONE => vecsim::index::SvsQuantization::None, + VecSimSvsQuantBits::VecSimSvsQuant_Scalar => vecsim::index::SvsQuantization::Scalar, + VecSimSvsQuantBits::VecSimSvsQuant_4 => vecsim::index::SvsQuantization::Lvq4, + VecSimSvsQuantBits::VecSimSvsQuant_8 => vecsim::index::SvsQuantization::Lvq8, + VecSimSvsQuantBits::VecSimSvsQuant_4x4 => vecsim::index::SvsQuantization::Lvq4x4, + VecSimSvsQuantBits::VecSimSvsQuant_4x8 => vecsim::index::SvsQuantization::Lvq4x8, + VecSimSvsQuantBits::VecSimSvsQuant_4x8_LeanVec => vecsim::index::SvsQuantization::LeanVec4x8, + VecSimSvsQuantBits::VecSimSvsQuant_8x8_LeanVec => vecsim::index::SvsQuantization::LeanVec8x8, + } } } diff --git a/rust/vecsim-c/src/query.rs b/rust/vecsim-c/src/query.rs index 22708dcab..4fba3a6a6 100644 --- a/rust/vecsim-c/src/query.rs +++ b/rust/vecsim-c/src/query.rs @@ -33,7 +33,7 @@ impl QueryReplyHandle { } } - pub fn get_iterator(&self) -> QueryReplyIteratorHandle { + pub fn get_iterator(&self) -> QueryReplyIteratorHandle<'_> { QueryReplyIteratorHandle::new(&self.reply.results) } diff --git a/rust/vecsim-c/src/types.rs b/rust/vecsim-c/src/types.rs index 54f19c7f9..7593184cc 100644 --- a/rust/vecsim-c/src/types.rs +++ b/rust/vecsim-c/src/types.rs @@ -181,6 +181,32 @@ pub enum VecSearchMode { RANGE_QUERY = 5, } +/// SVS quantization bits configuration. +/// +/// Matches C++ VecSimSvsQuantBits enum for compatibility. +/// The values are encoded as: primary_bits | (residual_bits << 8) | (is_leanvec << 16) +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum VecSimSvsQuantBits { + /// No quantization (full precision). + #[default] + VecSimSvsQuant_NONE = 0, + /// 8-bit scalar quantization (SQ8). + VecSimSvsQuant_Scalar = 1, + /// 4-bit LVQ quantization. + VecSimSvsQuant_4 = 4, + /// 8-bit LVQ quantization. + VecSimSvsQuant_8 = 8, + /// 4-bit primary + 4-bit residual LVQ. + VecSimSvsQuant_4x4 = 0x0404, // 4 | (4 << 8) + /// 4-bit primary + 8-bit residual LVQ. + VecSimSvsQuant_4x8 = 0x0804, // 4 | (8 << 8) + /// LeanVec with 4-bit primary + 8-bit residual. + VecSimSvsQuant_4x8_LeanVec = 0x010804, // 4 | (8 << 8) | (1 << 16) + /// LeanVec with 8-bit primary + 8-bit residual. + VecSimSvsQuant_8x8_LeanVec = 0x010808, // 8 | (8 << 8) | (1 << 16) +} + /// Timeout callback function type. /// Returns non-zero on timeout. pub type timeoutCallbackFunction = Option i32>; diff --git a/rust/vecsim/src/distance/simd/avx.rs b/rust/vecsim/src/distance/simd/avx.rs index 5c168db28..ed17a9305 100644 --- a/rust/vecsim/src/distance/simd/avx.rs +++ b/rust/vecsim/src/distance/simd/avx.rs @@ -8,7 +8,7 @@ #![cfg(target_arch = "x86_64")] -use crate::types::VectorElement; +use crate::types::{DistanceType, VectorElement}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; diff --git a/rust/vecsim/src/distance/simd/avx2.rs b/rust/vecsim/src/distance/simd/avx2.rs index 3f66550ea..0e4fc0c91 100644 --- a/rust/vecsim/src/distance/simd/avx2.rs +++ b/rust/vecsim/src/distance/simd/avx2.rs @@ -5,7 +5,7 @@ #![cfg(target_arch = "x86_64")] -use crate::types::VectorElement; +use crate::types::{DistanceType, VectorElement}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; diff --git a/rust/vecsim/src/distance/simd/avx512.rs b/rust/vecsim/src/distance/simd/avx512.rs index eeef9559b..d0d0dd8f4 100644 --- a/rust/vecsim/src/distance/simd/avx512.rs +++ b/rust/vecsim/src/distance/simd/avx512.rs @@ -5,7 +5,7 @@ #![cfg(target_arch = "x86_64")] -use crate::types::VectorElement; +use crate::types::{DistanceType, VectorElement}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; diff --git a/rust/vecsim/src/distance/simd/avx512bw.rs b/rust/vecsim/src/distance/simd/avx512bw.rs index b7a38fb5f..e48097aea 100644 --- a/rust/vecsim/src/distance/simd/avx512bw.rs +++ b/rust/vecsim/src/distance/simd/avx512bw.rs @@ -10,7 +10,7 @@ #![cfg(target_arch = "x86_64")] -use crate::types::{Int8, UInt8, VectorElement}; +use crate::types::{Int8, UInt8, DistanceType, VectorElement}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; diff --git a/rust/vecsim/src/distance/simd/sse.rs b/rust/vecsim/src/distance/simd/sse.rs index 850d5f841..0c6d60ad6 100644 --- a/rust/vecsim/src/distance/simd/sse.rs +++ b/rust/vecsim/src/distance/simd/sse.rs @@ -5,7 +5,7 @@ #![cfg(target_arch = "x86_64")] -use crate::types::VectorElement; +use crate::types::{DistanceType, VectorElement}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; diff --git a/rust/vecsim/src/distance/simd/sse4.rs b/rust/vecsim/src/distance/simd/sse4.rs index f5bef2c79..d8c2a9c09 100644 --- a/rust/vecsim/src/distance/simd/sse4.rs +++ b/rust/vecsim/src/distance/simd/sse4.rs @@ -7,7 +7,7 @@ #![cfg(target_arch = "x86_64")] -use crate::types::VectorElement; +use crate::types::{DistanceType, VectorElement}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; diff --git a/rust/vecsim/src/index/hnsw/concurrent_graph.rs b/rust/vecsim/src/index/hnsw/concurrent_graph.rs index 349ef45a2..ef28ddb5f 100644 --- a/rust/vecsim/src/index/hnsw/concurrent_graph.rs +++ b/rust/vecsim/src/index/hnsw/concurrent_graph.rs @@ -12,7 +12,16 @@ use super::ElementGraphData; use crate::types::IdType; use parking_lot::RwLock; use std::cell::UnsafeCell; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering}; + +/// Flags for element status (matches C++ Flags enum). +#[allow(dead_code)] +mod flags { + /// Element is logically deleted but still exists in the graph. + pub const DELETE_MARK: u8 = 0x1; + /// Element is being inserted into the graph. + pub const IN_PROCESS: u8 = 0x2; +} /// Default segment size (number of elements per segment). const SEGMENT_SIZE: usize = 4096; @@ -90,6 +99,10 @@ pub struct ConcurrentGraph { segment_size: usize, /// Total number of initialized elements (approximate, may be slightly stale). len: AtomicUsize, + /// Atomic flags for each element (DELETE_MARK, IN_PROCESS). + /// Stored separately for O(1) access without acquiring segment lock. + /// This matches the C++ idToMetaData approach. + element_flags: RwLock>, } impl ConcurrentGraph { @@ -102,10 +115,16 @@ impl ConcurrentGraph { .map(|_| GraphSegment::new(segment_size)) .collect(); + // Pre-allocate flags array for initial capacity + let flags: Vec = (0..initial_capacity) + .map(|_| AtomicU8::new(0)) + .collect(); + Self { segments: RwLock::new(segments), segment_size, len: AtomicUsize::new(0), + element_flags: RwLock::new(flags), } } @@ -197,6 +216,56 @@ impl ConcurrentGraph { } } + /// Ensure the flags array has capacity for the given ID. + fn ensure_flags_capacity(&self, id: IdType) { + let id_usize = id as usize; + + // Fast path - check with read lock + { + let flags = self.element_flags.read(); + if id_usize < flags.len() { + return; + } + } + + // Slow path - need to grow + let mut flags = self.element_flags.write(); + // Double-check after acquiring write lock + let needed = id_usize + 1; + let current_len = flags.len(); + if needed > current_len { + // Grow by at least one segment worth + let new_capacity = needed.max(current_len + self.segment_size); + let additional = new_capacity - current_len; + flags.reserve(additional); + for _ in 0..additional { + flags.push(AtomicU8::new(0)); + } + } + } + + /// Check if an element is marked as deleted (O(1) lock-free after initial read lock). + #[inline] + pub fn is_marked_deleted(&self, id: IdType) -> bool { + let id_usize = id as usize; + let flags = self.element_flags.read(); + if id_usize < flags.len() { + flags[id_usize].load(Ordering::Acquire) & flags::DELETE_MARK != 0 + } else { + false + } + } + + /// Mark an element as deleted atomically. + pub fn mark_deleted(&self, id: IdType) { + self.ensure_flags_capacity(id); + let flags = self.element_flags.read(); + let id_usize = id as usize; + if id_usize < flags.len() { + flags[id_usize].fetch_or(flags::DELETE_MARK, Ordering::Release); + } + } + /// Get the approximate number of elements. #[inline] pub fn len(&self) -> usize { @@ -260,9 +329,35 @@ impl ConcurrentGraph { } drop(segments); - // Set new elements + // Reset flags array (clear all flags) + { + let mut flags = self.element_flags.write(); + // Clear existing flags + for flag in flags.iter() { + flag.store(0, Ordering::Release); + } + // Resize if needed + let needed = new_elements.len(); + let current_len = flags.len(); + if needed > current_len { + let additional = needed - current_len; + flags.reserve(additional); + for _ in 0..additional { + flags.push(AtomicU8::new(0)); + } + } + } + + // Set new elements and update flags for deleted elements for (id, element) in new_elements.into_iter().enumerate() { if let Some(data) = element { + // If the element is marked as deleted in metadata, update flags + if data.meta.deleted { + let flags = self.element_flags.read(); + if id < flags.len() { + flags[id].fetch_or(flags::DELETE_MARK, Ordering::Release); + } + } self.set(id as IdType, data); } } diff --git a/rust/vecsim/src/index/hnsw/mod.rs b/rust/vecsim/src/index/hnsw/mod.rs index 0c2d594d8..9cf47ffd5 100644 --- a/rust/vecsim/src/index/hnsw/mod.rs +++ b/rust/vecsim/src/index/hnsw/mod.rs @@ -724,6 +724,10 @@ impl HnswCore { /// Mark an element as deleted (concurrent version). pub fn mark_deleted_concurrent(&self, id: IdType) { + // Update the flags array first (O(1) atomic operation for fast deleted checks) + self.graph.mark_deleted(id); + + // Also update the element's metadata for consistency if let Some(element) = self.graph.get(id) { // ElementMetaData.deleted is not atomic, but this is a best-effort // tombstone - reads may see stale state briefly, which is acceptable diff --git a/rust/vecsim/src/index/hnsw/search.rs b/rust/vecsim/src/index/hnsw/search.rs index 5bc28f228..0b905db78 100644 --- a/rust/vecsim/src/index/hnsw/search.rs +++ b/rust/vecsim/src/index/hnsw/search.rs @@ -19,6 +19,13 @@ use crate::utils::{MaxHeap, MinHeap}; pub trait GraphAccess { /// Get an element by ID. fn get(&self, id: IdType) -> Option<&ElementGraphData>; + + /// Check if an element is marked as deleted. + /// Default implementation uses get(), but can be overridden for efficiency. + #[inline] + fn is_deleted(&self, id: IdType) -> bool { + self.get(id).is_some_and(|e| e.meta.deleted) + } } /// Implementation for slice-based graphs (used in tests). @@ -43,6 +50,13 @@ impl GraphAccess for ConcurrentGraph { fn get(&self, id: IdType) -> Option<&ElementGraphData> { ConcurrentGraph::get(self, id) } + + /// O(1) deleted check using separate flags array. + /// This avoids acquiring a segment read lock for every neighbor check. + #[inline] + fn is_deleted(&self, id: IdType) -> bool { + self.is_marked_deleted(id) + } } /// Result of a layer search: (id, distance) pairs. @@ -236,10 +250,7 @@ where candidates.push(neighbor, dist); // Only add to results if not deleted and passes filter - let is_deleted = graph - .get(neighbor) - .is_some_and(|e| e.meta.deleted); - if !is_deleted { + if !graph.is_deleted(neighbor) { let passes = filter.is_none_or(|f| f(neighbor)); if passes { results.try_insert(neighbor, dist); @@ -251,12 +262,8 @@ where } } - // Convert results to vector - results - .into_sorted_vec() - .into_iter() - .map(|e| (e.id, e.distance)) - .collect() + // Convert results to vector (optimized to minimize allocations) + results.into_sorted_pairs() } use crate::types::LabelType; @@ -402,10 +409,7 @@ where } // Only update label tracking if not deleted - let is_deleted = graph - .get(neighbor) - .is_some_and(|e| e.meta.deleted); - if !is_deleted { + if !graph.is_deleted(neighbor) { if let Some(&label) = id_to_label.get(&neighbor) { let passes = filter.is_none_or(|f| f(label)); if passes { @@ -576,15 +580,10 @@ where candidates.push(neighbor, dist); // Only add to results if not deleted, within radius, and passes filter - if dist_f64 <= radius.to_f64() { - let is_deleted = graph - .get(neighbor) - .is_some_and(|e| e.meta.deleted); - if !is_deleted { - let passes = filter.is_none_or(|f| f(neighbor)); - if passes { - results.push((neighbor, dist)); - } + if dist_f64 <= radius.to_f64() && !graph.is_deleted(neighbor) { + let passes = filter.is_none_or(|f| f(neighbor)); + if passes { + results.push((neighbor, dist)); } } } diff --git a/rust/vecsim/src/index/hnsw/single.rs b/rust/vecsim/src/index/hnsw/single.rs index 1760f9c3a..d16bcd785 100644 --- a/rust/vecsim/src/index/hnsw/single.rs +++ b/rust/vecsim/src/index/hnsw/single.rs @@ -6,7 +6,7 @@ use super::{ElementGraphData, HnswCore, HnswParams}; use crate::index::traits::{BatchIterator, IndexError, IndexInfo, QueryError, VecSimIndex}; use crate::query::{QueryParams, QueryReply, QueryResult}; -use crate::types::{DistanceType, IdType, LabelType, VectorElement}; +use crate::types::{IdType, LabelType, VectorElement}; use dashmap::DashMap; /// Statistics about an HNSW index. diff --git a/rust/vecsim/src/index/hnsw/visited.rs b/rust/vecsim/src/index/hnsw/visited.rs index 80bc47e3c..dd569189b 100644 --- a/rust/vecsim/src/index/hnsw/visited.rs +++ b/rust/vecsim/src/index/hnsw/visited.rs @@ -123,7 +123,7 @@ impl VisitedNodesHandlerPool { } /// Get a handler from the pool, creating one if necessary. - pub fn get(&self) -> PooledHandler { + pub fn get(&self) -> PooledHandler<'_> { let cap = self.default_capacity.load(std::sync::atomic::Ordering::Acquire); let handler = self.handlers.lock().pop().unwrap_or_else(|| { VisitedNodesHandler::new(cap) diff --git a/rust/vecsim/src/index/mod.rs b/rust/vecsim/src/index/mod.rs index 78c3ccca4..51dffa3b3 100644 --- a/rust/vecsim/src/index/mod.rs +++ b/rust/vecsim/src/index/mod.rs @@ -41,7 +41,7 @@ pub use tiered::{ // Re-export SVS types pub use svs::{ - SvsParams, SvsSingle, SvsMulti, SvsStats, + SvsParams, SvsSingle, SvsMulti, SvsStats, SvsQuantization, }; // Re-export Tiered SVS types diff --git a/rust/vecsim/src/index/svs/mod.rs b/rust/vecsim/src/index/svs/mod.rs index b3280aa4b..a1c899aed 100644 --- a/rust/vecsim/src/index/svs/mod.rs +++ b/rust/vecsim/src/index/svs/mod.rs @@ -43,10 +43,12 @@ pub mod graph; pub mod search; pub mod single; pub mod multi; +pub mod sq8; pub use graph::{VamanaGraph, VamanaGraphData}; pub use single::{SvsSingle, SvsStats, SvsSingleBatchIterator}; pub use multi::{SvsMulti, SvsMultiBatchIterator}; +pub use sq8::SvsSq8Core; use crate::containers::DataBlocks; use crate::distance::{create_distance_function, DistanceFunction, Metric}; @@ -66,6 +68,32 @@ pub const DEFAULT_CONSTRUCTION_L: usize = 200; /// Default search window size. pub const DEFAULT_SEARCH_L: usize = 100; +/// Quantization mode for SVS index. +/// +/// Quantization reduces memory usage by storing compressed vectors. +/// The tradeoff is slightly reduced recall accuracy. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SvsQuantization { + /// No quantization - full precision vectors. + #[default] + None, + /// Scalar 8-bit quantization (SQ8). + /// Each dimension is quantized to 8-bit unsigned integer with per-vector scaling. + Scalar, + /// 4-bit LVQ quantization. + Lvq4, + /// 8-bit LVQ quantization. + Lvq8, + /// 4-bit primary + 4-bit residual LVQ (two-level). + Lvq4x4, + /// 4-bit primary + 8-bit residual LVQ (two-level). + Lvq4x8, + /// LeanVec with 4-bit primary + 8-bit residual (dimension reduction + quantization). + LeanVec4x8, + /// LeanVec with 8-bit primary + 8-bit residual (dimension reduction + quantization). + LeanVec8x8, +} + /// Parameters for creating an SVS (Vamana) index. #[derive(Debug, Clone)] pub struct SvsParams { @@ -85,6 +113,8 @@ pub struct SvsParams { pub initial_capacity: usize, /// Use two-pass construction (recommended for better recall). pub two_pass_construction: bool, + /// Quantization mode for memory-efficient storage. + pub quantization: SvsQuantization, } impl SvsParams { @@ -99,6 +129,7 @@ impl SvsParams { search_window_size: DEFAULT_SEARCH_L, initial_capacity: 1024, two_pass_construction: true, + quantization: SvsQuantization::None, } } @@ -132,6 +163,12 @@ impl SvsParams { self } + /// Set quantization mode. + pub fn with_quantization(mut self, quantization: SvsQuantization) -> Self { + self.quantization = quantization; + self + } + /// Enable/disable two-pass construction. pub fn with_two_pass(mut self, enable: bool) -> Self { self.two_pass_construction = enable; diff --git a/rust/vecsim/src/index/svs/search.rs b/rust/vecsim/src/index/svs/search.rs index 1b0da3b63..1aa6e4be8 100644 --- a/rust/vecsim/src/index/svs/search.rs +++ b/rust/vecsim/src/index/svs/search.rs @@ -249,6 +249,134 @@ fn select_closest(candidates: &[(IdType, D)], max_degree: usize sorted.into_iter().take(max_degree).map(|(id, _)| id).collect() } +// ============================================================================ +// SQ8 Asymmetric Distance Search +// ============================================================================ + +use crate::containers::Sq8DataBlocks; +use crate::distance::Metric; +use crate::quantization::sq8::{sq8_asymmetric_cosine, sq8_asymmetric_inner_product, sq8_asymmetric_l2_squared}; + +/// Result of an SQ8 search with f32 distances. +pub struct Sq8SearchResult { + pub results: Vec, +} + +/// Single SQ8 search result entry. +pub struct Sq8SearchResultEntry { + pub id: IdType, + pub distance: f32, +} + +/// Greedy beam search using SQ8 asymmetric distance. +/// +/// Query vector is in f32, stored vectors are in SQ8 format. +#[allow(clippy::too_many_arguments)] +pub fn greedy_beam_search_sq8

( + entry_point: IdType, + query: &[f32], + beam_width: usize, + graph: &VamanaGraph, + data: &Sq8DataBlocks, + metric: Metric, + dim: usize, + visited: &VisitedNodesHandler, + filter: Option<&P>, +) -> Sq8SearchResult +where + P: Fn(IdType) -> bool + ?Sized, +{ + // Compute asymmetric distance + let compute_dist = |id: IdType| -> Option { + let (quantized, meta) = data.get(id)?; + Some(match metric { + Metric::L2 => sq8_asymmetric_l2_squared(query, quantized, meta, dim), + Metric::InnerProduct => sq8_asymmetric_inner_product(query, quantized, meta, dim), + Metric::Cosine => sq8_asymmetric_cosine(query, quantized, meta, dim), + }) + }; + + // Candidates to explore (min-heap: closest first) + let mut candidates = MinHeap::::with_capacity(beam_width * 2); + + // Results (max-heap: keeps L closest, largest at top) + let mut results = MaxHeap::::new(beam_width); + + // Initialize with entry point + visited.visit(entry_point); + + if let Some(dist) = compute_dist(entry_point) { + candidates.push(entry_point, dist); + + if !graph.is_deleted(entry_point) { + let passes = filter.is_none_or(|f| f(entry_point)); + if passes { + results.insert(entry_point, dist); + } + } + } + + // Explore candidates + while let Some(candidate) = candidates.pop() { + // Check if we can stop early + if results.is_full() { + if let Some(worst_dist) = results.top_distance() { + if (candidate.distance as f64) > worst_dist.to_f64() { + break; + } + } + } + + // Skip deleted nodes + if graph.is_deleted(candidate.id) { + continue; + } + + // Explore neighbors + for neighbor in graph.get_neighbors(candidate.id) { + if visited.visit(neighbor) { + continue; // Already visited + } + + // Skip deleted neighbors + if graph.is_deleted(neighbor) { + continue; + } + + // Compute distance + if let Some(dist) = compute_dist(neighbor) { + // Check filter + let passes = filter.is_none_or(|f| f(neighbor)); + + // Add to results if it passes filter and is close enough + if passes + && (!results.is_full() + || (dist as f64) < results.top_distance().unwrap().to_f64()) + { + results.try_insert(neighbor, dist); + } + + // Add to candidates for exploration if close enough + if !results.is_full() || (dist as f64) < results.top_distance().unwrap().to_f64() { + candidates.push(neighbor, dist); + } + } + } + } + + // Convert to sorted vector + Sq8SearchResult { + results: results + .into_sorted_vec() + .into_iter() + .map(|e| Sq8SearchResultEntry { + id: e.id, + distance: e.distance, + }) + .collect(), + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust/vecsim/src/index/svs/single.rs b/rust/vecsim/src/index/svs/single.rs index 1ffef3d91..c4f58536a 100644 --- a/rust/vecsim/src/index/svs/single.rs +++ b/rust/vecsim/src/index/svs/single.rs @@ -646,6 +646,7 @@ impl SvsSingle { search_window_size, initial_capacity: header.count.max(1024), two_pass_construction, + quantization: super::SvsQuantization::None, // Loaded indices use no quantization }; // Create the index diff --git a/rust/vecsim/src/index/svs/sq8.rs b/rust/vecsim/src/index/svs/sq8.rs new file mode 100644 index 000000000..bf26332a6 --- /dev/null +++ b/rust/vecsim/src/index/svs/sq8.rs @@ -0,0 +1,278 @@ +//! SQ8-quantized SVS (Vamana) index implementation. +//! +//! This module provides SVS indices that use scalar 8-bit quantization for +//! memory-efficient vector storage. Vectors are stored as u8 with per-vector +//! metadata for dequantization and asymmetric distance computation. +//! +//! ## Memory Savings +//! +//! SQ8 quantization reduces memory usage by ~4x for f32 vectors: +//! - Original: 4 bytes per dimension +//! - Quantized: 1 byte per dimension + 16 bytes metadata per vector +//! +//! ## Asymmetric Distance +//! +//! During search, the query vector stays in f32 while stored vectors are in SQ8. +//! This provides better accuracy than symmetric quantization. + +use super::{SvsParams, VamanaGraph}; +use crate::containers::Sq8DataBlocks; +use crate::distance::Metric; +use crate::index::hnsw::VisitedNodesHandlerPool; +use crate::quantization::sq8::{sq8_asymmetric_cosine, sq8_asymmetric_inner_product, sq8_asymmetric_l2_squared}; +use crate::quantization::Sq8VectorMeta; +use crate::types::{IdType, INVALID_ID}; +use std::sync::atomic::AtomicU32; + +/// Core SQ8-quantized SVS implementation. +pub struct SvsSq8Core { + /// Quantized vector storage. + pub data: Sq8DataBlocks, + /// Graph structure. + pub graph: VamanaGraph, + /// Distance metric. + pub metric: Metric, + /// Medoid (entry point). + pub medoid: AtomicU32, + /// Pool of visited handlers for concurrent searches. + pub visited_pool: VisitedNodesHandlerPool, + /// Parameters. + pub params: SvsParams, +} + +impl SvsSq8Core { + /// Create a new SQ8-quantized SVS core. + pub fn new(params: SvsParams) -> Self { + let data = Sq8DataBlocks::new(params.dim, params.initial_capacity); + let graph = VamanaGraph::new(params.initial_capacity, params.graph_max_degree); + let visited_pool = VisitedNodesHandlerPool::new(params.initial_capacity); + + Self { + data, + graph, + metric: params.metric, + medoid: AtomicU32::new(INVALID_ID), + visited_pool, + params, + } + } + + /// Add a vector (will be quantized) and return its internal ID. + pub fn add_vector(&mut self, vector: &[f32]) -> Option { + self.data.add(vector) + } + + /// Get quantized data and metadata by ID. + #[inline] + pub fn get_quantized(&self, id: IdType) -> Option<(&[u8], &Sq8VectorMeta)> { + self.data.get(id) + } + + /// Decode a vector back to f32. + pub fn decode_vector(&self, id: IdType) -> Option> { + self.data.decode(id) + } + + /// Compute asymmetric distance between f32 query and quantized stored vector. + #[inline] + pub fn asymmetric_distance(&self, query: &[f32], quantized: &[u8], meta: &Sq8VectorMeta) -> f32 { + match self.metric { + Metric::L2 => sq8_asymmetric_l2_squared(query, quantized, meta, self.params.dim), + Metric::InnerProduct => sq8_asymmetric_inner_product(query, quantized, meta, self.params.dim), + Metric::Cosine => sq8_asymmetric_cosine(query, quantized, meta, self.params.dim), + } + } + + /// Compute distance between query and stored vector by ID. + #[inline] + pub fn distance_to(&self, query: &[f32], id: IdType) -> Option { + let (quantized, meta) = self.get_quantized(id)?; + Some(self.asymmetric_distance(query, quantized, meta)) + } + + /// Find the medoid (approximate centroid) of all vectors. + pub fn find_medoid(&self) -> Option { + let ids: Vec = self.data.iter_ids().collect(); + if ids.is_empty() { + return None; + } + if ids.len() == 1 { + return Some(ids[0]); + } + + // Sample at most 1000 vectors for medoid computation + let sample_size = ids.len().min(1000); + let sample: Vec = if ids.len() <= sample_size { + ids.clone() + } else { + use rand::seq::SliceRandom; + let mut rng = rand::thread_rng(); + let mut shuffled = ids.clone(); + shuffled.shuffle(&mut rng); + shuffled.into_iter().take(sample_size).collect() + }; + + // Find vector with minimum total distance + // For SQ8, we decode to f32 for accurate medoid computation + let mut best_id = sample[0]; + let mut best_total_dist = f64::MAX; + + for &candidate in &sample { + if let Some(candidate_data) = self.decode_vector(candidate) { + let total_dist: f64 = sample + .iter() + .filter(|&&id| id != candidate) + .filter_map(|&id| self.distance_to(&candidate_data, id)) + .map(|d| d as f64) + .sum(); + + if total_dist < best_total_dist { + best_total_dist = total_dist; + best_id = candidate; + } + } + } + + Some(best_id) + } + + /// Get the dimension. + #[inline] + pub fn dim(&self) -> usize { + self.params.dim + } + + /// Get the number of vectors. + #[inline] + pub fn len(&self) -> usize { + self.data.len() + } + + /// Check if empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Mark a slot as deleted. + pub fn mark_deleted(&mut self, id: IdType) -> bool { + self.data.mark_deleted(id) + } + + /// Update a vector at the given ID. + pub fn update_vector(&mut self, id: IdType, vector: &[f32]) -> bool { + self.data.update(id, vector) + } + + /// Get the search window size. + pub fn search_window_size(&self) -> usize { + self.params.search_window_size + } + + /// Get the medoid ID. + pub fn get_medoid(&self) -> IdType { + self.medoid.load(std::sync::atomic::Ordering::Acquire) + } + + /// Set the medoid ID. + pub fn set_medoid(&self, id: IdType) { + self.medoid.store(id, std::sync::atomic::Ordering::Release) + } +} + +use super::search::greedy_beam_search_sq8; +use crate::query::QueryResult; +use crate::types::LabelType; +use parking_lot::RwLock; +use std::collections::HashMap; +use std::sync::atomic::AtomicUsize; + +/// Single-value SQ8-quantized SVS index. +/// +/// Each label has exactly one associated vector, stored in SQ8 format. +pub struct SvsSq8Single { + /// Core SQ8-quantized SVS implementation. + core: RwLock, + /// Label to internal ID mapping. + label_to_id: RwLock>, + /// Internal ID to label mapping. + id_to_label: RwLock>, + /// Number of vectors. + count: AtomicUsize, + /// Maximum capacity (if set). + capacity: Option, + /// Construction completed flag. + construction_done: RwLock, +} + +impl SvsSq8Single { + /// Create a new SQ8-quantized SVS index. + pub fn new(params: SvsParams) -> Self { + let initial_capacity = params.initial_capacity; + Self { + core: RwLock::new(SvsSq8Core::new(params)), + label_to_id: RwLock::new(HashMap::with_capacity(initial_capacity)), + id_to_label: RwLock::new(HashMap::with_capacity(initial_capacity)), + count: AtomicUsize::new(0), + capacity: None, + construction_done: RwLock::new(false), + } + } + + /// Get the distance metric. + pub fn metric(&self) -> Metric { + self.core.read().metric + } + + /// Get the dimension. + pub fn dim(&self) -> usize { + self.core.read().params.dim + } + + /// Get the search window size. + pub fn search_l(&self) -> usize { + self.core.read().search_window_size() + } + + /// Search for the k nearest neighbors using SQ8 asymmetric distance. + fn search_knn(&self, query: &[f32], k: usize, ef: usize) -> Vec> { + let core = self.core.read(); + + let medoid = core.get_medoid(); + if medoid == INVALID_ID || core.is_empty() { + return vec![]; + } + + let visited = core.visited_pool.get(); + + // Perform greedy beam search with SQ8 asymmetric distance + let search_result = greedy_beam_search_sq8( + medoid, + query, + ef.max(k), + &core.graph, + &core.data, + core.metric, + core.params.dim, + &visited, + None::<&fn(IdType) -> bool>, + ); + + // Map internal IDs to labels + let id_to_label = self.id_to_label.read(); + let mut results: Vec> = search_result + .results + .into_iter() + .filter_map(|r| { + id_to_label.get(&r.id).map(|&label| QueryResult { + label, + distance: r.distance, + }) + }) + .collect(); + + results.truncate(k); + results + } +} + diff --git a/rust/vecsim/src/utils/heap.rs b/rust/vecsim/src/utils/heap.rs index 148bca482..71dcac12b 100644 --- a/rust/vecsim/src/utils/heap.rs +++ b/rust/vecsim/src/utils/heap.rs @@ -139,10 +139,14 @@ impl MaxHeap { if self.heap.len() < self.capacity { self.heap.push(MaxHeapEntry(HeapEntry::new(id, distance))); true - } else if let Some(top) = self.heap.peek() { + } else if let Some(mut top) = self.heap.peek_mut() { if distance < top.0.distance { - self.heap.pop(); - self.heap.push(MaxHeapEntry(HeapEntry::new(id, distance))); + // Replace in-place using PeekMut - avoids separate pop+push operations + // PeekMut::pop() removes the top element efficiently, then we push the new one + // This is more efficient than pop() + push() because it avoids redundant sift operations + *top = MaxHeapEntry(HeapEntry::new(id, distance)); + // Drop the PeekMut to trigger sift_down + drop(top); true } else { false @@ -169,7 +173,10 @@ impl MaxHeap { /// Convert to a sorted vector (smallest distance first). pub fn into_sorted_vec(self) -> Vec> { - let mut entries: Vec<_> = self.heap.into_iter().map(|e| e.0).collect(); + // Pre-allocate with exact capacity to avoid reallocations + let len = self.heap.len(); + let mut entries = Vec::with_capacity(len); + entries.extend(self.heap.into_iter().map(|e| e.0)); entries.sort_by(|a, b| { a.distance .partial_cmp(&b.distance) @@ -178,6 +185,25 @@ impl MaxHeap { entries } + /// Convert to a sorted vector of (id, distance) pairs. + /// This is optimized to minimize allocations by reusing the heap's buffer. + #[inline] + pub fn into_sorted_pairs(self) -> Vec<(IdType, D)> { + // Use into_sorted_iter from BinaryHeap which pops in sorted order + let len = self.heap.len(); + let mut result = Vec::with_capacity(len); + // BinaryHeap::into_sorted_vec() uses into_iter + sort, we do the same + // but map directly to the output format + let mut entries: Vec<_> = self.heap.into_vec(); + entries.sort_by(|a, b| { + a.0.distance + .partial_cmp(&b.0.distance) + .unwrap_or(Ordering::Equal) + }); + result.extend(entries.into_iter().map(|e| (e.0.id, e.0.distance))); + result + } + /// Convert to a vector (unordered). pub fn into_vec(self) -> Vec> { self.heap.into_iter().map(|e| e.0).collect() diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h index f9d94dc52..a4d5e08e4 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h @@ -94,6 +94,11 @@ class TieredHNSWIndex : public VecSimTieredIndex { // associated swap jobs. std::mutex idToRepairJobsGuard; + // Counter for vectors inserted directly into HNSW by the main thread (bypassing flat buffer). + // This happens in WriteInPlace mode or when the flat buffer is full. + // Not atomic since it's only accessed from the main thread. + size_t directHNSWInsertions{0}; + void executeInsertJob(HNSWInsertJob *job); void executeRepairJob(HNSWRepairJob *job); @@ -211,6 +216,7 @@ class TieredHNSWIndex : public VecSimTieredIndex { // needed. VecSimIndexDebugInfo debugInfo() const override; VecSimIndexBasicInfo basicInfo() const override; + VecSimIndexStatsInfo statisticInfo() const override; VecSimDebugInfoIterator *debugInfoIterator() const override; VecSimBatchIterator *newBatchIterator(const void *queryBlob, VecSimQueryParams *queryParams) const override { @@ -729,6 +735,8 @@ int TieredHNSWIndex::addVector(const void *blob, labelType l this->lockMainIndexGuard(); hnsw_index->addVector(storage_blob.get(), label); this->unlockMainIndexGuard(); + // Track direct insertion to HNSW (bypassing flat buffer) + ++this->directHNSWInsertions; return ret; } if (this->frontendIndex->indexSize() >= this->flatBufferLimit) { @@ -746,6 +754,8 @@ int TieredHNSWIndex::addVector(const void *blob, labelType l // index. auto storage_blob = this->frontendIndex->preprocessForStorage(blob); this->insertVectorToHNSW(hnsw_index, label, storage_blob.get()); + // Track direct insertion to HNSW (flat buffer was full) + ++this->directHNSWInsertions; return ret; } // Otherwise, we fall back to the "regular" insertion into the flat buffer @@ -1151,6 +1161,13 @@ void TieredHNSWIndex::TieredHNSW_BatchIterator::filter_irrel results.resize(cur_end - results.begin()); } +template +VecSimIndexStatsInfo TieredHNSWIndex::statisticInfo() const { + auto stats = VecSimTieredIndex::statisticInfo(); + stats.directHNSWInsertions = this->directHNSWInsertions; + return stats; +} + template VecSimIndexDebugInfo TieredHNSWIndex::debugInfo() const { auto info = VecSimTieredIndex::debugInfo(); diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h index fa136b7fe..9f6544d1b 100644 --- a/src/VecSim/vec_sim_common.h +++ b/src/VecSim/vec_sim_common.h @@ -250,6 +250,7 @@ typedef struct { void *storage; // Opaque pointer to disk storage const char *indexName; size_t indexNameLen; + bool rerank; // Whether to enable reranking for disk-based HNSW } VecSimDiskContext; typedef struct { @@ -342,6 +343,9 @@ typedef struct { size_t memory; size_t numberOfMarkedDeleted; // The number of vectors that are marked as deleted (HNSW/tiered // only). + size_t directHNSWInsertions; // Count of vectors inserted directly into HNSW by main thread + // (bypassing flat buffer). Tiered HNSW only. + size_t flatBufferSize; // Current flat buffer size. Tiered indexes only. } VecSimIndexStatsInfo; typedef struct { diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h index 5cd18d38d..88eabea69 100644 --- a/src/VecSim/vec_sim_index.h +++ b/src/VecSim/vec_sim_index.h @@ -111,6 +111,8 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { return info; } + IndexCalculatorInterface *getIndexCalculator() const { return indexCalculator; } + public: /** * @brief Construct a new Vec Sim Index object @@ -196,6 +198,8 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { return VecSimIndexStatsInfo{ .memory = this->getAllocationSize(), .numberOfMarkedDeleted = 0, + .directHNSWInsertions = 0, + .flatBufferSize = 0, }; } diff --git a/src/VecSim/vec_sim_tiered_index.h b/src/VecSim/vec_sim_tiered_index.h index 0a36b1f86..61294b90b 100644 --- a/src/VecSim/vec_sim_tiered_index.h +++ b/src/VecSim/vec_sim_tiered_index.h @@ -320,6 +320,8 @@ VecSimIndexStatsInfo VecSimTieredIndex::statisticInfo() cons auto stats = VecSimIndexStatsInfo{ .memory = this->getAllocationSize(), .numberOfMarkedDeleted = this->getNumMarkedDeleted(), + .directHNSWInsertions = 0, // Base tiered index returns 0; TieredHNSWIndex overrides + .flatBufferSize = this->frontendIndex->indexSize(), }; return stats; diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt index 859f2c0af..c98aa7c30 100644 --- a/tests/benchmark/CMakeLists.txt +++ b/tests/benchmark/CMakeLists.txt @@ -39,6 +39,10 @@ endif() # Spaces benchmarks # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +# Simple HNSW benchmark (self-contained, no external data needed) +add_executable(simple_hnsw_bench simple_hnsw_bench.cpp) +target_link_libraries(simple_hnsw_bench VectorSimilarity) + set(DATA_TYPE fp32 fp64 bf16 fp16 int8 uint8 sq8_fp32 sq8_sq8) foreach(data_type IN LISTS DATA_TYPE) add_executable(bm_spaces_${data_type} spaces_benchmarks/bm_spaces_${data_type}.cpp) diff --git a/tests/benchmark/simple_hnsw_bench.cpp b/tests/benchmark/simple_hnsw_bench.cpp new file mode 100644 index 000000000..5f6af44a9 --- /dev/null +++ b/tests/benchmark/simple_hnsw_bench.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2006-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + * + * Simple HNSW benchmark that creates an in-memory index for comparison with Rust. + * This benchmark is self-contained and doesn't require external data files. + * + * Usage: ./simple_hnsw_bench + */ +#include +#include +#include +#include +#include "VecSim/vec_sim.h" +#include "VecSim/algorithms/hnsw/hnsw.h" +#include "VecSim/index_factories/hnsw_factory.h" + +constexpr size_t DIM = 128; +constexpr size_t N_VECTORS = 10000; +constexpr size_t M = 16; +constexpr size_t EF_CONSTRUCTION = 100; +constexpr size_t EF_RUNTIME = 100; +constexpr size_t N_QUERIES = 1000; + +std::vector generate_random_vector(size_t dim, std::mt19937 &gen) { + std::uniform_real_distribution dist(0.0f, 1.0f); + std::vector vec(dim); + for (size_t i = 0; i < dim; ++i) { + vec[i] = dist(gen); + } + return vec; +} + +int main() { + std::mt19937 gen(42); // Fixed seed for reproducibility + + std::cout << "=== C++ HNSW Benchmark ===" << std::endl; + std::cout << "Config: " << N_VECTORS << " vectors, " << DIM << " dimensions, M=" << M + << ", ef_construction=" << EF_CONSTRUCTION << ", ef_runtime=" << EF_RUNTIME + << std::endl; + std::cout << std::endl; + + // Create HNSW parameters + HNSWParams params = {.dim = DIM, + .metric = VecSimMetric_L2, + .type = VecSimType_FLOAT32, + .M = M, + .efConstruction = EF_CONSTRUCTION, + .efRuntime = EF_RUNTIME}; + + // Create index + VecSimIndex *index = HNSWFactory::NewIndex(¶ms); + + // Generate and insert vectors + std::cout << "Inserting " << N_VECTORS << " vectors..." << std::endl; + auto insert_start = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < N_VECTORS; ++i) { + auto vec = generate_random_vector(DIM, gen); + VecSimIndex_AddVector(index, vec.data(), i); + } + + auto insert_end = std::chrono::high_resolution_clock::now(); + auto insert_duration = + std::chrono::duration_cast(insert_end - insert_start); + double insert_throughput = N_VECTORS * 1000.0 / insert_duration.count(); + + std::cout << "Insertion time: " << insert_duration.count() << " ms (" << insert_throughput + << " vec/s)" << std::endl; + std::cout << std::endl; + + // Generate query vectors + std::vector> queries; + for (size_t i = 0; i < N_QUERIES; ++i) { + queries.push_back(generate_random_vector(DIM, gen)); + } + + // KNN Search benchmark + std::cout << "Running " << N_QUERIES << " KNN queries (k=10)..." << std::endl; + + auto knn_start = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < N_QUERIES; ++i) { + VecSimQueryReply *results = + VecSimIndex_TopKQuery(index, queries[i].data(), 10, nullptr, BY_SCORE); + VecSimQueryReply_Free(results); + } + + auto knn_end = std::chrono::high_resolution_clock::now(); + auto knn_duration = std::chrono::duration_cast(knn_end - knn_start); + double avg_knn_time = static_cast(knn_duration.count()) / N_QUERIES; + double knn_throughput = N_QUERIES * 1000000.0 / knn_duration.count(); + + std::cout << "KNN k=10 (ef=" << EF_RUNTIME << "): avg " << avg_knn_time << " µs (" + << knn_throughput << " queries/s)" << std::endl; + std::cout << std::endl; + + // Range Search benchmark + std::cout << "Running " << N_QUERIES << " Range queries (radius=10.0)..." << std::endl; + + auto range_start = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < N_QUERIES; ++i) { + VecSimQueryReply *results = + VecSimIndex_RangeQuery(index, queries[i].data(), 10.0, nullptr, BY_SCORE); + VecSimQueryReply_Free(results); + } + + auto range_end = std::chrono::high_resolution_clock::now(); + auto range_duration = + std::chrono::duration_cast(range_end - range_start); + double avg_range_time = static_cast(range_duration.count()) / N_QUERIES; + double range_throughput = N_QUERIES * 1000000.0 / range_duration.count(); + + std::cout << "Range (r=10): avg " << avg_range_time << " µs (" << range_throughput + << " queries/s)" << std::endl; + std::cout << std::endl; + + // Cleanup + VecSimIndex_Free(index); + + std::cout << "=== Summary ===" << std::endl; + std::cout << "Insertion: " << insert_throughput << " vec/s" << std::endl; + std::cout << "KNN (k=10): " << avg_knn_time << " µs (" << knn_throughput << " q/s)" + << std::endl; + std::cout << "Range (r=10): " << avg_range_time << " µs (" << range_throughput << " q/s)" + << std::endl; + + return 0; +} diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index 04b1f18b9..ffcac02ea 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -2761,6 +2761,9 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) { EXPECT_EQ(info.tieredInfo.backgroundIndexing, false); EXPECT_EQ(info.tieredInfo.bufferLimit, 1000); EXPECT_EQ(info.tieredInfo.specificTieredBackendInfo.hnswTieredInfo.pendingSwapJobsThreshold, 1); + // Verify new tiered-specific stats + EXPECT_EQ(stats.flatBufferSize, 0); + EXPECT_EQ(stats.directHNSWInsertions, 0); // Validate that Static info returns the right restricted info as well. VecSimIndexBasicInfo s_info = VecSimIndex_BasicInfo(tiered_index); @@ -2787,6 +2790,9 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) { info.tieredInfo.frontendCommonInfo.memory); EXPECT_EQ(info.commonInfo.memory, stats.memory); EXPECT_EQ(info.tieredInfo.backgroundIndexing, true); + // Vector is in flat buffer, no direct insertions yet + EXPECT_EQ(stats.flatBufferSize, 1); + EXPECT_EQ(stats.directHNSWInsertions, 0); mock_thread_pool.thread_iteration(); info = tiered_index->debugInfo(); @@ -2803,6 +2809,9 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) { info.tieredInfo.frontendCommonInfo.memory); EXPECT_EQ(info.commonInfo.memory, stats.memory); EXPECT_EQ(info.tieredInfo.backgroundIndexing, false); + // Vector moved from flat buffer to HNSW by background thread + EXPECT_EQ(stats.flatBufferSize, 0); + EXPECT_EQ(stats.directHNSWInsertions, 0); if (TypeParam::isMulti()) { GenerateAndAddVector(tiered_index, dim, 1, 1); @@ -2839,6 +2848,74 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) { EXPECT_EQ(info.tieredInfo.backgroundIndexing, false); } +TYPED_TEST(HNSWTieredIndexTest, testDirectHNSWInsertionsStats) { + // Test that directHNSWInsertions counter is incremented when flat buffer is full. + size_t dim = 4; + size_t buffer_limit = 5; + HNSWParams params = {.type = TypeParam::get_index_type(), + .dim = dim, + .metric = VecSimMetric_L2, + .multi = TypeParam::isMulti()}; + VecSimParams hnsw_params = CreateParams(params); + auto mock_thread_pool = tieredIndexMock(); + + // Create index with small buffer limit + auto *tiered_index = + this->CreateTieredHNSWIndex(hnsw_params, mock_thread_pool, 1, buffer_limit); + + // Fill the flat buffer + for (size_t i = 0; i < buffer_limit; i++) { + GenerateAndAddVector(tiered_index, dim, i, i); + } + + VecSimIndexStatsInfo stats = tiered_index->statisticInfo(); + EXPECT_EQ(stats.flatBufferSize, buffer_limit); + EXPECT_EQ(stats.directHNSWInsertions, 0); + + // Add more vectors - these should go directly to HNSW + size_t extra_vectors = 3; + for (size_t i = buffer_limit; i < buffer_limit + extra_vectors; i++) { + GenerateAndAddVector(tiered_index, dim, i, i); + } + + stats = tiered_index->statisticInfo(); + EXPECT_EQ(stats.flatBufferSize, buffer_limit); + EXPECT_EQ(stats.directHNSWInsertions, extra_vectors); + + // Drain the flat buffer by starting threads and waiting for them to finish + mock_thread_pool.init_threads(); + mock_thread_pool.thread_pool_join(); + + stats = tiered_index->statisticInfo(); + EXPECT_EQ(stats.flatBufferSize, 0); + // Direct insertions counter should be preserved + EXPECT_EQ(stats.directHNSWInsertions, extra_vectors); + + // Test write-in-place mode: vectors should go directly to HNSW even when buffer is not full + VecSim_SetWriteMode(VecSim_WriteInPlace); + + size_t write_in_place_vectors = 4; + size_t label_offset = buffer_limit + extra_vectors; + for (size_t i = 0; i < write_in_place_vectors; i++) { + GenerateAndAddVector(tiered_index, dim, label_offset + i, label_offset + i); + } + + stats = tiered_index->statisticInfo(); + // Flat buffer should still be empty (vectors went directly to HNSW) + EXPECT_EQ(stats.flatBufferSize, 0); + // Direct insertions counter should include the write-in-place vectors + EXPECT_EQ(stats.directHNSWInsertions, extra_vectors + write_in_place_vectors); + + // Verify all vectors are in the backend index + VecSimIndexDebugInfo info = tiered_index->debugInfo(); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, + buffer_limit + extra_vectors + write_in_place_vectors); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0); + + // Reset to async mode + VecSim_SetWriteMode(VecSim_WriteAsync); +} + TYPED_TEST(HNSWTieredIndexTest, testInfoIterator) { // Create TieredHNSW index instance with a mock queue. size_t dim = 4;