diff --git a/.github/workflows/arm.yml b/.github/workflows/arm.yml
index 2e63e6a93..e2aa8c317 100644
--- a/.github/workflows/arm.yml
+++ b/.github/workflows/arm.yml
@@ -18,7 +18,7 @@ jobs:
           aws-region: ${{ secrets.AWS_REGION }}
       - name: Start EC2 runner
         id: start-ec2-runner
-        uses: machulav/ec2-github-runner@v2.4.1
+        uses: machulav/ec2-github-runner@v2.4.2
         with:
           mode: start
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
@@ -50,7 +50,7 @@ jobs:
           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           aws-region: ${{ secrets.AWS_REGION }}
       - name: Stop EC2 runner
-        uses: machulav/ec2-github-runner@v2.4.1
+        uses: machulav/ec2-github-runner@v2.4.2
         with:
           mode: stop
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
diff --git a/.github/workflows/benchmark-runner.yml b/.github/workflows/benchmark-runner.yml
index 50f5d785c..534d9f674 100644
--- a/.github/workflows/benchmark-runner.yml
+++ b/.github/workflows/benchmark-runner.yml
@@ -31,7 +31,7 @@ jobs:
           aws-region: ${{ secrets.AWS_REGION_BENCHMARK }}
       - name: Start EC2 runner
         id: start-ec2-runner
-        uses: machulav/ec2-github-runner@v2
+        uses: machulav/ec2-github-runner@v2.4.2
         with:
           mode: start
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
@@ -102,10 +102,9 @@ jobs:
           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           aws-region: ${{ secrets.AWS_REGION_BENCHMARK }}
       - name: Stop EC2 runner
-        uses: machulav/ec2-github-runner@v2
+        uses: machulav/ec2-github-runner@v2.4.2
         with:
           mode: stop
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
           label: ${{ needs.start-runner.outputs.runner_label }}
           ec2-instance-id: ${{ needs.start-runner.outputs.ec2_instance_id }}
-
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 2ef2bff72..1da4a084a 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -16,7 +16,7 @@ jobs:
           aws-region: ${{ secrets.AWS_REGION_BENCHMARK }}
       - name: Start EC2 runner
         id: start-ec2-runner
-        uses: machulav/ec2-github-runner@v2
+        uses: machulav/ec2-github-runner@v2.4.2
         with:
           mode: start
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
@@ -103,7 +103,7 @@ jobs:
           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           aws-region: ${{ secrets.AWS_REGION_BENCHMARK }}
       - name: Stop EC2 runner
-        uses: machulav/ec2-github-runner@v2
+        uses: machulav/ec2-github-runner@v2.4.2
         with:
           mode: stop
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
diff --git a/rust/vecsim-c/src/index.rs b/rust/vecsim-c/src/index.rs
index 60195a3e6..86990c175 100644
--- a/rust/vecsim-c/src/index.rs
+++ b/rust/vecsim-c/src/index.rs
@@ -1099,9 +1099,20 @@ macro_rules! impl_tiered_wrapper {
     };
 }
 
-// Implement wrappers for Tiered indices (f32 only for now)
+// Implement wrappers for Tiered indices
 impl_tiered_wrapper!(TieredSingleF32Wrapper, TieredSingle<f32>, f32, false);
+impl_tiered_wrapper!(TieredSingleF64Wrapper, TieredSingle<f64>, f64, false);
+impl_tiered_wrapper!(TieredSingleBF16Wrapper, TieredSingle<BFloat16>, BFloat16, false);
+impl_tiered_wrapper!(TieredSingleFP16Wrapper, TieredSingle<Float16>, Float16, false);
+impl_tiered_wrapper!(TieredSingleI8Wrapper, TieredSingle<Int8>, Int8, false);
+impl_tiered_wrapper!(TieredSingleU8Wrapper, TieredSingle<UInt8>, UInt8, false);
+
 impl_tiered_wrapper!(TieredMultiF32Wrapper, TieredMulti<f32>, f32, true);
+impl_tiered_wrapper!(TieredMultiF64Wrapper, TieredMulti<f64>, f64, true);
+impl_tiered_wrapper!(TieredMultiBF16Wrapper, TieredMulti<BFloat16>, BFloat16, true);
+impl_tiered_wrapper!(TieredMultiFP16Wrapper, TieredMulti<Float16>, Float16, true);
+impl_tiered_wrapper!(TieredMultiI8Wrapper, TieredMulti<Int8>, Int8, true);
+impl_tiered_wrapper!(TieredMultiU8Wrapper, TieredMulti<UInt8>, UInt8, true);
 
 // ============================================================================
 // Disk Index Wrappers
@@ -1247,8 +1258,13 @@ macro_rules! impl_disk_wrapper {
     };
 }
 
-// Implement wrappers for Disk indices (f32 only for now)
+// Implement wrappers for Disk indices
 impl_disk_wrapper!(DiskSingleF32Wrapper, f32);
+impl_disk_wrapper!(DiskSingleF64Wrapper, f64);
+impl_disk_wrapper!(DiskSingleBF16Wrapper, BFloat16);
+impl_disk_wrapper!(DiskSingleFP16Wrapper, Float16);
+impl_disk_wrapper!(DiskSingleI8Wrapper, Int8);
+impl_disk_wrapper!(DiskSingleU8Wrapper, UInt8);
 
 /// Create a new disk-based index.
 pub fn create_disk_index(params: &DiskParams) -> Option<Box<IndexHandle>> {
@@ -1257,14 +1273,34 @@ pub fn create_disk_index(params: &DiskParams) -> Option<Box<IndexHandle>> {
     let metric = params.base.metric;
     let dim = params.base.dim;
 
-    // Only f32 is supported for now
-    if data_type != VecSimType::VecSimType_FLOAT32 {
-        return None;
-    }
-
-    let index = DiskIndexSingle::<f32>::new(rust_params).ok()?;
-    let wrapper: Box<dyn IndexWrapper> =
-        Box::new(DiskSingleF32Wrapper::new(index, data_type, metric));
+    let wrapper: Box<dyn IndexWrapper> = match data_type {
+        VecSimType::VecSimType_FLOAT32 => {
+            let index = DiskIndexSingle::<f32>::new(rust_params).ok()?;
+            Box::new(DiskSingleF32Wrapper::new(index, data_type, metric))
+        }
+        VecSimType::VecSimType_FLOAT64 => {
+            let index = DiskIndexSingle::<f64>::new(rust_params).ok()?;
+            Box::new(DiskSingleF64Wrapper::new(index, data_type, metric))
+        }
+        VecSimType::VecSimType_BFLOAT16 => {
+            let index = DiskIndexSingle::<BFloat16>::new(rust_params).ok()?;
+            Box::new(DiskSingleBF16Wrapper::new(index, data_type, metric))
+        }
+        VecSimType::VecSimType_FLOAT16 => {
+            let index = DiskIndexSingle::<Float16>::new(rust_params).ok()?;
+            Box::new(DiskSingleFP16Wrapper::new(index, data_type, metric))
+        }
+        VecSimType::VecSimType_INT8 => {
+            let index = DiskIndexSingle::<Int8>::new(rust_params).ok()?;
+            Box::new(DiskSingleI8Wrapper::new(index, data_type, metric))
+        }
+        VecSimType::VecSimType_UINT8 => {
+            let index = DiskIndexSingle::<UInt8>::new(rust_params).ok()?;
+            Box::new(DiskSingleU8Wrapper::new(index, data_type, metric))
+        }
+        // INT32 and INT64 types not supported for disk indices
+        VecSimType::VecSimType_INT32 | VecSimType::VecSimType_INT64 => return None,
+    };
 
     Some(Box::new(IndexHandle::new(
         wrapper,
@@ -1540,17 +1576,57 @@ pub fn create_tiered_index(params: &TieredParams) -> Option<Box<IndexHandle>> {
     let dim = params.base.dim;
     let is_multi = params.base.multi;
 
-    // Tiered index currently only supports f32
     let wrapper: Box<dyn IndexWrapper> = match (data_type, is_multi) {
         (VecSimType::VecSimType_FLOAT32, false) => Box::new(TieredSingleF32Wrapper::new(
             TieredSingle::new(rust_params),
             data_type,
         )),
+        (VecSimType::VecSimType_FLOAT64, false) => Box::new(TieredSingleF64Wrapper::new(
+            TieredSingle::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_BFLOAT16, false) => Box::new(TieredSingleBF16Wrapper::new(
+            TieredSingle::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_FLOAT16, false) => Box::new(TieredSingleFP16Wrapper::new(
+            TieredSingle::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_INT8, false) => Box::new(TieredSingleI8Wrapper::new(
+            TieredSingle::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_UINT8, false) => Box::new(TieredSingleU8Wrapper::new(
+            TieredSingle::new(rust_params),
+            data_type,
+        )),
         (VecSimType::VecSimType_FLOAT32, true) => Box::new(TieredMultiF32Wrapper::new(
             TieredMulti::new(rust_params),
             data_type,
         )),
-        _ => return None, // Tiered only supports f32 currently
+        (VecSimType::VecSimType_FLOAT64, true) => Box::new(TieredMultiF64Wrapper::new(
+            TieredMulti::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_BFLOAT16, true) => Box::new(TieredMultiBF16Wrapper::new(
+            TieredMulti::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_FLOAT16, true) => Box::new(TieredMultiFP16Wrapper::new(
+            TieredMulti::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_INT8, true) => Box::new(TieredMultiI8Wrapper::new(
+            TieredMulti::new(rust_params),
+            data_type,
+        )),
+        (VecSimType::VecSimType_UINT8, true) => Box::new(TieredMultiU8Wrapper::new(
+            TieredMulti::new(rust_params),
+            data_type,
+        )),
+        // INT32 and INT64 types not yet supported for vector indices
+        (VecSimType::VecSimType_INT32, _) | (VecSimType::VecSimType_INT64, _) => return None,
     };
 
     Some(Box::new(IndexHandle::new(
diff --git a/rust/vecsim-c/src/lib.rs b/rust/vecsim-c/src/lib.rs
index ed1dec6a7..429e1a80c 100644
--- a/rust/vecsim-c/src/lib.rs
+++ b/rust/vecsim-c/src/lib.rs
@@ -588,6 +588,16 @@ pub unsafe extern "C" fn VecSimIndex_NewHNSW(params: *const HNSWParams_C) -> *mu
     }
 }
 
+/// Convert compat VecSimSvsQuantBits to types VecSimSvsQuantBits.
+fn convert_compat_quant_bits(quant: compat::VecSimSvsQuantBits) -> types::VecSimSvsQuantBits {
+    match quant {
+        compat::VecSimSvsQuantBits::VecSimSvsQuant_NONE => types::VecSimSvsQuantBits::VecSimSvsQuant_NONE,
+        compat::VecSimSvsQuantBits::VecSimSvsQuant_Scalar => types::VecSimSvsQuantBits::VecSimSvsQuant_Scalar,
+        compat::VecSimSvsQuantBits::VecSimSvsQuant_4 => types::VecSimSvsQuantBits::VecSimSvsQuant_4,
+        compat::VecSimSvsQuantBits::VecSimSvsQuant_8 => types::VecSimSvsQuantBits::VecSimSvsQuant_8,
+    }
+}
+
 /// Create a new SVS (Vamana) index with specific parameters.
 ///
 /// # Safety
@@ -616,6 +626,7 @@ pub unsafe extern "C" fn VecSimIndex_NewSVS(params: *const SVSParams_C) -> *mut
         constructionWindowSize: c_params.construction_window_size,
         searchWindowSize: c_params.search_window_size,
         twoPassConstruction: true, // Default value
+        quantBits: convert_compat_quant_bits(c_params.quantBits),
     };
     match create_svs_index(&rust_params) {
         Some(boxed) => Box::into_raw(boxed) as *mut VecSimIndex,
@@ -2253,7 +2264,7 @@ fn load_index_from_file(path: &std::path::Path) -> Option<Box<IndexHandle>> {
                 true,
             )))
         }
-        // HNSW Single
+        // HNSW Single - f32
         (IndexTypeId::HnswSingle, DataTypeId::F32) => {
             use vecsim::index::HnswSingle;
             let index = HnswSingle::<f32>::load_from_file(path).ok()?;
@@ -2270,7 +2281,96 @@ fn load_index_from_file(path: &std::path::Path) -> Option<Box<IndexHandle>> {
                 false,
             )))
         }
-        // HNSW Multi
+        // HNSW Single - f64
+        (IndexTypeId::HnswSingle, DataTypeId::F64) => {
+            use vecsim::index::HnswSingle;
+            let index = HnswSingle::<f64>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswSingleF64Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_FLOAT64,
+                )),
+                VecSimType::VecSimType_FLOAT64,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                false,
+            )))
+        }
+        // HNSW Single - BFloat16
+        (IndexTypeId::HnswSingle, DataTypeId::BFloat16) => {
+            use vecsim::index::HnswSingle;
+            use vecsim::types::BFloat16;
+            let index = HnswSingle::<BFloat16>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswSingleBF16Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_BFLOAT16,
+                )),
+                VecSimType::VecSimType_BFLOAT16,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                false,
+            )))
+        }
+        // HNSW Single - Float16
+        (IndexTypeId::HnswSingle, DataTypeId::Float16) => {
+            use vecsim::index::HnswSingle;
+            use vecsim::types::Float16;
+            let index = HnswSingle::<Float16>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswSingleFP16Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_FLOAT16,
+                )),
+                VecSimType::VecSimType_FLOAT16,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                false,
+            )))
+        }
+        // HNSW Single - Int8
+        (IndexTypeId::HnswSingle, DataTypeId::Int8) => {
+            use vecsim::index::HnswSingle;
+            use vecsim::types::Int8;
+            let index = HnswSingle::<Int8>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswSingleI8Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_INT8,
+                )),
+                VecSimType::VecSimType_INT8,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                false,
+            )))
+        }
+        // HNSW Single - UInt8
+        (IndexTypeId::HnswSingle, DataTypeId::UInt8) => {
+            use vecsim::index::HnswSingle;
+            use vecsim::types::UInt8;
+            let index = HnswSingle::<UInt8>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswSingleU8Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_UINT8,
+                )),
+                VecSimType::VecSimType_UINT8,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                false,
+            )))
+        }
+        // HNSW Multi - f32
         (IndexTypeId::HnswMulti, DataTypeId::F32) => {
             use vecsim::index::HnswMulti;
             let index = HnswMulti::<f32>::load_from_file(path).ok()?;
@@ -2287,6 +2387,95 @@ fn load_index_from_file(path: &std::path::Path) -> Option<Box<IndexHandle>> {
                 true,
             )))
         }
+        // HNSW Multi - f64
+        (IndexTypeId::HnswMulti, DataTypeId::F64) => {
+            use vecsim::index::HnswMulti;
+            let index = HnswMulti::<f64>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswMultiF64Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_FLOAT64,
+                )),
+                VecSimType::VecSimType_FLOAT64,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                true,
+            )))
+        }
+        // HNSW Multi - BFloat16
+        (IndexTypeId::HnswMulti, DataTypeId::BFloat16) => {
+            use vecsim::index::HnswMulti;
+            use vecsim::types::BFloat16;
+            let index = HnswMulti::<BFloat16>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswMultiBF16Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_BFLOAT16,
+                )),
+                VecSimType::VecSimType_BFLOAT16,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                true,
+            )))
+        }
+        // HNSW Multi - Float16
+        (IndexTypeId::HnswMulti, DataTypeId::Float16) => {
+            use vecsim::index::HnswMulti;
+            use vecsim::types::Float16;
+            let index = HnswMulti::<Float16>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswMultiFP16Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_FLOAT16,
+                )),
+                VecSimType::VecSimType_FLOAT16,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                true,
+            )))
+        }
+        // HNSW Multi - Int8
+        (IndexTypeId::HnswMulti, DataTypeId::Int8) => {
+            use vecsim::index::HnswMulti;
+            use vecsim::types::Int8;
+            let index = HnswMulti::<Int8>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswMultiI8Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_INT8,
+                )),
+                VecSimType::VecSimType_INT8,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                true,
+            )))
+        }
+        // HNSW Multi - UInt8
+        (IndexTypeId::HnswMulti, DataTypeId::UInt8) => {
+            use vecsim::index::HnswMulti;
+            use vecsim::types::UInt8;
+            let index = HnswMulti::<UInt8>::load_from_file(path).ok()?;
+            let metric = convert_metric_to_c(header.metric);
+            Some(Box::new(IndexHandle::new(
+                Box::new(index::HnswMultiU8Wrapper::new(
+                    index,
+                    VecSimType::VecSimType_UINT8,
+                )),
+                VecSimType::VecSimType_UINT8,
+                VecSimAlgo::VecSimAlgo_HNSWLIB,
+                metric,
+                header.dimension,
+                true,
+            )))
+        }
         // SVS Single
         (IndexTypeId::SvsSingle, DataTypeId::F32) => {
             use vecsim::index::SvsSingle;
@@ -3368,11 +3557,11 @@ mod tests {
     #[test]
     fn test_tiered_unsupported_type_returns_null() {
         let mut params = test_tiered_params();
-        params.base.type_ = VecSimType::VecSimType_FLOAT64; // Not supported
+        params.base.type_ = VecSimType::VecSimType_INT32; // Not supported
 
         unsafe {
             let index = VecSimIndex_NewTiered(&params);
-            assert!(index.is_null(), "Tiered should fail for f64");
+            assert!(index.is_null(), "Tiered should fail for INT32");
         }
     }
 
@@ -3630,13 +3819,13 @@ mod tests {
     #[test]
     fn test_disk_unsupported_type_returns_null() {
         let dir = tempdir().unwrap();
-        let path = dir.path().join("test_disk_f64.bin");
+        let path = dir.path().join("test_disk_int32.bin");
         let path_cstr = CString::new(path.to_str().unwrap()).unwrap();
 
         let params = DiskParams {
             base: VecSimParams {
                 algo: VecSimAlgo::VecSimAlgo_BF,
-                type_: VecSimType::VecSimType_FLOAT64, // Not supported
+                type_: VecSimType::VecSimType_INT32, // Not supported
                 dim: 4,
                 metric: VecSimMetric::VecSimMetric_L2,
                 multi: false,
@@ -3653,7 +3842,7 @@ mod tests {
 
         unsafe {
             let index = VecSimIndex_NewDisk(&params);
-            assert!(index.is_null(), "Disk index should fail for f64");
+            assert!(index.is_null(), "Disk index should fail for INT32");
         }
     }
 
diff --git a/rust/vecsim-c/src/params.rs b/rust/vecsim-c/src/params.rs
index 317a286c7..30d23fecf 100644
--- a/rust/vecsim-c/src/params.rs
+++ b/rust/vecsim-c/src/params.rs
@@ -1,6 +1,6 @@
 //! C-compatible parameter structs for index creation.
 
-use crate::types::{VecSimAlgo, VecSimMetric, VecSimType};
+use crate::types::{VecSimAlgo, VecSimMetric, VecSimSvsQuantBits, VecSimType};
 
 /// Common base parameters for all index types.
 #[repr(C)]
@@ -102,6 +102,8 @@ pub struct SVSParams {
     pub searchWindowSize: usize,
     /// Enable two-pass construction for better recall (default: true).
     pub twoPassConstruction: bool,
+    /// Quantization mode for memory-efficient storage.
+    pub quantBits: VecSimSvsQuantBits,
 }
 
 impl Default for SVSParams {
@@ -116,6 +118,7 @@ impl Default for SVSParams {
             constructionWindowSize: 200,
             searchWindowSize: 100,
             twoPassConstruction: true,
+            quantBits: VecSimSvsQuantBits::VecSimSvsQuant_NONE,
         }
     }
 }
@@ -327,6 +330,23 @@ impl SVSParams {
             .with_search_l(self.searchWindowSize)
             .with_capacity(self.base.initialCapacity)
             .with_two_pass(self.twoPassConstruction)
+            .with_quantization(self.quantBits.to_rust_quantization())
+    }
+}
+
+impl VecSimSvsQuantBits {
+    /// Convert C FFI quantization enum to Rust quantization enum.
+    pub fn to_rust_quantization(&self) -> vecsim::index::SvsQuantization {
+        match self {
+            VecSimSvsQuantBits::VecSimSvsQuant_NONE => vecsim::index::SvsQuantization::None,
+            VecSimSvsQuantBits::VecSimSvsQuant_Scalar => vecsim::index::SvsQuantization::Scalar,
+            VecSimSvsQuantBits::VecSimSvsQuant_4 => vecsim::index::SvsQuantization::Lvq4,
+            VecSimSvsQuantBits::VecSimSvsQuant_8 => vecsim::index::SvsQuantization::Lvq8,
+            VecSimSvsQuantBits::VecSimSvsQuant_4x4 => vecsim::index::SvsQuantization::Lvq4x4,
+            VecSimSvsQuantBits::VecSimSvsQuant_4x8 => vecsim::index::SvsQuantization::Lvq4x8,
+            VecSimSvsQuantBits::VecSimSvsQuant_4x8_LeanVec => vecsim::index::SvsQuantization::LeanVec4x8,
+            VecSimSvsQuantBits::VecSimSvsQuant_8x8_LeanVec => vecsim::index::SvsQuantization::LeanVec8x8,
+        }
     }
 }
 
diff --git a/rust/vecsim-c/src/query.rs b/rust/vecsim-c/src/query.rs
index 22708dcab..4fba3a6a6 100644
--- a/rust/vecsim-c/src/query.rs
+++ b/rust/vecsim-c/src/query.rs
@@ -33,7 +33,7 @@ impl QueryReplyHandle {
         }
     }
 
-    pub fn get_iterator(&self) -> QueryReplyIteratorHandle {
+    pub fn get_iterator(&self) -> QueryReplyIteratorHandle<'_> {
         QueryReplyIteratorHandle::new(&self.reply.results)
     }
 
diff --git a/rust/vecsim-c/src/types.rs b/rust/vecsim-c/src/types.rs
index 54f19c7f9..7593184cc 100644
--- a/rust/vecsim-c/src/types.rs
+++ b/rust/vecsim-c/src/types.rs
@@ -181,6 +181,32 @@ pub enum VecSearchMode {
     RANGE_QUERY = 5,
 }
 
+/// SVS quantization bits configuration.
+///
+/// Matches C++ VecSimSvsQuantBits enum for compatibility.
+/// The values are encoded as: primary_bits | (residual_bits << 8) | (is_leanvec << 16)
+#[repr(C)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum VecSimSvsQuantBits {
+    /// No quantization (full precision).
+    #[default]
+    VecSimSvsQuant_NONE = 0,
+    /// 8-bit scalar quantization (SQ8).
+    VecSimSvsQuant_Scalar = 1,
+    /// 4-bit LVQ quantization.
+    VecSimSvsQuant_4 = 4,
+    /// 8-bit LVQ quantization.
+    VecSimSvsQuant_8 = 8,
+    /// 4-bit primary + 4-bit residual LVQ.
+    VecSimSvsQuant_4x4 = 0x0404, // 4 | (4 << 8)
+    /// 4-bit primary + 8-bit residual LVQ.
+    VecSimSvsQuant_4x8 = 0x0804, // 4 | (8 << 8)
+    /// LeanVec with 4-bit primary + 8-bit residual.
+    VecSimSvsQuant_4x8_LeanVec = 0x010804, // 4 | (8 << 8) | (1 << 16)
+    /// LeanVec with 8-bit primary + 8-bit residual.
+    VecSimSvsQuant_8x8_LeanVec = 0x010808, // 8 | (8 << 8) | (1 << 16)
+}
+
 /// Timeout callback function type.
 /// Returns non-zero on timeout.
 pub type timeoutCallbackFunction = Option<unsafe extern "C" fn(ctx: *mut std::ffi::c_void) -> i32>;
diff --git a/rust/vecsim/src/distance/simd/avx.rs b/rust/vecsim/src/distance/simd/avx.rs
index 5c168db28..ed17a9305 100644
--- a/rust/vecsim/src/distance/simd/avx.rs
+++ b/rust/vecsim/src/distance/simd/avx.rs
@@ -8,7 +8,7 @@
 
 #![cfg(target_arch = "x86_64")]
 
-use crate::types::VectorElement;
+use crate::types::{DistanceType, VectorElement};
 
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
diff --git a/rust/vecsim/src/distance/simd/avx2.rs b/rust/vecsim/src/distance/simd/avx2.rs
index 3f66550ea..0e4fc0c91 100644
--- a/rust/vecsim/src/distance/simd/avx2.rs
+++ b/rust/vecsim/src/distance/simd/avx2.rs
@@ -5,7 +5,7 @@
 
 #![cfg(target_arch = "x86_64")]
 
-use crate::types::VectorElement;
+use crate::types::{DistanceType, VectorElement};
 
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
diff --git a/rust/vecsim/src/distance/simd/avx512.rs b/rust/vecsim/src/distance/simd/avx512.rs
index eeef9559b..d0d0dd8f4 100644
--- a/rust/vecsim/src/distance/simd/avx512.rs
+++ b/rust/vecsim/src/distance/simd/avx512.rs
@@ -5,7 +5,7 @@
 
 #![cfg(target_arch = "x86_64")]
 
-use crate::types::VectorElement;
+use crate::types::{DistanceType, VectorElement};
 
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
diff --git a/rust/vecsim/src/distance/simd/avx512bw.rs b/rust/vecsim/src/distance/simd/avx512bw.rs
index b7a38fb5f..e48097aea 100644
--- a/rust/vecsim/src/distance/simd/avx512bw.rs
+++ b/rust/vecsim/src/distance/simd/avx512bw.rs
@@ -10,7 +10,7 @@
 
 #![cfg(target_arch = "x86_64")]
 
-use crate::types::{Int8, UInt8, VectorElement};
+use crate::types::{Int8, UInt8, DistanceType, VectorElement};
 
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
diff --git a/rust/vecsim/src/distance/simd/sse.rs b/rust/vecsim/src/distance/simd/sse.rs
index 850d5f841..0c6d60ad6 100644
--- a/rust/vecsim/src/distance/simd/sse.rs
+++ b/rust/vecsim/src/distance/simd/sse.rs
@@ -5,7 +5,7 @@
 
 #![cfg(target_arch = "x86_64")]
 
-use crate::types::VectorElement;
+use crate::types::{DistanceType, VectorElement};
 
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
diff --git a/rust/vecsim/src/distance/simd/sse4.rs b/rust/vecsim/src/distance/simd/sse4.rs
index f5bef2c79..d8c2a9c09 100644
--- a/rust/vecsim/src/distance/simd/sse4.rs
+++ b/rust/vecsim/src/distance/simd/sse4.rs
@@ -7,7 +7,7 @@
 
 #![cfg(target_arch = "x86_64")]
 
-use crate::types::VectorElement;
+use crate::types::{DistanceType, VectorElement};
 
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
diff --git a/rust/vecsim/src/index/hnsw/concurrent_graph.rs b/rust/vecsim/src/index/hnsw/concurrent_graph.rs
index 349ef45a2..ef28ddb5f 100644
--- a/rust/vecsim/src/index/hnsw/concurrent_graph.rs
+++ b/rust/vecsim/src/index/hnsw/concurrent_graph.rs
@@ -12,7 +12,16 @@ use super::ElementGraphData;
 use crate::types::IdType;
 use parking_lot::RwLock;
 use std::cell::UnsafeCell;
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering};
+
+/// Flags for element status (matches C++ Flags enum).
+#[allow(dead_code)]
+mod flags {
+    /// Element is logically deleted but still exists in the graph.
+    pub const DELETE_MARK: u8 = 0x1;
+    /// Element is being inserted into the graph.
+    pub const IN_PROCESS: u8 = 0x2;
+}
 
 /// Default segment size (number of elements per segment).
 const SEGMENT_SIZE: usize = 4096;
@@ -90,6 +99,10 @@ pub struct ConcurrentGraph {
     segment_size: usize,
     /// Total number of initialized elements (approximate, may be slightly stale).
     len: AtomicUsize,
+    /// Atomic flags for each element (DELETE_MARK, IN_PROCESS).
+    /// Stored separately for O(1) access without acquiring segment lock.
+    /// This matches the C++ idToMetaData approach.
+    element_flags: RwLock<Vec<AtomicU8>>,
 }
 
 impl ConcurrentGraph {
@@ -102,10 +115,16 @@ impl ConcurrentGraph {
             .map(|_| GraphSegment::new(segment_size))
             .collect();
 
+        // Pre-allocate flags array for initial capacity
+        let flags: Vec<AtomicU8> = (0..initial_capacity)
+            .map(|_| AtomicU8::new(0))
+            .collect();
+
         Self {
             segments: RwLock::new(segments),
             segment_size,
             len: AtomicUsize::new(0),
+            element_flags: RwLock::new(flags),
         }
     }
 
@@ -197,6 +216,56 @@ impl ConcurrentGraph {
         }
     }
 
+    /// Ensure the flags array has capacity for the given ID.
+    fn ensure_flags_capacity(&self, id: IdType) {
+        let id_usize = id as usize;
+
+        // Fast path - check with read lock
+        {
+            let flags = self.element_flags.read();
+            if id_usize < flags.len() {
+                return;
+            }
+        }
+
+        // Slow path - need to grow
+        let mut flags = self.element_flags.write();
+        // Double-check after acquiring write lock
+        let needed = id_usize + 1;
+        let current_len = flags.len();
+        if needed > current_len {
+            // Grow by at least one segment worth
+            let new_capacity = needed.max(current_len + self.segment_size);
+            let additional = new_capacity - current_len;
+            flags.reserve(additional);
+            for _ in 0..additional {
+                flags.push(AtomicU8::new(0));
+            }
+        }
+    }
+
+    /// Check if an element is marked as deleted (O(1) lock-free after initial read lock).
+    #[inline]
+    pub fn is_marked_deleted(&self, id: IdType) -> bool {
+        let id_usize = id as usize;
+        let flags = self.element_flags.read();
+        if id_usize < flags.len() {
+            flags[id_usize].load(Ordering::Acquire) & flags::DELETE_MARK != 0
+        } else {
+            false
+        }
+    }
+
+    /// Mark an element as deleted atomically.
+    pub fn mark_deleted(&self, id: IdType) {
+        self.ensure_flags_capacity(id);
+        let flags = self.element_flags.read();
+        let id_usize = id as usize;
+        if id_usize < flags.len() {
+            flags[id_usize].fetch_or(flags::DELETE_MARK, Ordering::Release);
+        }
+    }
+
     /// Get the approximate number of elements.
     #[inline]
     pub fn len(&self) -> usize {
@@ -260,9 +329,35 @@ impl ConcurrentGraph {
         }
         drop(segments);
 
-        // Set new elements
+        // Reset flags array (clear all flags)
+        {
+            let mut flags = self.element_flags.write();
+            // Clear existing flags
+            for flag in flags.iter() {
+                flag.store(0, Ordering::Release);
+            }
+            // Resize if needed
+            let needed = new_elements.len();
+            let current_len = flags.len();
+            if needed > current_len {
+                let additional = needed - current_len;
+                flags.reserve(additional);
+                for _ in 0..additional {
+                    flags.push(AtomicU8::new(0));
+                }
+            }
+        }
+
+        // Set new elements and update flags for deleted elements
         for (id, element) in new_elements.into_iter().enumerate() {
             if let Some(data) = element {
+                // If the element is marked as deleted in metadata, update flags
+                if data.meta.deleted {
+                    let flags = self.element_flags.read();
+                    if id < flags.len() {
+                        flags[id].fetch_or(flags::DELETE_MARK, Ordering::Release);
+                    }
+                }
                 self.set(id as IdType, data);
             }
         }
diff --git a/rust/vecsim/src/index/hnsw/mod.rs b/rust/vecsim/src/index/hnsw/mod.rs
index 0c2d594d8..9cf47ffd5 100644
--- a/rust/vecsim/src/index/hnsw/mod.rs
+++ b/rust/vecsim/src/index/hnsw/mod.rs
@@ -724,6 +724,10 @@ impl<T: VectorElement> HnswCore<T> {
 
     /// Mark an element as deleted (concurrent version).
     pub fn mark_deleted_concurrent(&self, id: IdType) {
+        // Update the flags array first (O(1) atomic operation for fast deleted checks)
+        self.graph.mark_deleted(id);
+
+        // Also update the element's metadata for consistency
         if let Some(element) = self.graph.get(id) {
             // ElementMetaData.deleted is not atomic, but this is a best-effort
             // tombstone - reads may see stale state briefly, which is acceptable
diff --git a/rust/vecsim/src/index/hnsw/search.rs b/rust/vecsim/src/index/hnsw/search.rs
index 5bc28f228..0b905db78 100644
--- a/rust/vecsim/src/index/hnsw/search.rs
+++ b/rust/vecsim/src/index/hnsw/search.rs
@@ -19,6 +19,13 @@ use crate::utils::{MaxHeap, MinHeap};
 pub trait GraphAccess {
     /// Get an element by ID.
     fn get(&self, id: IdType) -> Option<&ElementGraphData>;
+
+    /// Check if an element is marked as deleted.
+    /// Default implementation uses get(), but can be overridden for efficiency.
+    #[inline]
+    fn is_deleted(&self, id: IdType) -> bool {
+        self.get(id).is_some_and(|e| e.meta.deleted)
+    }
 }
 
 /// Implementation for slice-based graphs (used in tests).
@@ -43,6 +50,13 @@ impl GraphAccess for ConcurrentGraph {
     fn get(&self, id: IdType) -> Option<&ElementGraphData> {
         ConcurrentGraph::get(self, id)
     }
+
+    /// O(1) deleted check using separate flags array.
+    /// This avoids acquiring a segment read lock for every neighbor check.
+    #[inline]
+    fn is_deleted(&self, id: IdType) -> bool {
+        self.is_marked_deleted(id)
+    }
 }
 
 /// Result of a layer search: (id, distance) pairs.
@@ -236,10 +250,7 @@ where
                         candidates.push(neighbor, dist);
 
                         // Only add to results if not deleted and passes filter
-                        let is_deleted = graph
-                            .get(neighbor)
-                            .is_some_and(|e| e.meta.deleted);
-                        if !is_deleted {
+                        if !graph.is_deleted(neighbor) {
                             let passes = filter.is_none_or(|f| f(neighbor));
                             if passes {
                                 results.try_insert(neighbor, dist);
@@ -251,12 +262,8 @@ where
         }
     }
 
-    // Convert results to vector
-    results
-        .into_sorted_vec()
-        .into_iter()
-        .map(|e| (e.id, e.distance))
-        .collect()
+    // Convert results to vector (optimized to minimize allocations)
+    results.into_sorted_pairs()
 }
 
 use crate::types::LabelType;
@@ -402,10 +409,7 @@ where
                     }
 
                     // Only update label tracking if not deleted
-                    let is_deleted = graph
-                        .get(neighbor)
-                        .is_some_and(|e| e.meta.deleted);
-                    if !is_deleted {
+                    if !graph.is_deleted(neighbor) {
                         if let Some(&label) = id_to_label.get(&neighbor) {
                             let passes = filter.is_none_or(|f| f(label));
                             if passes {
@@ -576,15 +580,10 @@ where
                         candidates.push(neighbor, dist);
 
                         // Only add to results if not deleted, within radius, and passes filter
-                        if dist_f64 <= radius.to_f64() {
-                            let is_deleted = graph
-                                .get(neighbor)
-                                .is_some_and(|e| e.meta.deleted);
-                            if !is_deleted {
-                                let passes = filter.is_none_or(|f| f(neighbor));
-                                if passes {
-                                    results.push((neighbor, dist));
-                                }
+                        if dist_f64 <= radius.to_f64() && !graph.is_deleted(neighbor) {
+                            let passes = filter.is_none_or(|f| f(neighbor));
+                            if passes {
+                                results.push((neighbor, dist));
                             }
                         }
                     }
diff --git a/rust/vecsim/src/index/hnsw/single.rs b/rust/vecsim/src/index/hnsw/single.rs
index 1760f9c3a..d16bcd785 100644
--- a/rust/vecsim/src/index/hnsw/single.rs
+++ b/rust/vecsim/src/index/hnsw/single.rs
@@ -6,7 +6,7 @@
 use super::{ElementGraphData, HnswCore, HnswParams};
 use crate::index::traits::{BatchIterator, IndexError, IndexInfo, QueryError, VecSimIndex};
 use crate::query::{QueryParams, QueryReply, QueryResult};
-use crate::types::{DistanceType, IdType, LabelType, VectorElement};
+use crate::types::{IdType, LabelType, VectorElement};
 use dashmap::DashMap;
 
 /// Statistics about an HNSW index.
diff --git a/rust/vecsim/src/index/hnsw/visited.rs b/rust/vecsim/src/index/hnsw/visited.rs
index 80bc47e3c..dd569189b 100644
--- a/rust/vecsim/src/index/hnsw/visited.rs
+++ b/rust/vecsim/src/index/hnsw/visited.rs
@@ -123,7 +123,7 @@ impl VisitedNodesHandlerPool {
     }
 
     /// Get a handler from the pool, creating one if necessary.
-    pub fn get(&self) -> PooledHandler {
+    pub fn get(&self) -> PooledHandler<'_> {
         let cap = self.default_capacity.load(std::sync::atomic::Ordering::Acquire);
         let handler = self.handlers.lock().pop().unwrap_or_else(|| {
             VisitedNodesHandler::new(cap)
diff --git a/rust/vecsim/src/index/mod.rs b/rust/vecsim/src/index/mod.rs
index 78c3ccca4..51dffa3b3 100644
--- a/rust/vecsim/src/index/mod.rs
+++ b/rust/vecsim/src/index/mod.rs
@@ -41,7 +41,7 @@ pub use tiered::{
 
 // Re-export SVS types
 pub use svs::{
-    SvsParams, SvsSingle, SvsMulti, SvsStats,
+    SvsParams, SvsSingle, SvsMulti, SvsStats, SvsQuantization,
 };
 
 // Re-export Tiered SVS types
diff --git a/rust/vecsim/src/index/svs/mod.rs b/rust/vecsim/src/index/svs/mod.rs
index b3280aa4b..a1c899aed 100644
--- a/rust/vecsim/src/index/svs/mod.rs
+++ b/rust/vecsim/src/index/svs/mod.rs
@@ -43,10 +43,12 @@ pub mod graph;
 pub mod search;
 pub mod single;
 pub mod multi;
+pub mod sq8;
 
 pub use graph::{VamanaGraph, VamanaGraphData};
 pub use single::{SvsSingle, SvsStats, SvsSingleBatchIterator};
 pub use multi::{SvsMulti, SvsMultiBatchIterator};
+pub use sq8::SvsSq8Core;
 
 use crate::containers::DataBlocks;
 use crate::distance::{create_distance_function, DistanceFunction, Metric};
@@ -66,6 +68,32 @@ pub const DEFAULT_CONSTRUCTION_L: usize = 200;
 /// Default search window size.
 pub const DEFAULT_SEARCH_L: usize = 100;
 
+/// Quantization mode for SVS index.
+///
+/// Quantization reduces memory usage by storing compressed vectors.
+/// The tradeoff is slightly reduced recall accuracy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum SvsQuantization {
+    /// No quantization - full precision vectors.
+    #[default]
+    None,
+    /// Scalar 8-bit quantization (SQ8).
+    /// Each dimension is quantized to 8-bit unsigned integer with per-vector scaling.
+    Scalar,
+    /// 4-bit LVQ quantization.
+    Lvq4,
+    /// 8-bit LVQ quantization.
+    Lvq8,
+    /// 4-bit primary + 4-bit residual LVQ (two-level).
+    Lvq4x4,
+    /// 4-bit primary + 8-bit residual LVQ (two-level).
+    Lvq4x8,
+    /// LeanVec with 4-bit primary + 8-bit residual (dimension reduction + quantization).
+    LeanVec4x8,
+    /// LeanVec with 8-bit primary + 8-bit residual (dimension reduction + quantization).
+    LeanVec8x8,
+}
+
 /// Parameters for creating an SVS (Vamana) index.
 #[derive(Debug, Clone)]
 pub struct SvsParams {
@@ -85,6 +113,8 @@ pub struct SvsParams {
     pub initial_capacity: usize,
     /// Use two-pass construction (recommended for better recall).
     pub two_pass_construction: bool,
+    /// Quantization mode for memory-efficient storage.
+    pub quantization: SvsQuantization,
 }
 
 impl SvsParams {
@@ -99,6 +129,7 @@ impl SvsParams {
             search_window_size: DEFAULT_SEARCH_L,
             initial_capacity: 1024,
             two_pass_construction: true,
+            quantization: SvsQuantization::None,
         }
     }
 
@@ -132,6 +163,12 @@ impl SvsParams {
         self
     }
 
+    /// Set quantization mode.
+    pub fn with_quantization(mut self, quantization: SvsQuantization) -> Self {
+        self.quantization = quantization;
+        self
+    }
+
     /// Enable/disable two-pass construction.
     pub fn with_two_pass(mut self, enable: bool) -> Self {
         self.two_pass_construction = enable;
diff --git a/rust/vecsim/src/index/svs/search.rs b/rust/vecsim/src/index/svs/search.rs
index 1b0da3b63..1aa6e4be8 100644
--- a/rust/vecsim/src/index/svs/search.rs
+++ b/rust/vecsim/src/index/svs/search.rs
@@ -249,6 +249,134 @@ fn select_closest<D: DistanceType>(candidates: &[(IdType, D)], max_degree: usize
     sorted.into_iter().take(max_degree).map(|(id, _)| id).collect()
 }
 
+// ============================================================================
+// SQ8 Asymmetric Distance Search
+// ============================================================================
+
+use crate::containers::Sq8DataBlocks;
+use crate::distance::Metric;
+use crate::quantization::sq8::{sq8_asymmetric_cosine, sq8_asymmetric_inner_product, sq8_asymmetric_l2_squared};
+
+/// Result of an SQ8 search with f32 distances.
+pub struct Sq8SearchResult {
+    pub results: Vec<Sq8SearchResultEntry>,
+}
+
+/// Single SQ8 search result entry.
+pub struct Sq8SearchResultEntry {
+    pub id: IdType,
+    pub distance: f32,
+}
+
+/// Greedy beam search using SQ8 asymmetric distance.
+///
+/// Query vector is in f32, stored vectors are in SQ8 format.
+#[allow(clippy::too_many_arguments)]
+pub fn greedy_beam_search_sq8<P>(
+    entry_point: IdType,
+    query: &[f32],
+    beam_width: usize,
+    graph: &VamanaGraph,
+    data: &Sq8DataBlocks,
+    metric: Metric,
+    dim: usize,
+    visited: &VisitedNodesHandler,
+    filter: Option<&P>,
+) -> Sq8SearchResult
+where
+    P: Fn(IdType) -> bool + ?Sized,
+{
+    // Compute asymmetric distance
+    let compute_dist = |id: IdType| -> Option<f32> {
+        let (quantized, meta) = data.get(id)?;
+        Some(match metric {
+            Metric::L2 => sq8_asymmetric_l2_squared(query, quantized, meta, dim),
+            Metric::InnerProduct => sq8_asymmetric_inner_product(query, quantized, meta, dim),
+            Metric::Cosine => sq8_asymmetric_cosine(query, quantized, meta, dim),
+        })
+    };
+
+    // Candidates to explore (min-heap: closest first)
+    let mut candidates = MinHeap::<f32>::with_capacity(beam_width * 2);
+
+    // Results (max-heap: keeps L closest, largest at top)
+    let mut results = MaxHeap::<f32>::new(beam_width);
+
+    // Initialize with entry point
+    visited.visit(entry_point);
+
+    if let Some(dist) = compute_dist(entry_point) {
+        candidates.push(entry_point, dist);
+
+        if !graph.is_deleted(entry_point) {
+            let passes = filter.is_none_or(|f| f(entry_point));
+            if passes {
+                results.insert(entry_point, dist);
+            }
+        }
+    }
+
+    // Explore candidates
+    while let Some(candidate) = candidates.pop() {
+        // Check if we can stop early
+        if results.is_full() {
+            if let Some(worst_dist) = results.top_distance() {
+                if (candidate.distance as f64) > worst_dist.to_f64() {
+                    break;
+                }
+            }
+        }
+
+        // Skip deleted nodes
+        if graph.is_deleted(candidate.id) {
+            continue;
+        }
+
+        // Explore neighbors
+        for neighbor in graph.get_neighbors(candidate.id) {
+            if visited.visit(neighbor) {
+                continue; // Already visited
+            }
+
+            // Skip deleted neighbors
+            if graph.is_deleted(neighbor) {
+                continue;
+            }
+
+            // Compute distance
+            if let Some(dist) = compute_dist(neighbor) {
+                // Check filter
+                let passes = filter.is_none_or(|f| f(neighbor));
+
+                // Add to results if it passes filter and is close enough
+                if passes
+                    && (!results.is_full()
+                        || (dist as f64) < results.top_distance().unwrap().to_f64())
+                {
+                    results.try_insert(neighbor, dist);
+                }
+
+                // Add to candidates for exploration if close enough
+                if !results.is_full() || (dist as f64) < results.top_distance().unwrap().to_f64() {
+                    candidates.push(neighbor, dist);
+                }
+            }
+        }
+    }
+
+    // Convert to sorted vector
+    Sq8SearchResult {
+        results: results
+            .into_sorted_vec()
+            .into_iter()
+            .map(|e| Sq8SearchResultEntry {
+                id: e.id,
+                distance: e.distance,
+            })
+            .collect(),
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/rust/vecsim/src/index/svs/single.rs b/rust/vecsim/src/index/svs/single.rs
index 1ffef3d91..c4f58536a 100644
--- a/rust/vecsim/src/index/svs/single.rs
+++ b/rust/vecsim/src/index/svs/single.rs
@@ -646,6 +646,7 @@ impl SvsSingle<f32> {
             search_window_size,
             initial_capacity: header.count.max(1024),
             two_pass_construction,
+            quantization: super::SvsQuantization::None, // Loaded indices use no quantization
         };
 
         // Create the index
diff --git a/rust/vecsim/src/index/svs/sq8.rs b/rust/vecsim/src/index/svs/sq8.rs
new file mode 100644
index 000000000..bf26332a6
--- /dev/null
+++ b/rust/vecsim/src/index/svs/sq8.rs
@@ -0,0 +1,278 @@
+//! SQ8-quantized SVS (Vamana) index implementation.
+//!
+//! This module provides SVS indices that use scalar 8-bit quantization for
+//! memory-efficient vector storage. Vectors are stored as u8 with per-vector
+//! metadata for dequantization and asymmetric distance computation.
+//!
+//! ## Memory Savings
+//!
+//! SQ8 quantization reduces memory usage by ~4x for f32 vectors:
+//! - Original: 4 bytes per dimension
+//! - Quantized: 1 byte per dimension + 16 bytes metadata per vector
+//!
+//! ## Asymmetric Distance
+//!
+//! During search, the query vector stays in f32 while stored vectors are in SQ8.
+//! This provides better accuracy than symmetric quantization.
+
+use super::{SvsParams, VamanaGraph};
+use crate::containers::Sq8DataBlocks;
+use crate::distance::Metric;
+use crate::index::hnsw::VisitedNodesHandlerPool;
+use crate::quantization::sq8::{sq8_asymmetric_cosine, sq8_asymmetric_inner_product, sq8_asymmetric_l2_squared};
+use crate::quantization::Sq8VectorMeta;
+use crate::types::{IdType, INVALID_ID};
+use std::sync::atomic::AtomicU32;
+
+/// Core SQ8-quantized SVS implementation.
+pub struct SvsSq8Core {
+    /// Quantized vector storage.
+    pub data: Sq8DataBlocks,
+    /// Graph structure.
+    pub graph: VamanaGraph,
+    /// Distance metric.
+    pub metric: Metric,
+    /// Medoid (entry point).
+    pub medoid: AtomicU32,
+    /// Pool of visited handlers for concurrent searches.
+    pub visited_pool: VisitedNodesHandlerPool,
+    /// Parameters.
+    pub params: SvsParams,
+}
+
+impl SvsSq8Core {
+    /// Create a new SQ8-quantized SVS core.
+    pub fn new(params: SvsParams) -> Self {
+        let data = Sq8DataBlocks::new(params.dim, params.initial_capacity);
+        let graph = VamanaGraph::new(params.initial_capacity, params.graph_max_degree);
+        let visited_pool = VisitedNodesHandlerPool::new(params.initial_capacity);
+
+        Self {
+            data,
+            graph,
+            metric: params.metric,
+            medoid: AtomicU32::new(INVALID_ID),
+            visited_pool,
+            params,
+        }
+    }
+
+    /// Add a vector (will be quantized) and return its internal ID.
+    pub fn add_vector(&mut self, vector: &[f32]) -> Option<IdType> {
+        self.data.add(vector)
+    }
+
+    /// Get quantized data and metadata by ID.
+    #[inline]
+    pub fn get_quantized(&self, id: IdType) -> Option<(&[u8], &Sq8VectorMeta)> {
+        self.data.get(id)
+    }
+
+    /// Decode a vector back to f32.
+    pub fn decode_vector(&self, id: IdType) -> Option<Vec<f32>> {
+        self.data.decode(id)
+    }
+
+    /// Compute asymmetric distance between f32 query and quantized stored vector.
+    #[inline]
+    pub fn asymmetric_distance(&self, query: &[f32], quantized: &[u8], meta: &Sq8VectorMeta) -> f32 {
+        match self.metric {
+            Metric::L2 => sq8_asymmetric_l2_squared(query, quantized, meta, self.params.dim),
+            Metric::InnerProduct => sq8_asymmetric_inner_product(query, quantized, meta, self.params.dim),
+            Metric::Cosine => sq8_asymmetric_cosine(query, quantized, meta, self.params.dim),
+        }
+    }
+
+    /// Compute distance between query and stored vector by ID.
+    #[inline]
+    pub fn distance_to(&self, query: &[f32], id: IdType) -> Option<f32> {
+        let (quantized, meta) = self.get_quantized(id)?;
+        Some(self.asymmetric_distance(query, quantized, meta))
+    }
+
+    /// Find the medoid (approximate centroid) of all vectors.
+    pub fn find_medoid(&self) -> Option<IdType> {
+        let ids: Vec<IdType> = self.data.iter_ids().collect();
+        if ids.is_empty() {
+            return None;
+        }
+        if ids.len() == 1 {
+            return Some(ids[0]);
+        }
+
+        // Sample at most 1000 vectors for medoid computation
+        let sample_size = ids.len().min(1000);
+        let sample: Vec<IdType> = if ids.len() <= sample_size {
+            ids.clone()
+        } else {
+            use rand::seq::SliceRandom;
+            let mut rng = rand::thread_rng();
+            let mut shuffled = ids.clone();
+            shuffled.shuffle(&mut rng);
+            shuffled.into_iter().take(sample_size).collect()
+        };
+
+        // Find vector with minimum total distance
+        // For SQ8, we decode to f32 for accurate medoid computation
+        let mut best_id = sample[0];
+        let mut best_total_dist = f64::MAX;
+
+        for &candidate in &sample {
+            if let Some(candidate_data) = self.decode_vector(candidate) {
+                let total_dist: f64 = sample
+                    .iter()
+                    .filter(|&&id| id != candidate)
+                    .filter_map(|&id| self.distance_to(&candidate_data, id))
+                    .map(|d| d as f64)
+                    .sum();
+
+                if total_dist < best_total_dist {
+                    best_total_dist = total_dist;
+                    best_id = candidate;
+                }
+            }
+        }
+
+        Some(best_id)
+    }
+
+    /// Get the dimension.
+    #[inline]
+    pub fn dim(&self) -> usize {
+        self.params.dim
+    }
+
+    /// Get the number of vectors.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Check if empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
+    /// Mark a slot as deleted.
+    pub fn mark_deleted(&mut self, id: IdType) -> bool {
+        self.data.mark_deleted(id)
+    }
+
+    /// Update a vector at the given ID.
+    pub fn update_vector(&mut self, id: IdType, vector: &[f32]) -> bool {
+        self.data.update(id, vector)
+    }
+
+    /// Get the search window size.
+    pub fn search_window_size(&self) -> usize {
+        self.params.search_window_size
+    }
+
+    /// Get the medoid ID.
+    pub fn get_medoid(&self) -> IdType {
+        self.medoid.load(std::sync::atomic::Ordering::Acquire)
+    }
+
+    /// Set the medoid ID.
+    pub fn set_medoid(&self, id: IdType) {
+        self.medoid.store(id, std::sync::atomic::Ordering::Release)
+    }
+}
+
+use super::search::greedy_beam_search_sq8;
+use crate::query::QueryResult;
+use crate::types::LabelType;
+use parking_lot::RwLock;
+use std::collections::HashMap;
+use std::sync::atomic::AtomicUsize;
+
+/// Single-value SQ8-quantized SVS index.
+///
+/// Each label has exactly one associated vector, stored in SQ8 format.
+pub struct SvsSq8Single {
+    /// Core SQ8-quantized SVS implementation.
+    core: RwLock<SvsSq8Core>,
+    /// Label to internal ID mapping.
+    label_to_id: RwLock<HashMap<LabelType, IdType>>,
+    /// Internal ID to label mapping.
+    id_to_label: RwLock<HashMap<IdType, LabelType>>,
+    /// Number of vectors.
+    count: AtomicUsize,
+    /// Maximum capacity (if set).
+    capacity: Option<usize>,
+    /// Construction completed flag.
+    construction_done: RwLock<bool>,
+}
+
+impl SvsSq8Single {
+    /// Create a new SQ8-quantized SVS index.
+    pub fn new(params: SvsParams) -> Self {
+        let initial_capacity = params.initial_capacity;
+        Self {
+            core: RwLock::new(SvsSq8Core::new(params)),
+            label_to_id: RwLock::new(HashMap::with_capacity(initial_capacity)),
+            id_to_label: RwLock::new(HashMap::with_capacity(initial_capacity)),
+            count: AtomicUsize::new(0),
+            capacity: None,
+            construction_done: RwLock::new(false),
+        }
+    }
+
+    /// Get the distance metric.
+    pub fn metric(&self) -> Metric {
+        self.core.read().metric
+    }
+
+    /// Get the dimension.
+    pub fn dim(&self) -> usize {
+        self.core.read().params.dim
+    }
+
+    /// Get the search window size.
+    pub fn search_l(&self) -> usize {
+        self.core.read().search_window_size()
+    }
+
+    /// Search for the k nearest neighbors using SQ8 asymmetric distance.
+    fn search_knn(&self, query: &[f32], k: usize, ef: usize) -> Vec<QueryResult<f32>> {
+        let core = self.core.read();
+
+        let medoid = core.get_medoid();
+        if medoid == INVALID_ID || core.is_empty() {
+            return vec![];
+        }
+
+        let visited = core.visited_pool.get();
+
+        // Perform greedy beam search with SQ8 asymmetric distance
+        let search_result = greedy_beam_search_sq8(
+            medoid,
+            query,
+            ef.max(k),
+            &core.graph,
+            &core.data,
+            core.metric,
+            core.params.dim,
+            &visited,
+            None::<&fn(IdType) -> bool>,
+        );
+
+        // Map internal IDs to labels
+        let id_to_label = self.id_to_label.read();
+        let mut results: Vec<QueryResult<f32>> = search_result
+            .results
+            .into_iter()
+            .filter_map(|r| {
+                id_to_label.get(&r.id).map(|&label| QueryResult {
+                    label,
+                    distance: r.distance,
+                })
+            })
+            .collect();
+
+        results.truncate(k);
+        results
+    }
+}
+
diff --git a/rust/vecsim/src/utils/heap.rs b/rust/vecsim/src/utils/heap.rs
index 148bca482..71dcac12b 100644
--- a/rust/vecsim/src/utils/heap.rs
+++ b/rust/vecsim/src/utils/heap.rs
@@ -139,10 +139,14 @@ impl<D: DistanceType> MaxHeap<D> {
         if self.heap.len() < self.capacity {
             self.heap.push(MaxHeapEntry(HeapEntry::new(id, distance)));
             true
-        } else if let Some(top) = self.heap.peek() {
+        } else if let Some(mut top) = self.heap.peek_mut() {
             if distance < top.0.distance {
-                self.heap.pop();
-                self.heap.push(MaxHeapEntry(HeapEntry::new(id, distance)));
+                // Replace in-place using PeekMut - avoids separate pop+push operations
+                // PeekMut::pop() removes the top element efficiently, then we push the new one
+                // This is more efficient than pop() + push() because it avoids redundant sift operations
+                *top = MaxHeapEntry(HeapEntry::new(id, distance));
+                // Drop the PeekMut to trigger sift_down
+                drop(top);
                 true
             } else {
                 false
@@ -169,7 +173,10 @@ impl<D: DistanceType> MaxHeap<D> {
 
     /// Convert to a sorted vector (smallest distance first).
     pub fn into_sorted_vec(self) -> Vec<HeapEntry<D>> {
-        let mut entries: Vec<_> = self.heap.into_iter().map(|e| e.0).collect();
+        // Pre-allocate with exact capacity to avoid reallocations
+        let len = self.heap.len();
+        let mut entries = Vec::with_capacity(len);
+        entries.extend(self.heap.into_iter().map(|e| e.0));
         entries.sort_by(|a, b| {
             a.distance
                 .partial_cmp(&b.distance)
@@ -178,6 +185,25 @@ impl<D: DistanceType> MaxHeap<D> {
         entries
     }
 
+    /// Convert to a sorted vector of (id, distance) pairs.
+    /// This is optimized to minimize allocations by reusing the heap's buffer.
+    #[inline]
+    pub fn into_sorted_pairs(self) -> Vec<(IdType, D)> {
+        // Use into_sorted_iter from BinaryHeap which pops in sorted order
+        let len = self.heap.len();
+        let mut result = Vec::with_capacity(len);
+        // BinaryHeap::into_sorted_vec() uses into_iter + sort, we do the same
+        // but map directly to the output format
+        let mut entries: Vec<_> = self.heap.into_vec();
+        entries.sort_by(|a, b| {
+            a.0.distance
+                .partial_cmp(&b.0.distance)
+                .unwrap_or(Ordering::Equal)
+        });
+        result.extend(entries.into_iter().map(|e| (e.0.id, e.0.distance)));
+        result
+    }
+
     /// Convert to a vector (unordered).
     pub fn into_vec(self) -> Vec<HeapEntry<D>> {
         self.heap.into_iter().map(|e| e.0).collect()
diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h
index f9d94dc52..a4d5e08e4 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h
@@ -94,6 +94,11 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
     // associated swap jobs.
     std::mutex idToRepairJobsGuard;
 
+    // Counter for vectors inserted directly into HNSW by the main thread (bypassing flat buffer).
+    // This happens in WriteInPlace mode or when the flat buffer is full.
+    // Not atomic since it's only accessed from the main thread.
+    size_t directHNSWInsertions{0};
+
     void executeInsertJob(HNSWInsertJob *job);
     void executeRepairJob(HNSWRepairJob *job);
 
@@ -211,6 +216,7 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
     // needed.
     VecSimIndexDebugInfo debugInfo() const override;
     VecSimIndexBasicInfo basicInfo() const override;
+    VecSimIndexStatsInfo statisticInfo() const override;
     VecSimDebugInfoIterator *debugInfoIterator() const override;
     VecSimBatchIterator *newBatchIterator(const void *queryBlob,
                                           VecSimQueryParams *queryParams) const override {
@@ -729,6 +735,8 @@ int TieredHNSWIndex<DataType, DistType>::addVector(const void *blob, labelType l
         this->lockMainIndexGuard();
         hnsw_index->addVector(storage_blob.get(), label);
         this->unlockMainIndexGuard();
+        // Track direct insertion to HNSW (bypassing flat buffer)
+        ++this->directHNSWInsertions;
         return ret;
     }
     if (this->frontendIndex->indexSize() >= this->flatBufferLimit) {
@@ -746,6 +754,8 @@ int TieredHNSWIndex<DataType, DistType>::addVector(const void *blob, labelType l
             // index.
             auto storage_blob = this->frontendIndex->preprocessForStorage(blob);
             this->insertVectorToHNSW<false>(hnsw_index, label, storage_blob.get());
+            // Track direct insertion to HNSW (flat buffer was full)
+            ++this->directHNSWInsertions;
             return ret;
         }
         // Otherwise, we fall back to the "regular" insertion into the flat buffer
@@ -1151,6 +1161,13 @@ void TieredHNSWIndex<DataType, DistType>::TieredHNSW_BatchIterator::filter_irrel
     results.resize(cur_end - results.begin());
 }
 
+template <typename DataType, typename DistType>
+VecSimIndexStatsInfo TieredHNSWIndex<DataType, DistType>::statisticInfo() const {
+    auto stats = VecSimTieredIndex<DataType, DistType>::statisticInfo();
+    stats.directHNSWInsertions = this->directHNSWInsertions;
+    return stats;
+}
+
 template <typename DataType, typename DistType>
 VecSimIndexDebugInfo TieredHNSWIndex<DataType, DistType>::debugInfo() const {
     auto info = VecSimTieredIndex<DataType, DistType>::debugInfo();
diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h
index fa136b7fe..9f6544d1b 100644
--- a/src/VecSim/vec_sim_common.h
+++ b/src/VecSim/vec_sim_common.h
@@ -250,6 +250,7 @@ typedef struct {
     void *storage; // Opaque pointer to disk storage
     const char *indexName;
     size_t indexNameLen;
+    bool rerank; // Whether to enable reranking for disk-based HNSW
 } VecSimDiskContext;
 
 typedef struct {
@@ -342,6 +343,9 @@ typedef struct {
     size_t memory;
     size_t numberOfMarkedDeleted; // The number of vectors that are marked as deleted (HNSW/tiered
                                   // only).
+    size_t directHNSWInsertions;  // Count of vectors inserted directly into HNSW by main thread
+                                  // (bypassing flat buffer). Tiered HNSW only.
+    size_t flatBufferSize;        // Current flat buffer size. Tiered indexes only.
 } VecSimIndexStatsInfo;
 
 typedef struct {
diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h
index 5cd18d38d..88eabea69 100644
--- a/src/VecSim/vec_sim_index.h
+++ b/src/VecSim/vec_sim_index.h
@@ -111,6 +111,8 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
         return info;
     }
 
+    IndexCalculatorInterface<DistType> *getIndexCalculator() const { return indexCalculator; }
+
 public:
     /**
      * @brief Construct a new Vec Sim Index object
@@ -196,6 +198,8 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
         return VecSimIndexStatsInfo{
             .memory = this->getAllocationSize(),
             .numberOfMarkedDeleted = 0,
+            .directHNSWInsertions = 0,
+            .flatBufferSize = 0,
         };
     }
 
diff --git a/src/VecSim/vec_sim_tiered_index.h b/src/VecSim/vec_sim_tiered_index.h
index 0a36b1f86..61294b90b 100644
--- a/src/VecSim/vec_sim_tiered_index.h
+++ b/src/VecSim/vec_sim_tiered_index.h
@@ -320,6 +320,8 @@ VecSimIndexStatsInfo VecSimTieredIndex<DataType, DistType>::statisticInfo() cons
     auto stats = VecSimIndexStatsInfo{
         .memory = this->getAllocationSize(),
         .numberOfMarkedDeleted = this->getNumMarkedDeleted(),
+        .directHNSWInsertions = 0, // Base tiered index returns 0; TieredHNSWIndex overrides
+        .flatBufferSize = this->frontendIndex->indexSize(),
     };
 
     return stats;
diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt
index 859f2c0af..c98aa7c30 100644
--- a/tests/benchmark/CMakeLists.txt
+++ b/tests/benchmark/CMakeLists.txt
@@ -39,6 +39,10 @@ endif()
 # Spaces benchmarks								                                          #
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 
+# Simple HNSW benchmark (self-contained, no external data needed)
+add_executable(simple_hnsw_bench simple_hnsw_bench.cpp)
+target_link_libraries(simple_hnsw_bench VectorSimilarity)
+
 set(DATA_TYPE fp32 fp64 bf16 fp16 int8 uint8 sq8_fp32 sq8_sq8)
 foreach(data_type IN LISTS DATA_TYPE)
 	add_executable(bm_spaces_${data_type} spaces_benchmarks/bm_spaces_${data_type}.cpp)
diff --git a/tests/benchmark/simple_hnsw_bench.cpp b/tests/benchmark/simple_hnsw_bench.cpp
new file mode 100644
index 000000000..5f6af44a9
--- /dev/null
+++ b/tests/benchmark/simple_hnsw_bench.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2006-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
+ * GNU Affero General Public License v3 (AGPLv3).
+ *
+ * Simple HNSW benchmark that creates an in-memory index for comparison with Rust.
+ * This benchmark is self-contained and doesn't require external data files.
+ *
+ * Usage: ./simple_hnsw_bench
+ */
+#include <iostream>
+#include <vector>
+#include <random>
+#include <chrono>
+#include "VecSim/vec_sim.h"
+#include "VecSim/algorithms/hnsw/hnsw.h"
+#include "VecSim/index_factories/hnsw_factory.h"
+
+constexpr size_t DIM = 128;
+constexpr size_t N_VECTORS = 10000;
+constexpr size_t M = 16;
+constexpr size_t EF_CONSTRUCTION = 100;
+constexpr size_t EF_RUNTIME = 100;
+constexpr size_t N_QUERIES = 1000;
+
+std::vector<float> generate_random_vector(size_t dim, std::mt19937 &gen) {
+    std::uniform_real_distribution<float> dist(0.0f, 1.0f);
+    std::vector<float> vec(dim);
+    for (size_t i = 0; i < dim; ++i) {
+        vec[i] = dist(gen);
+    }
+    return vec;
+}
+
+int main() {
+    std::mt19937 gen(42); // Fixed seed for reproducibility
+
+    std::cout << "=== C++ HNSW Benchmark ===" << std::endl;
+    std::cout << "Config: " << N_VECTORS << " vectors, " << DIM << " dimensions, M=" << M
+              << ", ef_construction=" << EF_CONSTRUCTION << ", ef_runtime=" << EF_RUNTIME
+              << std::endl;
+    std::cout << std::endl;
+
+    // Create HNSW parameters
+    HNSWParams params = {.dim = DIM,
+                         .metric = VecSimMetric_L2,
+                         .type = VecSimType_FLOAT32,
+                         .M = M,
+                         .efConstruction = EF_CONSTRUCTION,
+                         .efRuntime = EF_RUNTIME};
+
+    // Create index
+    VecSimIndex *index = HNSWFactory::NewIndex(&params);
+
+    // Generate and insert vectors
+    std::cout << "Inserting " << N_VECTORS << " vectors..." << std::endl;
+    auto insert_start = std::chrono::high_resolution_clock::now();
+
+    for (size_t i = 0; i < N_VECTORS; ++i) {
+        auto vec = generate_random_vector(DIM, gen);
+        VecSimIndex_AddVector(index, vec.data(), i);
+    }
+
+    auto insert_end = std::chrono::high_resolution_clock::now();
+    auto insert_duration =
+        std::chrono::duration_cast<std::chrono::milliseconds>(insert_end - insert_start);
+    double insert_throughput = N_VECTORS * 1000.0 / insert_duration.count();
+
+    std::cout << "Insertion time: " << insert_duration.count() << " ms (" << insert_throughput
+              << " vec/s)" << std::endl;
+    std::cout << std::endl;
+
+    // Generate query vectors
+    std::vector<std::vector<float>> queries;
+    for (size_t i = 0; i < N_QUERIES; ++i) {
+        queries.push_back(generate_random_vector(DIM, gen));
+    }
+
+    // KNN Search benchmark
+    std::cout << "Running " << N_QUERIES << " KNN queries (k=10)..." << std::endl;
+
+    auto knn_start = std::chrono::high_resolution_clock::now();
+
+    for (size_t i = 0; i < N_QUERIES; ++i) {
+        VecSimQueryReply *results =
+            VecSimIndex_TopKQuery(index, queries[i].data(), 10, nullptr, BY_SCORE);
+        VecSimQueryReply_Free(results);
+    }
+
+    auto knn_end = std::chrono::high_resolution_clock::now();
+    auto knn_duration = std::chrono::duration_cast<std::chrono::microseconds>(knn_end - knn_start);
+    double avg_knn_time = static_cast<double>(knn_duration.count()) / N_QUERIES;
+    double knn_throughput = N_QUERIES * 1000000.0 / knn_duration.count();
+
+    std::cout << "KNN k=10 (ef=" << EF_RUNTIME << "): avg " << avg_knn_time << " µs ("
+              << knn_throughput << " queries/s)" << std::endl;
+    std::cout << std::endl;
+
+    // Range Search benchmark
+    std::cout << "Running " << N_QUERIES << " Range queries (radius=10.0)..." << std::endl;
+
+    auto range_start = std::chrono::high_resolution_clock::now();
+
+    for (size_t i = 0; i < N_QUERIES; ++i) {
+        VecSimQueryReply *results =
+            VecSimIndex_RangeQuery(index, queries[i].data(), 10.0, nullptr, BY_SCORE);
+        VecSimQueryReply_Free(results);
+    }
+
+    auto range_end = std::chrono::high_resolution_clock::now();
+    auto range_duration =
+        std::chrono::duration_cast<std::chrono::microseconds>(range_end - range_start);
+    double avg_range_time = static_cast<double>(range_duration.count()) / N_QUERIES;
+    double range_throughput = N_QUERIES * 1000000.0 / range_duration.count();
+
+    std::cout << "Range (r=10): avg " << avg_range_time << " µs (" << range_throughput
+              << " queries/s)" << std::endl;
+    std::cout << std::endl;
+
+    // Cleanup
+    VecSimIndex_Free(index);
+
+    std::cout << "=== Summary ===" << std::endl;
+    std::cout << "Insertion:    " << insert_throughput << " vec/s" << std::endl;
+    std::cout << "KNN (k=10):   " << avg_knn_time << " µs (" << knn_throughput << " q/s)"
+              << std::endl;
+    std::cout << "Range (r=10): " << avg_range_time << " µs (" << range_throughput << " q/s)"
+              << std::endl;
+
+    return 0;
+}
diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp
index 04b1f18b9..ffcac02ea 100644
--- a/tests/unit/test_hnsw_tiered.cpp
+++ b/tests/unit/test_hnsw_tiered.cpp
@@ -2761,6 +2761,9 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) {
     EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
     EXPECT_EQ(info.tieredInfo.bufferLimit, 1000);
     EXPECT_EQ(info.tieredInfo.specificTieredBackendInfo.hnswTieredInfo.pendingSwapJobsThreshold, 1);
+    // Verify new tiered-specific stats
+    EXPECT_EQ(stats.flatBufferSize, 0);
+    EXPECT_EQ(stats.directHNSWInsertions, 0);
 
     // Validate that Static info returns the right restricted info as well.
     VecSimIndexBasicInfo s_info = VecSimIndex_BasicInfo(tiered_index);
@@ -2787,6 +2790,9 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) {
                                           info.tieredInfo.frontendCommonInfo.memory);
     EXPECT_EQ(info.commonInfo.memory, stats.memory);
     EXPECT_EQ(info.tieredInfo.backgroundIndexing, true);
+    // Vector is in flat buffer, no direct insertions yet
+    EXPECT_EQ(stats.flatBufferSize, 1);
+    EXPECT_EQ(stats.directHNSWInsertions, 0);
 
     mock_thread_pool.thread_iteration();
     info = tiered_index->debugInfo();
@@ -2803,6 +2809,9 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) {
                                           info.tieredInfo.frontendCommonInfo.memory);
     EXPECT_EQ(info.commonInfo.memory, stats.memory);
     EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
+    // Vector moved from flat buffer to HNSW by background thread
+    EXPECT_EQ(stats.flatBufferSize, 0);
+    EXPECT_EQ(stats.directHNSWInsertions, 0);
 
     if (TypeParam::isMulti()) {
         GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, 1, 1);
@@ -2839,6 +2848,74 @@ TYPED_TEST(HNSWTieredIndexTest, testInfo) {
     EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
 }
 
+TYPED_TEST(HNSWTieredIndexTest, testDirectHNSWInsertionsStats) {
+    // Test that directHNSWInsertions counter is incremented when flat buffer is full.
+    size_t dim = 4;
+    size_t buffer_limit = 5;
+    HNSWParams params = {.type = TypeParam::get_index_type(),
+                         .dim = dim,
+                         .metric = VecSimMetric_L2,
+                         .multi = TypeParam::isMulti()};
+    VecSimParams hnsw_params = CreateParams(params);
+    auto mock_thread_pool = tieredIndexMock();
+
+    // Create index with small buffer limit
+    auto *tiered_index =
+        this->CreateTieredHNSWIndex(hnsw_params, mock_thread_pool, 1, buffer_limit);
+
+    // Fill the flat buffer
+    for (size_t i = 0; i < buffer_limit; i++) {
+        GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, i, i);
+    }
+
+    VecSimIndexStatsInfo stats = tiered_index->statisticInfo();
+    EXPECT_EQ(stats.flatBufferSize, buffer_limit);
+    EXPECT_EQ(stats.directHNSWInsertions, 0);
+
+    // Add more vectors - these should go directly to HNSW
+    size_t extra_vectors = 3;
+    for (size_t i = buffer_limit; i < buffer_limit + extra_vectors; i++) {
+        GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, i, i);
+    }
+
+    stats = tiered_index->statisticInfo();
+    EXPECT_EQ(stats.flatBufferSize, buffer_limit);
+    EXPECT_EQ(stats.directHNSWInsertions, extra_vectors);
+
+    // Drain the flat buffer by starting threads and waiting for them to finish
+    mock_thread_pool.init_threads();
+    mock_thread_pool.thread_pool_join();
+
+    stats = tiered_index->statisticInfo();
+    EXPECT_EQ(stats.flatBufferSize, 0);
+    // Direct insertions counter should be preserved
+    EXPECT_EQ(stats.directHNSWInsertions, extra_vectors);
+
+    // Test write-in-place mode: vectors should go directly to HNSW even when buffer is not full
+    VecSim_SetWriteMode(VecSim_WriteInPlace);
+
+    size_t write_in_place_vectors = 4;
+    size_t label_offset = buffer_limit + extra_vectors;
+    for (size_t i = 0; i < write_in_place_vectors; i++) {
+        GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, label_offset + i, label_offset + i);
+    }
+
+    stats = tiered_index->statisticInfo();
+    // Flat buffer should still be empty (vectors went directly to HNSW)
+    EXPECT_EQ(stats.flatBufferSize, 0);
+    // Direct insertions counter should include the write-in-place vectors
+    EXPECT_EQ(stats.directHNSWInsertions, extra_vectors + write_in_place_vectors);
+
+    // Verify all vectors are in the backend index
+    VecSimIndexDebugInfo info = tiered_index->debugInfo();
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize,
+              buffer_limit + extra_vectors + write_in_place_vectors);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0);
+
+    // Reset to async mode
+    VecSim_SetWriteMode(VecSim_WriteAsync);
+}
+
 TYPED_TEST(HNSWTieredIndexTest, testInfoIterator) {
     // Create TieredHNSW index instance with a mock queue.
     size_t dim = 4;