From 8d099f92ef2735d6589c53b5a1f3fccf34b228a7 Mon Sep 17 00:00:00 2001 From: zhourrr Date: Fri, 13 Mar 2026 17:51:09 +0800 Subject: [PATCH 1/7] feat: enlarge the limit of sparse vectors --- .../algorithm/flat_sparse/flat_sparse_streamer.cc | 12 ++++++++---- src/core/algorithm/flat_sparse/flat_sparse_utility.h | 2 +- src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h | 2 +- .../algorithm/hnsw_sparse/hnsw_sparse_streamer.cc | 12 ++++++++---- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc b/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc index 4df83d8f7..bc5a3da17 100644 --- a/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc +++ b/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc @@ -195,8 +195,10 @@ int FlatSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count, } if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) { - LOG_ERROR("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)pkey); + LOG_ERROR( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey); (*stats_.mutable_discarded_count())++; return IndexError_InvalidValue; } @@ -252,8 +254,10 @@ int FlatSparseStreamer::add_with_id_impl(uint32_t pkey, } if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) { - LOG_ERROR("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)pkey); + LOG_ERROR( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey); (*stats_.mutable_discarded_count())++; return IndexError_InvalidValue; } diff --git a/src/core/algorithm/flat_sparse/flat_sparse_utility.h b/src/core/algorithm/flat_sparse/flat_sparse_utility.h index 4566b5dee..a66e2f631 100644 --- a/src/core/algorithm/flat_sparse/flat_sparse_utility.h +++ b/src/core/algorithm/flat_sparse/flat_sparse_utility.h @@ -19,7 +19,7 @@ namespace zvec { namespace core { -static constexpr uint32_t PARAM_FLAT_SPARSE_MAX_DIM_SIZE = 4096; +static constexpr uint32_t PARAM_FLAT_SPARSE_MAX_DIM_SIZE = 16384; static const std::string PARAM_FLAT_SPARSE_META_SEG_ID = "bruteforce_sparse_meta"; diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h b/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h index 37166027e..d514e0c2e 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h @@ -610,7 +610,7 @@ class HnswSparseEntity { constexpr static uint32_t kSparseMetaSize = 2u * sizeof(uint64_t); constexpr static float kDefaultSparseNeighborRatio = 0.5f; - constexpr static uint32_t kSparseMaxDimSize = 4096; + constexpr static uint32_t kSparseMaxDimSize = 16384; constexpr static float kDefaultQueryFilteringRatio = 0.0f; // turn off protected: diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc index f51ebb5ea..ed12e9981 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc @@ -439,8 +439,10 @@ int HnswSparseStreamer::add_with_id_impl(uint32_t id, } if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { - LOG_WARN("Add vector failed, dim size too larg, dim_size=%u, id=%u", - sparse_count, id); + LOG_WARN( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), id=%u", + sparse_count, HnswSparseEntity::kSparseMaxDimSize, id); return IndexError_InvalidValue; } @@ -524,8 +526,10 @@ int HnswSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count, } if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { - LOG_WARN("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)pkey); + LOG_WARN( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)pkey); return IndexError_InvalidValue; } From 7263b386457df9fc57a0750fd8976db9602d75d4 Mon Sep 17 00:00:00 2001 From: zhourrr Date: Mon, 16 Mar 2026 10:37:42 +0800 Subject: [PATCH 2/7] fix: revise logging --- .../algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc b/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc index 48c20d726..d8fb29a1b 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc @@ -89,8 +89,10 @@ int HnswSparseBuilderEntity::add_vector(level_t level, key_t key, const uint32_t *sparse_indices, const void *sparse_vec, node_id_t *id) { if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { - LOG_WARN("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)key); + LOG_WARN( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)key); return IndexError_InvalidValue; } From 9859d3e69f6f8cc8f9f56859e5bf346d0f93297b Mon Sep 17 00:00:00 2001 From: zhourrr Date: Mon, 16 Mar 2026 15:30:43 +0800 Subject: [PATCH 3/7] fix: adjust query limit --- src/db/common/constants.h | 2 +- src/db/index/common/doc.cc | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/db/common/constants.h b/src/db/common/constants.h index 39b13f445..d07512f3b 100644 --- a/src/db/common/constants.h +++ b/src/db/common/constants.h @@ -32,7 +32,7 @@ const std::string GLOBAL_DOC_ID = "_zvec_g_doc_id_"; const std::string USER_ID = "_zvec_uid_"; -const int kSparseMaxDimSize = 4096; +const int kSparseMaxDimSize = 16384; const int64_t kMaxRecordBatchNumRows = 4096; diff --git a/src/db/index/common/doc.cc b/src/db/index/common/doc.cc index 6d411bfb2..93abff8c1 100644 --- a/src/db/index/common/doc.cc +++ b/src/db/index/common/doc.cc @@ -1253,7 +1253,9 @@ Status VectorQuery::validate(const FieldSchema *schema) const { // validate sparse indices size if (query_sparse_indices_.size() >= kSparseMaxDimSize * sizeof(uint32_t)) { return Status::InvalidArgument( - "query validate failed: sparse indices size is too large"); + "query validate failed: the number of sparse indices exceeds the " + "maximum limit %d", + kSparseMaxDimSize); } } else { return Status::InvalidArgument( From 8eea0c4c3128ff9161a883029b6f97eca456e271 Mon Sep 17 00:00:00 2001 From: zhourrr Date: Mon, 16 Mar 2026 15:37:29 +0800 Subject: [PATCH 4/7] fix: uses greater instead of greater equal --- src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc | 2 +- src/db/index/common/doc.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc index ed12e9981..9d101016a 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc @@ -438,7 +438,7 @@ int HnswSparseStreamer::add_with_id_impl(uint32_t id, return ret; } - if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { + if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) { LOG_WARN( "Failed to add sparse vector: number of non-zero elements (%u) exceeds " "maximum allowed (%u), id=%u", diff --git a/src/db/index/common/doc.cc b/src/db/index/common/doc.cc index 93abff8c1..aed813fad 100644 --- a/src/db/index/common/doc.cc +++ b/src/db/index/common/doc.cc @@ -1251,10 +1251,10 @@ Status VectorQuery::validate(const FieldSchema *schema) const { } } else if (schema->is_sparse_vector()) { // validate sparse indices size - if (query_sparse_indices_.size() >= kSparseMaxDimSize * sizeof(uint32_t)) { + if (query_sparse_indices_.size() > kSparseMaxDimSize * sizeof(uint32_t)) { return Status::InvalidArgument( "query validate failed: the number of sparse indices exceeds the " - "maximum limit %d", + "maximum limit ", kSparseMaxDimSize); } } else { From bc33e81536a3414310edb6006a5238128a2a0c14 Mon Sep 17 00:00:00 2001 From: zhourrr Date: Mon, 16 Mar 2026 15:41:21 +0800 Subject: [PATCH 5/7] fix: revise --- src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc | 2 +- src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc b/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc index d8fb29a1b..25c5c00cd 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc @@ -88,7 +88,7 @@ int HnswSparseBuilderEntity::add_vector(level_t level, key_t key, const uint32_t sparse_count, const uint32_t *sparse_indices, const void *sparse_vec, node_id_t *id) { - if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { + if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) { LOG_WARN( "Failed to add sparse vector: number of non-zero elements (%u) exceeds " "maximum allowed (%u), key=%zu", diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc index 9d101016a..3abce8087 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc @@ -525,7 +525,7 @@ int HnswSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count, return ret; } - if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { + if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) { LOG_WARN( "Failed to add sparse vector: number of non-zero elements (%u) exceeds " "maximum allowed (%u), key=%zu", From db86ab0e8eec023ae4de06494419ff90fa623ac6 Mon Sep 17 00:00:00 2001 From: zhourrr Date: Mon, 16 Mar 2026 16:17:26 +0800 Subject: [PATCH 6/7] fix: validation [skip ci] --- src/db/index/common/doc.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/db/index/common/doc.cc b/src/db/index/common/doc.cc index aed813fad..2f9f12b03 100644 --- a/src/db/index/common/doc.cc +++ b/src/db/index/common/doc.cc @@ -866,6 +866,12 @@ Status Doc::validate(const CollectionSchema::Ptr &schema, "doc validate failed: field[", field_name, "]'s sparse vector indices and values size not match"); } + if (sparse_indices.size() > kSparseMaxDimSize) { + return Status::InvalidArgument( + "doc validate failed: vector[", field_name, + "], the number of sparse indices exceeds the maximum limit ", + kSparseMaxDimSize); + } } break; } @@ -881,6 +887,12 @@ Status Doc::validate(const CollectionSchema::Ptr &schema, "doc validate failed: field[", field_name, "]'s sparse vector indices and values size not match"); } + if (sparse_indices.size() > kSparseMaxDimSize) { + return Status::InvalidArgument( + "doc validate failed: vector[", field_name, + "], the number of sparse indices exceeds the maximum limit ", + kSparseMaxDimSize); + } } break; } From c2c1aee028759463e8774084cd32fa23f30e6b1d Mon Sep 17 00:00:00 2001 From: zhourrr Date: Mon, 16 Mar 2026 16:25:15 +0800 Subject: [PATCH 7/7] fix: unit test --- tests/db/index/common/doc_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/db/index/common/doc_test.cc b/tests/db/index/common/doc_test.cc index 5047ecb5d..9098478be 100644 --- a/tests/db/index/common/doc_test.cc +++ b/tests/db/index/common/doc_test.cc @@ -1255,7 +1255,7 @@ TEST(VectorQuery, Validate) { VectorQuery query; query.field_name_ = "field_name"; query.topk_ = 100; - std::vector query_indices = std::vector(4097); + std::vector query_indices = std::vector(16385); std::string query_indices_str = std::string(reinterpret_cast(query_indices.data()), query_indices.size() * sizeof(uint32_t));